From a881dcddd0a4cd66f09ea66299e1392021d3fc32 Mon Sep 17 00:00:00 2001 From: notgitika Date: Thu, 30 Apr 2026 21:10:28 -0400 Subject: [PATCH] =?UTF-8?q?chore:=20sync=20main=20into=20preview=20?= =?UTF-8?q?=E2=80=94=20evo=20features?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brings in all evo preview features from main: - Config bundles, batch evaluation, recommendations, AB testing - Deploy teardown cleanup for evo resources - Schema fixes and JSON Schema compat - Version bump to 0.13.0 Conflicts resolved keeping both preview's harness support and main's evo additions. --- .github/harness/prompts/review.md | 11 +- .github/harness/prompts/system.md | 10 +- .github/scripts/prompts/review.md | 13 + .github/scripts/prompts/system.md | 21 + .github/scripts/python/harness_review.py | 217 ++++ .github/workflows/e2e-tests-full.yml | 5 +- .../workflows/slack-issue-notification.yml | 2 +- .../workflows/slack-open-prs-notification.yml | 2 +- .gitignore | 5 - .prettierignore | 2 +- CHANGELOG.md | 113 +- README.md | 35 +- docs/TESTING.md | 18 + docs/batch-evaluation.md | 143 +++ docs/config-bundles.md | 114 +++ docs/recommendations.md | 158 +++ e2e-tests/ab-test-config-bundle.test.ts | 211 ++++ e2e-tests/ab-test-target-based.test.ts | 301 ++++++ e2e-tests/byo-custom-jwt.test.ts | 2 +- e2e-tests/config-bundle-eval-rec.test.ts | 633 ++++++++++++ .../fixtures/import/cleanup_resources.py | 6 - e2e-tests/fixtures/import/common.py | 9 +- e2e-tests/fixtures/import/setup_evaluator.py | 5 +- e2e-tests/fixtures/import/setup_gateway.py | 5 +- .../fixtures/import/setup_memory_full.py | 6 +- .../fixtures/import/setup_runtime_basic.py | 7 +- e2e-tests/http-gateway-targets.test.ts | 228 +++++ e2e-tests/import-gateway.test.ts | 4 +- e2e-tests/import-resources.test.ts | 4 +- esbuild.config.mjs | 2 +- .../add-remove-ab-test-target-based.test.ts | 461 +++++++++ integ-tests/add-remove-ab-test.test.ts | 170 +++ 
integ-tests/add-remove-config-bundle.test.ts | 312 ++++++ .../add-remove-online-eval-endpoint.test.ts | 199 ++++ integ-tests/add-remove-resources.test.ts | 58 +- integ-tests/create-frameworks.test.ts | 1 - integ-tests/create-memory.test.ts | 1 - integ-tests/create-no-agent.test.ts | 2 +- integ-tests/create-protocols.test.ts | 1 - integ-tests/create-with-agent.test.ts | 2 +- integ-tests/dev-server.test.ts | 2 +- integ-tests/help.test.ts | 47 +- integ-tests/recommendation.test.ts | 290 ++++++ package-lock.json | 12 +- package.json | 4 +- scripts/bundle.mjs | 7 +- .../assets.snapshot.test.ts.snap | 142 ++- src/assets/cdk/test/cdk.test.ts | 1 + .../http/langchain_langgraph/base/main.py | 60 +- src/assets/python/http/strands/base/main.py | 79 +- src/cli/__tests__/global-config.test.ts | 43 +- .../aws/__tests__/agentcore-ab-tests.test.ts | 345 +++++++ .../__tests__/agentcore-http-gateways.test.ts | 235 +++++ .../agentcore-recommendation.test.ts | 295 ++++++ src/cli/aws/__tests__/agentcore.test.ts | 42 +- src/cli/aws/agentcore-ab-tests.ts | 360 +++++++ src/cli/aws/agentcore-batch-evaluation.ts | 411 ++++++++ src/cli/aws/agentcore-config-bundles.ts | 368 +++++++ src/cli/aws/agentcore-http-gateways.ts | 519 ++++++++++ src/cli/aws/agentcore-recommendation.ts | 371 +++++++ src/cli/aws/agentcore.ts | 53 +- src/cli/aws/index.ts | 18 + src/cli/cli.ts | 17 +- .../__tests__/outputs-extended.test.ts | 8 +- .../cloudformation/__tests__/outputs.test.ts | 114 +++ src/cli/cloudformation/outputs.ts | 28 +- src/cli/commands/abtest/command.ts | 199 ++++ src/cli/commands/abtest/index.ts | 1 + src/cli/commands/add/types.ts | 1 + src/cli/commands/config-bundle/command.tsx | 347 +++++++ src/cli/commands/config-bundle/index.ts | 1 + src/cli/commands/create/action.ts | 9 + src/cli/commands/create/command.tsx | 2 + src/cli/commands/create/types.ts | 1 + src/cli/commands/deploy/actions.ts | 8 +- src/cli/commands/deploy/command.tsx | 10 +- src/cli/commands/deploy/types.ts | 1 + 
src/cli/commands/import/command.ts | 2 +- src/cli/commands/import/import-evaluator.ts | 16 +- src/cli/commands/index.ts | 1 + src/cli/commands/invoke/action.ts | 17 + .../commands/logs/__tests__/action.test.ts | 12 + src/cli/commands/pause/command.tsx | 258 +++++ src/cli/commands/pause/index.ts | 2 +- src/cli/commands/recommendations/command.tsx | 63 ++ src/cli/commands/recommendations/index.ts | 1 + src/cli/commands/remove/command.tsx | 3 + src/cli/commands/remove/types.ts | 4 +- src/cli/commands/run/command.tsx | 407 +++++++- src/cli/commands/status/action.ts | 60 ++ src/cli/commands/status/command.tsx | 36 +- src/cli/commands/stop/command.tsx | 57 ++ src/cli/commands/stop/index.ts | 1 + .../telemetry/__tests__/telemetry.test.ts | 2 +- src/cli/commands/telemetry/actions.ts | 2 +- src/cli/commands/update/command.tsx | 51 +- .../__tests__/checks-extended.test.ts | 30 + src/cli/logging/remove-logger.ts | 4 +- .../ab-test/__tests__/promote.test.ts | 270 +++++ src/cli/operations/ab-test/promote.ts | 124 +++ .../agent/config-bundle-defaults.ts | 30 + .../agent/generate/schema-mapper.ts | 1 + .../agent/generate/write-agent-to-project.ts | 3 + .../__tests__/bundle-name-variants.test.ts | 22 + .../__tests__/resolve-bundle.test.ts | 103 ++ .../config-bundle/bundle-name-variants.ts | 11 + .../operations/config-bundle/diff-versions.ts | 63 ++ .../config-bundle/resolve-bundle.ts | 91 ++ .../__tests__/post-deploy-ab-tests.test.ts | 597 +++++++++++ .../post-deploy-config-bundles.test.ts | 652 ++++++++++++ .../post-deploy-http-gateways.test.ts | 436 ++++++++ .../post-deploy-observability.test.ts | 14 +- .../post-deploy-online-evals.test.ts | 179 ++++ .../deploy/__tests__/preflight.test.ts | 10 + .../__tests__/harness-deployer.test.ts | 3 + src/cli/operations/deploy/index.ts | 24 + .../operations/deploy/post-deploy-ab-tests.ts | 733 +++++++++++++ .../deploy/post-deploy-config-bundles.ts | 348 +++++++ .../deploy/post-deploy-http-gateways.ts | 628 ++++++++++++ 
.../deploy/post-deploy-observability.ts | 4 +- .../deploy/post-deploy-online-evals.ts | 80 ++ src/cli/operations/deploy/preflight.ts | 1 + src/cli/operations/deploy/teardown.ts | 80 ++ .../operations/dev/__tests__/config.test.ts | 63 ++ src/cli/operations/eval/batch-eval-storage.ts | 75 ++ .../operations/eval/run-batch-evaluation.ts | 347 +++++++ src/cli/operations/eval/run-eval.ts | 2 +- .../operations/fetch-access/list-gateways.ts | 12 + .../__tests__/apply-to-bundle.test.ts | 199 ++++ .../__tests__/fetch-session-spans.test.ts | 224 ++++ .../__tests__/recommendation-storage.test.ts | 134 +++ .../__tests__/run-recommendation.test.ts | 700 +++++++++++++ .../recommendation/apply-to-bundle.ts | 140 +++ .../operations/recommendation/constants.ts | 11 + .../recommendation/fetch-session-spans.ts | 158 +++ src/cli/operations/recommendation/index.ts | 18 + .../recommendation/recommendation-storage.ts | 84 ++ .../recommendation/run-recommendation.ts | 610 +++++++++++ src/cli/operations/recommendation/types.ts | 72 ++ src/cli/primitives/ABTestPrimitive.ts | 728 +++++++++++++ src/cli/primitives/AgentPrimitive.tsx | 199 ++-- src/cli/primitives/ConfigBundlePrimitive.ts | 237 +++++ src/cli/primitives/CredentialPrimitive.tsx | 66 +- src/cli/primitives/EvaluatorPrimitive.ts | 49 +- src/cli/primitives/GatewayPrimitive.ts | 51 +- src/cli/primitives/GatewayTargetPrimitive.ts | 70 +- src/cli/primitives/MemoryPrimitive.tsx | 55 +- .../primitives/OnlineEvalConfigPrimitive.ts | 84 +- src/cli/primitives/PolicyEnginePrimitive.ts | 55 +- src/cli/primitives/PolicyPrimitive.ts | 75 +- .../primitives/RuntimeEndpointPrimitive.ts | 30 +- .../__tests__/ABTestPrimitive.test.ts | 278 +++++ .../__tests__/GatewayPrimitive.test.ts | 3 + .../__tests__/HarnessPrimitive.test.ts | 6 + .../primitives/__tests__/auth-utils.test.ts | 3 + src/cli/primitives/index.ts | 3 + src/cli/primitives/registry.ts | 6 + src/cli/project.ts | 3 + src/cli/telemetry/__tests__/client.test.ts | 37 +- 
src/cli/telemetry/cli-command-run.ts | 71 ++ src/cli/telemetry/client-accessor.ts | 2 +- src/cli/telemetry/client.ts | 17 +- src/cli/telemetry/config.ts | 2 +- .../schemas/__tests__/command-run.test.ts | 54 +- src/cli/telemetry/schemas/command-run.ts | 1 + src/cli/telemetry/schemas/common-shapes.ts | 44 + src/cli/templates/types.ts | 2 + src/cli/tui/App.tsx | 60 +- src/cli/tui/components/DeployStatus.tsx | 40 +- src/cli/tui/components/MultiSelectList.tsx | 28 +- src/cli/tui/components/PathInput.tsx | 24 +- src/cli/tui/components/ResourceGraph.tsx | 53 +- src/cli/tui/components/SelectList.tsx | 26 +- src/cli/tui/components/WizardSelect.tsx | 22 +- .../__tests__/DeployStatus.test.tsx | 49 + src/cli/tui/constants.ts | 2 + src/cli/tui/copy.ts | 48 +- .../__tests__/usePanelNavigation.test.tsx | 347 +++++++ src/cli/tui/hooks/useCreateABTest.ts | 93 ++ src/cli/tui/hooks/useCreateConfigBundle.ts | 59 ++ src/cli/tui/hooks/useCreateEvaluator.ts | 20 +- src/cli/tui/hooks/useCreateMcp.ts | 45 +- src/cli/tui/hooks/useCreateMemory.ts | 27 +- src/cli/tui/hooks/useCreateOnlineEval.ts | 27 +- src/cli/tui/hooks/usePanelNavigation.ts | 196 ++++ src/cli/tui/hooks/useRemove.ts | 40 + .../screens/ab-test/ABTestDetailScreen.tsx | 623 +++++++++++ .../screens/ab-test/ABTestPickerScreen.tsx | 90 ++ src/cli/tui/screens/ab-test/AddABTestFlow.tsx | 281 +++++ .../tui/screens/ab-test/AddABTestScreen.tsx | 914 +++++++++++++++++ .../screens/ab-test/RemoveABTestScreen.tsx | 26 + .../ab-test/TargetBasedABTestScreen.tsx | 712 +++++++++++++ .../tui/screens/ab-test/VariantConfigForm.tsx | 268 +++++ .../__tests__/useAddABTestWizard.test.tsx | 286 ++++++ .../__tests__/useTargetBasedWizard.test.tsx | 319 ++++++ src/cli/tui/screens/ab-test/index.ts | 4 + src/cli/tui/screens/ab-test/types.ts | 89 ++ .../tui/screens/ab-test/useAddABTestWizard.ts | 324 ++++++ .../screens/ab-test/useTargetBasedWizard.ts | 188 ++++ src/cli/tui/screens/add/AddFlow.tsx | 40 + src/cli/tui/screens/add/AddScreen.tsx | 2 + 
src/cli/tui/screens/agent/AddAgentScreen.tsx | 9 + src/cli/tui/screens/agent/types.ts | 2 + src/cli/tui/screens/agent/useAddAgent.ts | 103 +- .../config-bundle-hub/ConfigBundleFlow.tsx | 60 ++ .../ConfigBundleHubScreen.tsx | 129 +++ .../screens/config-bundle-hub/DiffScreen.tsx | 149 +++ .../VersionHistoryScreen.tsx | 245 +++++ .../tui/screens/config-bundle-hub/index.ts | 4 + .../config-bundle-hub/useConfigBundleHub.ts | 220 ++++ .../config-bundle/AddConfigBundleFlow.tsx | 177 ++++ .../config-bundle/AddConfigBundleScreen.tsx | 275 +++++ src/cli/tui/screens/config-bundle/index.ts | 1 + src/cli/tui/screens/config-bundle/types.ts | 57 ++ .../config-bundle/useAddConfigBundleWizard.ts | 113 ++ src/cli/tui/screens/create/useCreateFlow.ts | 7 + src/cli/tui/screens/deploy/DeployScreen.tsx | 24 +- src/cli/tui/screens/deploy/useDeployFlow.ts | 231 ++++- src/cli/tui/screens/eval/EvalHubScreen.tsx | 14 +- .../tui/screens/generate/GenerateWizardUI.tsx | 6 + src/cli/tui/screens/generate/types.ts | 16 +- .../tui/screens/generate/useGenerateWizard.ts | 4 + src/cli/tui/screens/home/HelpScreen.tsx | 4 +- .../tui/screens/identity/useCreateIdentity.ts | 9 +- src/cli/tui/screens/import/ArnInputScreen.tsx | 3 +- src/cli/tui/screens/invoke/useInvokeFlow.ts | 18 + .../screens/online-eval/AddOnlineEvalFlow.tsx | 22 +- .../online-eval/AddOnlineEvalScreen.tsx | 65 +- src/cli/tui/screens/online-eval/types.ts | 17 +- .../online-eval/useAddOnlineEvalWizard.ts | 43 +- src/cli/tui/screens/policy/AddPolicyFlow.tsx | 33 +- .../recommendation/RecommendationFlow.tsx | 552 ++++++++++ .../RecommendationHistoryScreen.tsx | 250 +++++ .../recommendation/RecommendationScreen.tsx | 599 +++++++++++ .../RecommendationsHubScreen.tsx | 43 + src/cli/tui/screens/recommendation/index.ts | 3 + src/cli/tui/screens/recommendation/types.ts | 86 ++ .../recommendation/useRecommendationWizard.ts | 232 +++++ .../remove/RemoveConfigBundleScreen.tsx | 26 + src/cli/tui/screens/remove/RemoveFlow.tsx | 213 +++- 
src/cli/tui/screens/remove/RemoveScreen.tsx | 22 + .../remove/__tests__/RemoveScreen.test.tsx | 4 + .../run-eval/BatchEvalHistoryScreen.tsx | 307 ++++++ .../tui/screens/run-eval/RunBatchEvalFlow.tsx | 968 ++++++++++++++++++ src/cli/tui/screens/run-eval/RunScreen.tsx | 20 +- src/cli/tui/screens/run-eval/index.ts | 2 + .../AddRuntimeEndpointFlow.tsx | 27 +- src/cli/tui/utils/commands.ts | 4 +- src/lib/packaging/__tests__/helpers.test.ts | 71 +- src/lib/packaging/build-args.ts | 4 +- src/lib/packaging/helpers.ts | 14 +- src/lib/schemas/io/cli-config.ts | 36 - src/lib/schemas/io/config-io.ts | 7 +- src/{cli => lib/schemas/io}/global-config.ts | 29 +- src/lib/schemas/io/index.ts | 1 - src/schema/llm-compacted/agentcore.ts | 309 +++++- src/schema/schemas/agentcore-project.ts | 104 ++ src/schema/schemas/deployed-state.ts | 51 + .../primitives/__tests__/ab-test.test.ts | 228 +++++ .../primitives/__tests__/http-gateway.test.ts | 82 ++ src/schema/schemas/primitives/ab-test.ts | 147 +++ .../schemas/primitives/config-bundle.ts | 49 + src/schema/schemas/primitives/http-gateway.ts | 42 + src/schema/schemas/primitives/index.ts | 24 + .../schemas/primitives/online-eval-config.ts | 2 + src/test-utils/cli-runner.ts | 12 +- src/test-utils/index.ts | 20 + src/test-utils/project-factory.ts | 2 +- src/test-utils/telemetry-helper.ts | 56 + 269 files changed, 29757 insertions(+), 889 deletions(-) create mode 100644 .github/scripts/prompts/review.md create mode 100644 .github/scripts/prompts/system.md create mode 100644 .github/scripts/python/harness_review.py create mode 100644 docs/batch-evaluation.md create mode 100644 docs/config-bundles.md create mode 100644 docs/recommendations.md create mode 100644 e2e-tests/ab-test-config-bundle.test.ts create mode 100644 e2e-tests/ab-test-target-based.test.ts create mode 100644 e2e-tests/config-bundle-eval-rec.test.ts create mode 100644 e2e-tests/http-gateway-targets.test.ts create mode 100644 integ-tests/add-remove-ab-test-target-based.test.ts 
create mode 100644 integ-tests/add-remove-ab-test.test.ts create mode 100644 integ-tests/add-remove-config-bundle.test.ts create mode 100644 integ-tests/add-remove-online-eval-endpoint.test.ts create mode 100644 integ-tests/recommendation.test.ts create mode 100644 src/cli/aws/__tests__/agentcore-ab-tests.test.ts create mode 100644 src/cli/aws/__tests__/agentcore-http-gateways.test.ts create mode 100644 src/cli/aws/__tests__/agentcore-recommendation.test.ts create mode 100644 src/cli/aws/agentcore-ab-tests.ts create mode 100644 src/cli/aws/agentcore-batch-evaluation.ts create mode 100644 src/cli/aws/agentcore-config-bundles.ts create mode 100644 src/cli/aws/agentcore-http-gateways.ts create mode 100644 src/cli/aws/agentcore-recommendation.ts create mode 100644 src/cli/commands/abtest/command.ts create mode 100644 src/cli/commands/abtest/index.ts create mode 100644 src/cli/commands/config-bundle/command.tsx create mode 100644 src/cli/commands/config-bundle/index.ts create mode 100644 src/cli/commands/recommendations/command.tsx create mode 100644 src/cli/commands/recommendations/index.ts create mode 100644 src/cli/commands/stop/command.tsx create mode 100644 src/cli/commands/stop/index.ts create mode 100644 src/cli/operations/ab-test/__tests__/promote.test.ts create mode 100644 src/cli/operations/ab-test/promote.ts create mode 100644 src/cli/operations/agent/config-bundle-defaults.ts create mode 100644 src/cli/operations/config-bundle/__tests__/bundle-name-variants.test.ts create mode 100644 src/cli/operations/config-bundle/__tests__/resolve-bundle.test.ts create mode 100644 src/cli/operations/config-bundle/bundle-name-variants.ts create mode 100644 src/cli/operations/config-bundle/diff-versions.ts create mode 100644 src/cli/operations/config-bundle/resolve-bundle.ts create mode 100644 src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts create mode 100644 src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts create mode 100644 
src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts create mode 100644 src/cli/operations/deploy/__tests__/post-deploy-online-evals.test.ts create mode 100644 src/cli/operations/deploy/post-deploy-ab-tests.ts create mode 100644 src/cli/operations/deploy/post-deploy-config-bundles.ts create mode 100644 src/cli/operations/deploy/post-deploy-http-gateways.ts create mode 100644 src/cli/operations/deploy/post-deploy-online-evals.ts create mode 100644 src/cli/operations/eval/batch-eval-storage.ts create mode 100644 src/cli/operations/eval/run-batch-evaluation.ts create mode 100644 src/cli/operations/recommendation/__tests__/apply-to-bundle.test.ts create mode 100644 src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts create mode 100644 src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts create mode 100644 src/cli/operations/recommendation/__tests__/run-recommendation.test.ts create mode 100644 src/cli/operations/recommendation/apply-to-bundle.ts create mode 100644 src/cli/operations/recommendation/constants.ts create mode 100644 src/cli/operations/recommendation/fetch-session-spans.ts create mode 100644 src/cli/operations/recommendation/index.ts create mode 100644 src/cli/operations/recommendation/recommendation-storage.ts create mode 100644 src/cli/operations/recommendation/run-recommendation.ts create mode 100644 src/cli/operations/recommendation/types.ts create mode 100644 src/cli/primitives/ABTestPrimitive.ts create mode 100644 src/cli/primitives/ConfigBundlePrimitive.ts create mode 100644 src/cli/primitives/__tests__/ABTestPrimitive.test.ts create mode 100644 src/cli/telemetry/cli-command-run.ts create mode 100644 src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx create mode 100644 src/cli/tui/hooks/useCreateABTest.ts create mode 100644 src/cli/tui/hooks/useCreateConfigBundle.ts create mode 100644 src/cli/tui/hooks/usePanelNavigation.ts create mode 100644 
src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx create mode 100644 src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx create mode 100644 src/cli/tui/screens/ab-test/AddABTestFlow.tsx create mode 100644 src/cli/tui/screens/ab-test/AddABTestScreen.tsx create mode 100644 src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx create mode 100644 src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx create mode 100644 src/cli/tui/screens/ab-test/VariantConfigForm.tsx create mode 100644 src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx create mode 100644 src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx create mode 100644 src/cli/tui/screens/ab-test/index.ts create mode 100644 src/cli/tui/screens/ab-test/types.ts create mode 100644 src/cli/tui/screens/ab-test/useAddABTestWizard.ts create mode 100644 src/cli/tui/screens/ab-test/useTargetBasedWizard.ts create mode 100644 src/cli/tui/screens/config-bundle-hub/ConfigBundleFlow.tsx create mode 100644 src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx create mode 100644 src/cli/tui/screens/config-bundle-hub/DiffScreen.tsx create mode 100644 src/cli/tui/screens/config-bundle-hub/VersionHistoryScreen.tsx create mode 100644 src/cli/tui/screens/config-bundle-hub/index.ts create mode 100644 src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts create mode 100644 src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx create mode 100644 src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx create mode 100644 src/cli/tui/screens/config-bundle/index.ts create mode 100644 src/cli/tui/screens/config-bundle/types.ts create mode 100644 src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts create mode 100644 src/cli/tui/screens/recommendation/RecommendationFlow.tsx create mode 100644 src/cli/tui/screens/recommendation/RecommendationHistoryScreen.tsx create mode 100644 src/cli/tui/screens/recommendation/RecommendationScreen.tsx create mode 100644 
src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx create mode 100644 src/cli/tui/screens/recommendation/index.ts create mode 100644 src/cli/tui/screens/recommendation/types.ts create mode 100644 src/cli/tui/screens/recommendation/useRecommendationWizard.ts create mode 100644 src/cli/tui/screens/remove/RemoveConfigBundleScreen.tsx create mode 100644 src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx create mode 100644 src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx delete mode 100644 src/lib/schemas/io/cli-config.ts rename src/{cli => lib/schemas/io}/global-config.ts (73%) create mode 100644 src/schema/schemas/primitives/__tests__/ab-test.test.ts create mode 100644 src/schema/schemas/primitives/__tests__/http-gateway.test.ts create mode 100644 src/schema/schemas/primitives/ab-test.ts create mode 100644 src/schema/schemas/primitives/config-bundle.ts create mode 100644 src/schema/schemas/primitives/http-gateway.ts create mode 100644 src/test-utils/telemetry-helper.ts diff --git a/.github/harness/prompts/review.md b/.github/harness/prompts/review.md index 0a4f85fc7..d34c67b95 100644 --- a/.github/harness/prompts/review.md +++ b/.github/harness/prompts/review.md @@ -3,11 +3,16 @@ Review this GitHub PR: {pr_url} You have tools to fetch the PR diff, read files, search the web, and post comments on the PR. You have these repos cloned locally for context: + - /opt/workspace/agentcore-cli — aws/agentcore-cli - /opt/workspace/agentcore-l3-cdk-constructs — aws/agentcore-l3-cdk-constructs -Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or re-post issues that have already been raised in existing comments. +Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or +re-post issues that have already been raised in existing comments. -Review the PR. 
If there are any serious issues that require code changes before merging, post a comment on the PR for each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can choose. Skip style nits and minor suggestions — only flag things that actually need to change. +Review the PR. If there are any serious issues that require code changes before merging, post a comment on the PR for +each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can +choose. Skip style nits and minor suggestions — only flag things that actually need to change. -If all serious issues have already been raised in existing comments, or if you found no new issues, post a single comment on the PR saying it looks good to merge (or that all issues have already been flagged). +If all serious issues have already been raised in existing comments, or if you found no new issues, post a single +comment on the PR saying it looks good to merge (or that all issues have already been flagged). diff --git a/.github/harness/prompts/system.md b/.github/harness/prompts/system.md index 963accb8a..52a3d2260 100644 --- a/.github/harness/prompts/system.md +++ b/.github/harness/prompts/system.md @@ -6,11 +6,13 @@ This workspace contains two repos for developing and testing the AgentCore CLI. ### agentcore-cli/ (`aws/agentcore-cli`) -The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built with Ink (React-based TUI). +The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built +with Ink (React-based TUI). ### agentcore-l3-cdk-constructs/ (`aws/agentcore-l3-cdk-constructs`) -AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used by agentcore-cli to vend CDK projects when users run `agentcore create`. +AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. 
Used by agentcore-cli to vend CDK projects +when users run `agentcore create`. ## How they relate @@ -18,4 +20,6 @@ AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used ## Testing with a bundled distribution -Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged `agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g ` to test the CLI end-to-end. +Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged +`agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g ` to test the CLI +end-to-end. diff --git a/.github/scripts/prompts/review.md b/.github/scripts/prompts/review.md new file mode 100644 index 000000000..0a4f85fc7 --- /dev/null +++ b/.github/scripts/prompts/review.md @@ -0,0 +1,13 @@ +Review this GitHub PR: {pr_url} + +You have tools to fetch the PR diff, read files, search the web, and post comments on the PR. + +You have these repos cloned locally for context: +- /opt/workspace/agentcore-cli — aws/agentcore-cli +- /opt/workspace/agentcore-l3-cdk-constructs — aws/agentcore-l3-cdk-constructs + +Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or re-post issues that have already been raised in existing comments. + +Review the PR. If there are any serious issues that require code changes before merging, post a comment on the PR for each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can choose. Skip style nits and minor suggestions — only flag things that actually need to change. + +If all serious issues have already been raised in existing comments, or if you found no new issues, post a single comment on the PR saying it looks good to merge (or that all issues have already been flagged). 
diff --git a/.github/scripts/prompts/system.md b/.github/scripts/prompts/system.md new file mode 100644 index 000000000..963accb8a --- /dev/null +++ b/.github/scripts/prompts/system.md @@ -0,0 +1,21 @@ +# AgentCore CLI Development Workspace + +This workspace contains two repos for developing and testing the AgentCore CLI. + +## Repositories + +### agentcore-cli/ (`aws/agentcore-cli`) + +The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built with Ink (React-based TUI). + +### agentcore-l3-cdk-constructs/ (`aws/agentcore-l3-cdk-constructs`) + +AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used by agentcore-cli to vend CDK projects when users run `agentcore create`. + +## How they relate + +`agentcore-cli` is the main product. It vends CDK projects using constructs from `agentcore-l3-cdk-constructs`. + +## Testing with a bundled distribution + +Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged `agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g ` to test the CLI end-to-end. diff --git a/.github/scripts/python/harness_review.py b/.github/scripts/python/harness_review.py new file mode 100644 index 000000000..fbfd0b0f9 --- /dev/null +++ b/.github/scripts/python/harness_review.py @@ -0,0 +1,217 @@ +"""Invoke Bedrock AgentCore Harness to review a GitHub PR. + +Reads PR_URL from the environment. Streams harness output to stdout. +Uses raw HTTP with SigV4 signing — no custom service model needed. 
+""" + +import json +import os +import sys +import time +import uuid + +import boto3 +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.eventstream import EventStreamBuffer +from urllib.parse import quote +import urllib3 + +# ANSI color codes +CYAN = "\033[36m" +YELLOW = "\033[33m" +GREEN = "\033[32m" +RED = "\033[31m" +DIM = "\033[2m" +RESET = "\033[0m" + +SCRIPTS_DIR = os.path.join(os.path.dirname(__file__), "..") + + +def read_prompt(filename): + """Read a prompt template from the prompts directory.""" + path = os.path.join(SCRIPTS_DIR, "prompts", filename) + with open(path) as f: + return f.read() + + +def invoke_harness(harness_arn, body, region): + """Send a SigV4-signed request to the harness invoke endpoint. Returns a streaming response. + + InvokeHarness is not in standard boto3, so we call the REST API directly. + boto3 is only used to resolve AWS credentials (from env vars, OIDC, etc.) + and sign the request with SigV4. The response is an AWS binary event stream. + """ + session = boto3.Session(region_name=region) + credentials = session.get_credentials().get_frozen_credentials() + url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(harness_arn, safe='')}" + request = AWSRequest(method="POST", url=url, data=body, headers={ + "Content-Type": "application/json", + "Accept": "application/vnd.amazon.eventstream", + }) + SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request) + return urllib3.PoolManager().urlopen( + "POST", url, body=body, + headers=dict(request.headers), + preload_content=False, + timeout=urllib3.Timeout(connect=10, read=600), + ) + + +def parse_events(http_response): + """Yield (event_type, payload) tuples from the harness binary event stream. + + The response arrives as raw bytes in AWS binary event stream format. 
+ EventStreamBuffer reassembles complete events from the 4KB chunks, + and we decode each event's JSON payload before yielding it. + """ + event_buffer = EventStreamBuffer() + for chunk in http_response.stream(4096): + event_buffer.add_data(chunk) + for event in event_buffer: + if event.headers.get(":message-type") == "exception": + payload = json.loads(event.payload.decode("utf-8")) + print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr) + sys.exit(1) + event_type = event.headers.get(":event-type", "") + if event.payload: + yield event_type, json.loads(event.payload.decode("utf-8")) + + +def print_stream(http_response): + """Display harness events with GitHub Actions log groups. + + The harness streams events as the agent works: + contentBlockStart — a new block begins (text or tool call) + contentBlockDelta — incremental chunks of text or tool input JSON + contentBlockStop — block complete, we now have full tool input to display + messageStop — agent finished + internalServerException — server error + + Tool calls are wrapped in ::group::/::endgroup:: for collapsible sections + in the GitHub Actions log UI. Agent reasoning text is printed inline in dim. 
+ """ + start_time = time.time() + iteration = 0 + tool_name = None + tool_input = "" + tool_start = 0.0 + in_group = False + text_buffer = "" + + def close_group(): + nonlocal in_group + if in_group: + print("::endgroup::", flush=True) + in_group = False + + def flush_text(): + nonlocal text_buffer + if text_buffer: + for line in text_buffer.splitlines(): + print(f"{DIM}{line}{RESET}", flush=True) + text_buffer = "" + + for event_type, payload in parse_events(http_response): + + if event_type == "contentBlockStart": + start = payload.get("start", {}) + if "toolUse" in start: + tool_name = start["toolUse"].get("name", "unknown") + tool_input = "" + tool_start = time.time() + iteration += 1 + + elif event_type == "contentBlockDelta": + delta = payload.get("delta", {}) + if "text" in delta: + close_group() + text_buffer += delta["text"] + if "toolUse" in delta: + tool_input += delta["toolUse"].get("input", "") + + elif event_type == "contentBlockStop": + flush_text() + if tool_name: + elapsed = time.time() - tool_start + try: + parsed = json.loads(tool_input) + except (json.JSONDecodeError, TypeError): + parsed = tool_input + + close_group() + + cmd = parsed.get("command") if isinstance(parsed, dict) else None + header = f"{CYAN}[{iteration}]{RESET} {YELLOW}{tool_name}{RESET} {DIM}({elapsed:.1f}s){RESET}" + if cmd: + header += f": $ {cmd}" + + print(f"::group::{header}", flush=True) + in_group = True + + if isinstance(parsed, dict): + for k, v in parsed.items(): + if k != "command": + print(f" {DIM}{k}:{RESET} {str(v)[:300]}", flush=True) + + tool_name = None + tool_input = "" + + elif event_type == "messageStop": + flush_text() + close_group() + if payload.get("stopReason") == "end_turn": + total = time.time() - start_time + print(f"\n\n{GREEN}{'=' * 50}", flush=True) + print(f" Done ({int(total // 60)}m {int(total % 60)}s)", flush=True) + print(f"{'=' * 50}{RESET}", flush=True) + + elif event_type == "internalServerException": + close_group() + 
print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr) + sys.exit(1) + + close_group() + total = time.time() - start_time + print(f"\n{GREEN}Review complete.{RESET} {DIM}({iteration} tool calls, {int(total)}s total){RESET}") + + +# --- Main --- + +# All config comes from environment variables (set via GitHub secrets/workflow) +MODEL_ID = os.environ.get("HARNESS_MODEL_ID", "us.anthropic.claude-opus-4-7") +HARNESS_ARN = os.environ.get("HARNESS_ARN", "") +PR_URL = os.environ.get("PR_URL", "") + +for name, val in [("HARNESS_ARN", HARNESS_ARN), ("PR_URL", PR_URL)]: + if not val: + print(f"{RED}ERROR: {name} environment variable is required{RESET}", file=sys.stderr) + sys.exit(1) + +# Extract region from the ARN (arn:aws:bedrock-agentcore:{region}:{account}:harness/{id}) +REGION = HARNESS_ARN.split(":")[3] +SESSION_ID = str(uuid.uuid4()).upper() + +print(f"{CYAN}Session:{RESET} {SESSION_ID}") +print(f"{CYAN}PR:{RESET} {PR_URL}") +print(f"{CYAN}Harness:{RESET} {HARNESS_ARN}") +print() + +SYSTEM_PROMPT = read_prompt("system.md") +REVIEW_PROMPT = read_prompt("review.md").format(pr_url=PR_URL) + +request_body = json.dumps({ + "runtimeSessionId": SESSION_ID, + "systemPrompt": [{"text": SYSTEM_PROMPT}], + "messages": [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}], + "model": {"bedrockModelConfig": {"modelId": MODEL_ID}}, +}) + +http_response = invoke_harness(HARNESS_ARN, request_body, REGION) + +if http_response.status != 200: + error = http_response.read().decode("utf-8") + print(f"{RED}ERROR: HTTP {http_response.status}: {error}{RESET}", file=sys.stderr) + sys.exit(1) + +print_stream(http_response) diff --git a/.github/workflows/e2e-tests-full.yml b/.github/workflows/e2e-tests-full.yml index bc2ea18c8..35f92ad59 100644 --- a/.github/workflows/e2e-tests-full.yml +++ b/.github/workflows/e2e-tests-full.yml @@ -27,6 +27,7 @@ jobs: fail-fast: false matrix: cdk-source: [npm, main] + shard: ['1/6', '2/6', '3/6', '4/6', '5/6', '6/6'] steps: - uses: actions/checkout@v6 
with: @@ -70,7 +71,7 @@ jobs: CDK_REPO: ${{ secrets.CDK_REPO_NAME }} - name: Install CLI globally run: npm install -g "$(npm pack | tail -1)" - - name: Run E2E tests (${{ matrix.cdk-source }}) + - name: Run E2E tests (${{ matrix.cdk-source }}, shard ${{ matrix.shard }}) env: AWS_ACCOUNT_ID: ${{ steps.aws.outputs.account_id }} AWS_REGION: ${{ inputs.aws_region || 'us-east-1' }} @@ -78,7 +79,7 @@ jobs: OPENAI_API_KEY: ${{ env.E2E_OPENAI_API_KEY }} GEMINI_API_KEY: ${{ env.E2E_GEMINI_API_KEY }} CDK_TARBALL: ${{ env.CDK_TARBALL }} - run: npm run test:e2e + run: npx vitest run --project e2e --shard=${{ matrix.shard }} browser-tests: runs-on: ubuntu-latest environment: e2e-testing diff --git a/.github/workflows/slack-issue-notification.yml b/.github/workflows/slack-issue-notification.yml index 758add1d1..1d3bbc4ee 100644 --- a/.github/workflows/slack-issue-notification.yml +++ b/.github/workflows/slack-issue-notification.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Send issue details to Slack - uses: slackapi/slack-github-action@v3.0.2 + uses: slackapi/slack-github-action@v2.1.1 with: webhook: ${{ secrets.SLACK_WEBHOOK_URL }} webhook-type: webhook-trigger diff --git a/.github/workflows/slack-open-prs-notification.yml b/.github/workflows/slack-open-prs-notification.yml index 11641bed4..68dd1df49 100644 --- a/.github/workflows/slack-open-prs-notification.yml +++ b/.github/workflows/slack-open-prs-notification.yml @@ -40,7 +40,7 @@ jobs: ); - name: Send open PRs summary to Slack - uses: slackapi/slack-github-action@v3.0.2 + uses: slackapi/slack-github-action@v2.1.1 with: webhook: ${{ secrets.SLACK_OPEN_PRS_WEBHOOK_URL }} webhook-type: webhook-trigger diff --git a/.gitignore b/.gitignore index ac3d53fc4..6613a8f02 100644 --- a/.gitignore +++ b/.gitignore @@ -69,11 +69,6 @@ ProtocolTesting/ .cdk-constructs-clone/ .omc/ -# E2E test artifacts -e2e-tests/fixtures/import/bugbash-resources.json - -# oh-my-claudecode -.omc/ # Browser tests 
browser-tests/.browser-test-env browser-tests/test-results/ diff --git a/.prettierignore b/.prettierignore index 3b1452b18..8eda17e39 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,3 +1,3 @@ CHANGELOG.md src/assets/**/*.md -.github/harness/prompts/ +.github/scripts/prompts/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d950296a..30a011f94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,111 +2,26 @@ All notable changes to this project will be documented in this file. -## [1.0.0-preview.5] - 2026-04-30 +## [0.13.0] - 2026-05-01 ### Added -- feat: add telemetry audit mode with FileSystemSink (#1014) (397c187) +- feat: evo preview features — config bundles, batch evaluation, recommendations, AB testing (#1068) (9ccf802) +- feat: wire telemetry into all add.* commands (#1050) (e9dfc16) +- feat: make parsing resilient to individual failures (#1062) (a4c37a2) +- feat: update @aws/agent-inspector to 0.3.0 (90f17b4) +- feat: update @aws/agent-inspector to 0.3.0 (278783a) ### Fixed -- fix: add Accept header to HTTP protocol invocation proxy (#1051) (821e4c3) +- fix: remove unnecessary non-null assertions after .default([]) revert (#1075) (eab8c87) +- fix: revert .optional() to .default([]) and strip empty evo arrays on write (#1074) (8c5cdfe) +- fix: remove dead preflight patch, proper teardown, optional evo schema fields (#1073) (839b32b) +- fix: remove dead preflight patch and use proper teardown for evo resources (#1072) (0e38e9e) +- fix: resolve e2e import test concurrency races (#1067) (bd6f841) +- fix: forward custom headers in bearer token invoke paths (#1065) (3dccd97) ### Other Changes -- Merge pull request #1057 from aws/sync-preview/merge-main-20260430-v2 (18fa2c9) -- chore: merge main into preview (7590650) -- Merge pull request #1058 from aws/release/v0.12.2 (68b25bf) -- chore: bump version to 0.12.2 (5ce4bdc) -- chore: merge main into preview (6e01e4e) -- fix(harness): add error handling for invoke_harness API call (#1056) (9a6a5d0) -- Merge pull 
request #1054 from aws/fix/remove-coauthor-reland (0afeaf5) -- refactor: move harness resources to .github/harness/ and use boto3 invoke_harness (ad2ba9b) -- Revert "refactor: move harness resources to .github/harness/ (#992)" (b8a90c9) -- refactor: move harness resources to .github/harness/ (#992) (aef3890) -- Merge pull request #1053 from aws/sync-preview/merge-main-20260430 (26b1c4c) -- chore: merge main into preview (9f2702a) - -## [1.0.0-preview.4] - 2026-04-29 - -### Added -- feat: add CloudWatch traces API for web UI (#997) (76b07aa) - -### Fixed -- fix: remove CONFIG_DIR exclusion from zip stage to preserve dependency agentcore/ packages (#1015) (d1e5241) - -### Other Changes -- Merge pull request #1040 from aws/sync-preview/merge-main-20260429-v5 (dd76d17) -- chore: merge main into preview (ecda10c) -- fix(ci): install uv in release workflow prepare steps (#1038) (#1039) (01b3b7d) -- fix(ci): install uv in release workflow prepare steps (#1038) (29ae8e5) -- Merge pull request #1037 from aws/sync-preview/merge-main-20260429-v3 (7f315c6) -- chore: merge main into preview (a951aed) -- fix(ci): move snapshot update after build in release workflow (#1036) (227c840) -- Merge pull request #1035 from aws/sync-preview/merge-main-20260429-v2 (451868a) -- chore: merge main into preview (c44d8c1) -- fix(ci): enable coverage collection in sharded unit test runs (#1034) (061b6b3) -- ci: run all PR and merge workflows on preview branch (#1023) (fc1cd56) -- fix(ci): update snapshots after CDK version sync in release workflow (#1033) (d3b412f) -- chore(deps): bump @opentelemetry/sdk-metrics from 2.6.1 to 2.7.0 (#1030) (ad59fc0) -- chore(deps-dev): bump secretlint from 12.2.0 to 12.3.1 (#1029) (36755e9) -- chore(deps-dev): bump @secretlint/secretlint-rule-preset-recommend (#1028) (56a6d4c) -- chore(deps): bump @opentelemetry/resources from 2.6.1 to 2.7.0 (#1026) (ad482cf) -- chore(deps): bump the aws-cdk group with 2 updates (#1025) (1686e4d) -- chore(deps): bump the 
aws-sdk group with 14 updates (#1024) (1fc366c) -- Merge pull request #1018 from aws/sync-preview/real-merge-main-20260429 (8c4d6eb) -- chore: merge main into preview (553a520) -- sync-preview: merge main into preview (#1017) (1c726d8) -- ci: add coordinated main + preview release workflow (#995) (7e8cae4) -- chore: merge main into preview (#1013) (3e7e15b) -- fix(import): use GatewayNameSchema for gateway import name validation (#1011) (29b6522) -- test: remove 44 render-only and framework-testing tests (#998) (13b34a3) -- chore: bump version to 0.12.0 (#1002) (dd9270d) - -## [1.0.0-preview.3] - 2026-04-28 - -### Added -- feat: add gateway import command with executionRoleArn support (#855) (2df1387) -- feat: runtime endpoint support in AgentCore CLI (#979) (41c59ef) -- feat: add project-name option to create (#969) (9b46fbb) -- feat: add project-name option to preview create (#970) (a19fc8f) -- feat: add agentcore-cli User-Agent to all API calls (#960) (398dc50) -- feat: add telemetry schemas and client (#941) (7c37fa6) -- feat: add GitHub Action for automated PR review via AgentCore Harness (#934) (a365bf5) - -### Fixed -- fix: duplicate header flash and help menu truncation (closes #895, closes #637) (#955) (e7b85c1) -- fix: show 'Computing diff changes...' 
step during deploy diff phase (#952) (a725d12) -- fix: display session ID after CLI invoke completes (#957) (51e4a8e) -- fix: lower eventExpiryDuration minimum from 7 to 3 days (closes #744) (#956) (8613657) -- fix: use pull_request_target for fork PR support (#958) (933bac8) -- fix: agentcore dev not working in windows (#951) (5271f55) -- fix: add TTY detection before TUI fallbacks to prevent agent/CI hangs (#949) (c30ed54) -- fix: allow code-based evaluators in online eval configs (#947) (3d2d671) -- fix: buffer streaming text to avoid per-token log lines in GitHub Actions (#946) (cb1e81a) - -### Other Changes -- fix(tests): fix 2 test failures on preview branch (8a4ea58) -- Merge main into preview (3fd6668) -- fix(e2e): add debug logging for gateway import CI failures (#1001) (8012d6c) -- fix(e2e): separate gateway import test and add PR-changed test detection (#999) (19b7d13) -- fix(import): remove resourceName/executionRoleArn co-variance refine (#996) (ad0ee58) -- test: speed up CI and fix mock cleanup gaps (#989) (51240ac) -- chore(deps-dev): bump esbuild from 0.27.4 to 0.28.0 (#862) (a778fb5) -- chore(deps-dev): bump hono from 4.12.12 to 4.12.14 (#868) (d64d2b8) -- chore(deps): bump the aws-sdk group across 1 directory with 14 updates (#912) (6061958) -- chore(deps-dev): bump @secretlint/secretlint-rule-preset-recommend (#914) (8ed1fe7) -- chore(deps-dev): bump @vitest/coverage-v8 from 4.1.2 to 4.1.5 (#915) (a74cab9) -- chore(deps-dev): bump secretlint from 11.4.1 to 12.2.0 (#916) (80fc145) -- chore(deps): bump postcss from 8.5.8 to 8.5.10 (#961) (760ac17) -- chore(deps-dev): bump aws-cdk-lib (#962) (8a264fb) -- ci: bump the github-actions group across 1 directory with 4 updates (#964) (9962c3e) -- test: configure git in browser tests workflow (#976) (17b5727) -- fix(import): remove experimental warning from import command (#977) (fdd6631) -- Remove inline container build from vended cdk-stack.ts (#954) (57ee733) -- feat(invoke): add --prompt-file and stdin 
support for long prompts (#974) (f6a3e99) -- test: split browser tests into its own job, fix logs path (#975) (acbfb9e) -- fix(invoke): auto-generate session ID for bearer-token invocations (#953) (343fedc) -- chore: bump version to 0.11.0 (#967) (f8dc490) -- test: add browser tests for agent inspector (#938) (7a4104d) -- chore: bump version to 0.10.0 (#944) (12275c3) +- refactor: consolidate cli-config into global-config (#802) (3aec000) +- ci: cut full e2e time in half via vitest sharding (#1016) (4daca83) ## [0.11.0] - 2026-04-24 diff --git a/README.md b/README.md index 27dc204ce..af7769bfe 100644 --- a/README.md +++ b/README.md @@ -110,15 +110,29 @@ agentcore invoke ### Evaluations -| Command | Description | -| -------------------- | --------------------------------------------- | -| `add evaluator` | Add a custom LLM-as-a-Judge evaluator | -| `add online-eval` | Add continuous evaluation for live traffic | -| `run eval` | Run on-demand evaluation against agent traces | -| `evals history` | View past eval run results | -| `pause online-eval` | Pause a deployed online eval config | -| `resume online-eval` | Resume a paused online eval config | -| `logs evals` | Stream or search online eval logs | +| Command | Description | +| ----------------------- | ------------------------------------------------ | +| `add evaluator` | Add a custom LLM-as-a-Judge evaluator | +| `add online-eval` | Add continuous evaluation for live traffic | +| `run eval` | Run on-demand evaluation against agent traces | +| `run batch-evaluation` | Run evaluators across all sessions [preview] | +| `run recommendation` | Optimize prompts and tool descriptions [preview] | +| `evals history` | View past eval run results | +| `pause online-eval` | Pause a deployed online eval config | +| `resume online-eval` | Resume a paused online eval config | +| `stop batch-evaluation` | Stop a running batch evaluation [preview] | +| `logs evals` | Stream or search online eval logs | + +### Config Bundles 
[preview] + +| Command | Description | +| ------------------- | ----------------------------------------- | +| `add config-bundle` | Add a versioned configuration bundle | +| `cb versions` | List version history for a bundle | +| `cb diff` | Diff two versions of a bundle | +| `cb create-branch` | Create a new branch on an existing bundle | + +> Create agents with `--with-config-bundle` to auto-wire config bundle support into the generated template. ### Utilities @@ -171,6 +185,9 @@ Projects use JSON schema files in the `agentcore/` directory: - [CLI Commands Reference](docs/commands.md) - Full command reference for scripting and CI/CD - [Configuration](docs/configuration.md) - Schema reference for config files - [Evaluations](docs/evals.md) - Evaluators, on-demand evals, and online monitoring +- [Batch Evaluation](docs/batch-evaluation.md) - Run evaluators across sessions at scale [preview] +- [Recommendations](docs/recommendations.md) - Optimize prompts and tool descriptions [preview] +- [Config Bundles](docs/config-bundles.md) - Versioned runtime configurations [preview] - [Frameworks](docs/frameworks.md) - Supported frameworks and model providers - [Gateway](docs/gateway.md) - Gateway setup, targets, and authentication - [Memory](docs/memory.md) - Memory strategies and sharing diff --git a/docs/TESTING.md b/docs/TESTING.md index 700ab3aae..9c70af6b3 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -415,6 +415,24 @@ Test configuration is in `vitest.config.ts` using Vitest projects: - Test timeout: 120 seconds - Hook timeout: 120 seconds +## Troubleshooting + +### `Cannot find module '@playwright/test'` + +Playwright is not installed. Run: + +```bash +npm install +``` + +### `browserType.launch: Executable doesn't exist` (Playwright browsers) + +Playwright browsers need to be downloaded after install. 
Run: + +```bash +npx playwright install chromium +``` + ## Integration Tests Integration tests require: diff --git a/docs/batch-evaluation.md b/docs/batch-evaluation.md new file mode 100644 index 000000000..ea13d3707 --- /dev/null +++ b/docs/batch-evaluation.md @@ -0,0 +1,143 @@ +# Batch Evaluation [preview] + +Batch evaluation runs evaluators across all agent sessions in CloudWatch, producing per-session scores and aggregate +metrics. Use it to measure agent quality over time, compare before/after prompt changes, or validate ground truth +expectations. + +## Quick Start + +```bash +# Run a single evaluator across all sessions +agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness + +# Multiple evaluators +agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness Builtin.Helpfulness Builtin.Faithfulness + +# JSON output for scripting +agentcore run batch-evaluation -r MyAgent -e Builtin.Helpfulness --json +``` + +## Available Evaluators + +Built-in evaluators provided by AgentCore: + +| Evaluator | What it measures | +| ----------------------------------- | ---------------------------------------------- | +| `Builtin.Correctness` | Factual accuracy of responses | +| `Builtin.Helpfulness` | How well responses address the user's goal | +| `Builtin.Faithfulness` | Grounding in tool results / provided context | +| `Builtin.GoalSuccessRate` | Whether the agent achieved the user's goal | +| `Builtin.ToolSelectionAccuracy` | Correct tool chosen for the task | +| `Builtin.Completeness` | Whether all parts of the request were handled | +| `Builtin.TrajectoryExactOrderMatch` | Tool call sequence matches expected trajectory | + +Custom evaluators defined in your project (via `agentcore add evaluator`) can also be used. 
+ +## Filtering Sessions + +### By time window + +```bash +# Only sessions from the last 3 days +agentcore run batch-evaluation -r MyAgent -e Builtin.Helpfulness --lookback-days 3 +``` + +### By session ID + +```bash +agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness -s <session-id> +``` + +## Ground Truth + +Provide expected responses, assertions, or expected tool trajectories for specific sessions: + +```bash +agentcore run batch-evaluation \ + -r MyAgent \ + -e Builtin.Correctness Builtin.GoalSuccessRate \ + -s <session-id> \ + --ground-truth ./ground_truth.json +``` + +### Ground truth file format + +```json +[ + { + "sessionId": "<session-id>", + "groundTruth": { + "inline": { + "assertions": [{ "text": "Agent should use the lookup_order tool" }], + "expectedTrajectory": { + "toolNames": ["lookup_order"] + }, + "turns": [ + { + "input": "What's the status of order ORD-1001?", + "expectedResponse": { "text": "Order ORD-1001 has been delivered" } + } + ] + } + } + } +] +``` + +All fields inside `inline` are optional — include only what's relevant: + +- `assertions` — free-text expectations evaluated by `Builtin.GoalSuccessRate` +- `expectedTrajectory` — tool call sequence evaluated by `Builtin.TrajectoryExactOrderMatch` +- `turns` — input/expected-response pairs evaluated by `Builtin.Correctness` + +## Custom Name + +```bash +agentcore run batch-evaluation -r MyAgent -e Builtin.Helpfulness -n "weekly_quality_check" +``` + +Names must start with a letter and contain only letters, digits, and underscores (max 48 characters). + +## Stopping a Running Evaluation + +```bash +agentcore stop batch-evaluation -i <batch-evaluation-id> +``` + +## Viewing Results + +### CLI output + +The CLI shows scores grouped by evaluator with average scores after the run completes. + +### Local history + +Results are saved in `.cli/eval-job-results/`. 
View past runs via the TUI: + +```bash +agentcore +# Navigate to: Evals → Batch Evaluation History +``` + +### JSON output + +```bash +agentcore run batch-evaluation -r MyAgent -e Builtin.Helpfulness --json +``` + +Returns `batchEvaluationId`, `evaluationResults` with `numberOfSessionsCompleted`, `evaluatorSummaries` with +per-evaluator `averageScore`. + +## TUI Wizard + +Run `agentcore` → Run → Batch Evaluation for a guided flow: + +1. Select agent +2. Multi-select evaluators +3. Set lookback days +4. Optionally select specific sessions +5. Optionally add ground truth +6. Name the run (optional) +7. Confirm and run + +The TUI shows real-time progress with elapsed time and step indicators. diff --git a/docs/config-bundles.md b/docs/config-bundles.md new file mode 100644 index 000000000..890ad7aaf --- /dev/null +++ b/docs/config-bundles.md @@ -0,0 +1,114 @@ +# Configuration Bundles [preview] + +Config bundles are versioned configurations that store your agent's runtime settings — system prompt, tool descriptions, +model parameters, or any custom keys. Instead of hardcoding values in your agent code, your agent reads its config at +invocation time from whichever bundle version is active. + +## Concepts + +| Concept | Description | +| ------------- | ----------------------------------------------------------------------------------- | +| **Bundle** | A named container for component configurations, stored in `agentcore.json` | +| **Version** | An immutable snapshot of a bundle's configuration, created on each deploy or update | +| **Branch** | A named lineage within a bundle (e.g. 
`mainline`, `experiment-1`) | +| **Component** | A runtime or gateway whose configuration is managed by the bundle | + +## Creating a Config Bundle + +### With agent creation + +Create an agent with a pre-wired config bundle that injects system prompt and tool descriptions at runtime: + +```bash +agentcore create --name MyProject --defaults --with-config-bundle +``` + +This creates a `{AgentName}Config` bundle with smart defaults and generates a template that uses +`BedrockAgentCoreContext.get_config_bundle()` to read config at runtime. + +### Standalone + +```bash +agentcore add config-bundle \ + --name MyBundle \ + --description "Production configuration" \ + --components '{"{{runtime:MyAgent}}": {"configuration": {"systemPrompt": "You are helpful.", "temperature": 0.7}}}' \ + --branch mainline \ + --commit-message "Initial config" \ + --json +``` + +The `{{runtime:MyAgent}}` placeholder resolves to the real runtime ARN at deploy time. + +### Via TUI + +Run `agentcore` → Add → select "Configuration Bundle", or select "Config bundle" in the Advanced Configuration step when +adding an agent. + +## Deploying + +```bash +agentcore deploy +``` + +On deploy, the CLI creates or updates the config bundle in the API and stores the bundle ID, ARN, and version ID in +`deployed-state.json`. + +## Managing Versions + +### List versions + +```bash +agentcore cb versions --bundle MyBundle +``` + +Shows version history grouped by branch with commit messages, timestamps, and parent lineage. + +### Diff two versions + +```bash +agentcore cb diff --bundle MyBundle --from <from-version-id> --to <to-version-id> +``` + +### Create a branch + +```bash +agentcore cb create-branch --bundle MyBundle --branch experiment-1 +``` + +Creates a new branch from the latest version (or a specific version with `--from`). + +## Updating Without Redeploying Code + +Edit the `systemPrompt` or other fields in `agentcore.json` under `configBundles`, then: + +```bash +agentcore deploy +``` + +A new version is created in the API. 
The next invocation picks up the new config automatically — no code changes needed. + +## How It Works at Runtime + +When you invoke an agent with an associated config bundle, the CLI passes the bundle ARN and version as W3C baggage +headers. The SDK's `BedrockAgentCoreContext.get_config_bundle()` reads the baggage, fetches the config from the API +(cached per version), and makes it available to your agent code. + +The generated template uses a `ConfigBundleHook` (Strands) or `ConfigBundleCallback` (LangGraph) to inject the system +prompt and tool descriptions before each invocation. + +## Bundle Name in agentcore.json + +The CLI prefixes your bundle name with the project name when creating it in the API (e.g. `MyProject` + `MyBundle` → +`MyProjectMyBundle`). You always use the local name (`MyBundle`) in CLI commands — the CLI resolves the prefix +automatically. + +## JSON Output + +All commands support `--json` for scripting: + +```bash +agentcore cb versions --bundle MyBundle --json +agentcore cb diff --bundle MyBundle --from v1 --to v2 --json +agentcore cb create-branch --bundle MyBundle --branch exp-1 --json +``` diff --git a/docs/recommendations.md b/docs/recommendations.md new file mode 100644 index 000000000..c5a5c4ac3 --- /dev/null +++ b/docs/recommendations.md @@ -0,0 +1,158 @@ +# Recommendations [preview] + +Recommendations optimize your agent's system prompt or tool descriptions using historical traces as signal. The +recommendation service analyzes how your agent performed, then produces an improved version scored by an evaluator. + +## Quick Start + +```bash +# Optimize a system prompt (inline) +agentcore run recommendation \ + -r MyAgent \ + -e Builtin.Helpfulness \ + --type system-prompt \ + --inline "You are a helpful assistant." 
+ +# Optimize tool descriptions +agentcore run recommendation \ + -r MyAgent \ + --type tool-description \ + --tools "search:Searches the web" "calc:Does math" +``` + +## System Prompt Recommendations + +### From inline text + +```bash +agentcore run recommendation \ + -r MyAgent \ + -e Builtin.Helpfulness \ + --type system-prompt \ + --inline "You are a helpful assistant. Use tools when appropriate." +``` + +### From a file + +```bash +agentcore run recommendation \ + -r MyAgent \ + -e Builtin.Helpfulness \ + --type system-prompt \ + --prompt-file ./system-prompt.txt +``` + +### From a config bundle + +Read the current prompt from a deployed config bundle, optimize it, and write the result back as a new bundle version: + +```bash +agentcore run recommendation \ + -r MyAgent \ + -e Builtin.Helpfulness \ + --type system-prompt \ + --bundle-name MyBundle \ + --bundle-version <version-id> \ + --system-prompt-json-path systemPrompt +``` + +The `--system-prompt-json-path` is the field name under `configuration` in the bundle (e.g. `systemPrompt`). The CLI +resolves it to the full path automatically using the component ARN from your deployed state. + +> **JSONPath format:** The API uses dot notation (`$.{ARN}.configuration.{field}`), not standard JSONPath bracket +> notation. You don't need to worry about this — just pass the short field name and the CLI handles the resolution. If +> you need the full path for direct API calls, use `$.arn:aws:...:runtime/MyAgent.configuration.systemPrompt` (no +> brackets, no quotes around the ARN). + +On success, the recommendation writes a new config bundle version with the optimized prompt. The agent picks it up on +the next invocation — no redeploy needed. + +## Tool Description Recommendations + +```bash +agentcore run recommendation \ + -r MyAgent \ + --type tool-description \ + --tools "add_numbers:Return the sum of two numbers" "search:Searches the web" +``` + +Returns optimized tool descriptions for each tool. 
+ +## Trace Source + +By default, the recommendation service fetches traces from CloudWatch using a 7-day lookback. Customize with: + +```bash +# Custom lookback window +agentcore run recommendation ... --lookback 14 + +# Specific sessions only +agentcore run recommendation ... --session-id <session-id> + +# From a local spans file (OTEL format) +agentcore run recommendation ... --spans-file ./traces.json +``` + +## JSON Output + +```bash +agentcore run recommendation -r MyAgent -e Builtin.Helpfulness --type system-prompt --inline "..." --json +``` + +Returns `recommendationId`, `status`, and `result` with `systemPromptRecommendationResult.recommendedSystemPrompt` or +`toolDescriptionRecommendationResult.tools`. + +When using `--bundle-name`, the result also includes `configurationBundle.versionId` — the new bundle version. + +## End-to-End Workflow: Recommendation → Config Bundle → Invoke + +1. Create agent with config bundle: + + ```bash + agentcore create --name MyAgent --defaults --with-config-bundle + agentcore deploy + ``` + +2. Invoke a few times to generate traces: + + ```bash + agentcore invoke --prompt "What is 2 + 3?" + agentcore invoke --prompt "Tell me about Paris" + ``` + +3. Run recommendation from config bundle: + + ```bash + agentcore run recommendation \ + -r MyAgent -e Builtin.Helpfulness --type system-prompt \ + --bundle-name MyAgentConfig --bundle-version <version-id> \ + --system-prompt-json-path systemPrompt + ``` + +4. Invoke again — the agent uses the optimized prompt without code changes: + ```bash + agentcore invoke --prompt "Who are you?" + ``` + +## Viewing History + +Results are saved in `.cli/recommendations/`. View past runs via the TUI: + +```bash +agentcore +# Navigate to: Recommendations → History +``` + +## TUI Wizard + +Run `agentcore` → Run → Recommendation for a guided flow: + +1. Select recommendation type (system prompt or tool description) +2. Select agent +3. Select evaluator (system prompt only) +4. 
Choose input source (inline, file, or config bundle) +5. Choose trace source (CloudWatch or sessions) +6. Confirm and run + +The TUI shows real-time progress and displays the recommended changes when complete, with an option to apply config +bundle updates. diff --git a/e2e-tests/ab-test-config-bundle.test.ts b/e2e-tests/ab-test-config-bundle.test.ts new file mode 100644 index 000000000..9c18b2f31 --- /dev/null +++ b/e2e-tests/ab-test-config-bundle.test.ts @@ -0,0 +1,211 @@ +import { parseJsonOutput, retry } from '../src/test-utils/index.js'; +import { + baseCanRun, + hasAws, + installCdkTarball, + runAgentCoreCLI, + teardownE2EProject, + writeAwsTargets, +} from './e2e-helper.js'; +import { randomUUID } from 'node:crypto'; +import { mkdir, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +const canRun = baseCanRun && hasAws; + +describe.sequential('e2e: config-bundle AB test lifecycle', () => { + let testDir: string; + let projectPath: string; + const agentName = `E2eCfgAB${String(Date.now()).slice(-8)}`; + const abTestName = 'ConfigBundleABTest'; + const evalName = 'BundleEvaluator'; + const onlineEvalName = 'BundleOnlineEval'; + + beforeAll(async () => { + if (!canRun) return; + + testDir = join(tmpdir(), `agentcore-e2e-cfg-ab-${randomUUID()}`); + await mkdir(testDir, { recursive: true }); + + const result = await runAgentCoreCLI( + [ + 'create', + '--name', + agentName, + '--language', + 'Python', + '--framework', + 'Strands', + '--model-provider', + 'Bedrock', + '--memory', + 'none', + '--json', + ], + testDir + ); + expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0); + projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath; + + await writeAwsTargets(projectPath); + installCdkTarball(projectPath); + }, 300000); + + afterAll(async () => { + if (projectPath && hasAws) { + await 
teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); + }, 600000); + + const run = (args: string[]) => runAgentCoreCLI(args, projectPath); + + it.skipIf(!canRun)( + 'adds evaluator and online eval config', + async () => { + let result = await run([ + 'add', + 'evaluator', + '--name', + evalName, + '--level', + 'SESSION', + '--model', + 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', + '--instructions', + 'Evaluate session quality. Context: {context}', + '--json', + ]); + expect(result.exitCode, `Add evaluator failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'online-eval', + '--name', + onlineEvalName, + '--runtime', + agentName, + '--evaluator', + evalName, + '--sampling-rate', + '100', + '--enable-on-create', + '--json', + ]); + expect(result.exitCode, `Add online-eval failed: ${result.stdout}`).toBe(0); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'deploys agent before AB test (needed for config bundles)', + async () => { + await retry( + async () => { + const result = await run(['deploy', '--yes', '--json']); + if (result.exitCode !== 0) { + console.log('Initial deploy stdout:', result.stdout); + console.log('Initial deploy stderr:', result.stderr); + } + expect(result.exitCode, `Initial deploy failed`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 2, + 30000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'adds config-bundle AB test with 90/10 split', + async () => { + // Config bundles reference ARNs from deployed resources. + // Use placeholder bundle ARNs — the deploy step will validate or create them. 
+ const controlBundle = `arn:aws:bedrock-agentcore:ap-southeast-2:998846730471:config-bundle/control-v1`; + const treatmentBundle = `arn:aws:bedrock-agentcore:ap-southeast-2:998846730471:config-bundle/treatment-v1`; + + const result = await run([ + 'add', + 'ab-test', + '--mode', + 'config-bundle', + '--name', + abTestName, + '--runtime', + agentName, + '--control-bundle', + controlBundle, + '--control-version', + 'v1', + '--treatment-bundle', + treatmentBundle, + '--treatment-version', + 'v1', + '--control-weight', + '90', + '--treatment-weight', + '10', + '--online-eval', + onlineEvalName, + '--json', + ]); + expect(result.exitCode, `Add AB test failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean; abTestName: string }; + expect(json.success).toBe(true); + expect(json.abTestName).toBe(abTestName); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'status shows AB test in config', + async () => { + const result = await run(['status', '--json']); + expect(result.exitCode, `Status failed: ${result.stderr}`).toBe(0); + + const json = parseJsonOutput(result.stdout) as { + success: boolean; + resources: { resourceType: string; name: string; deploymentState: string }[]; + }; + expect(json.success).toBe(true); + + // Agent should be deployed + const agent = json.resources.find(r => r.resourceType === 'agent' && r.name === agentName); + expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined(); + expect(agent!.deploymentState).toBe('deployed'); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'invokes the deployed agent', + async () => { + await retry( + async () => { + const result = await run(['invoke', '--prompt', 'Say hello', '--runtime', agentName, '--json']); + expect(result.exitCode, `Invoke failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 3, + 15000 + ); + }, + 180000 + ); + + it.skipIf(!canRun)( + 'removes 
config-bundle AB test', + async () => { + const result = await run(['remove', 'ab-test', '--name', abTestName, '--json']); + expect(result.exitCode, `Remove failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + }, + 60000 + ); +}); diff --git a/e2e-tests/ab-test-target-based.test.ts b/e2e-tests/ab-test-target-based.test.ts new file mode 100644 index 000000000..ac687e4fb --- /dev/null +++ b/e2e-tests/ab-test-target-based.test.ts @@ -0,0 +1,301 @@ +import { parseJsonOutput, retry } from '../src/test-utils/index.js'; +import { + baseCanRun, + hasAws, + installCdkTarball, + runAgentCoreCLI, + teardownE2EProject, + writeAwsTargets, +} from './e2e-helper.js'; +import { randomUUID } from 'node:crypto'; +import { mkdir, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +const canRun = baseCanRun && hasAws; + +describe.sequential('e2e: target-based AB test lifecycle', () => { + let testDir: string; + let projectPath: string; + const agentName = `E2eTargAB${String(Date.now()).slice(-8)}`; + const abTestName = 'TargetABTest'; + const evalName = 'ABTestEvaluator'; + const controlEvalName = 'ControlEvalConfig'; + const treatmentEvalName = 'TreatmentEvalConfig'; + + beforeAll(async () => { + if (!canRun) return; + + testDir = join(tmpdir(), `agentcore-e2e-target-ab-${randomUUID()}`); + await mkdir(testDir, { recursive: true }); + + const result = await runAgentCoreCLI( + [ + 'create', + '--name', + agentName, + '--language', + 'Python', + '--framework', + 'Strands', + '--model-provider', + 'Bedrock', + '--memory', + 'none', + '--json', + ], + testDir + ); + expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0); + projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath; + + await writeAwsTargets(projectPath); + 
installCdkTarball(projectPath); + }, 300000); + + afterAll(async () => { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); + }, 600000); + + const run = (args: string[]) => runAgentCoreCLI(args, projectPath); + + it.skipIf(!canRun)( + 'adds runtime endpoints (prod v1, staging v1)', + async () => { + let result = await run([ + 'add', + 'runtime-endpoint', + '--runtime', + agentName, + '--endpoint', + 'prod', + '--version', + '1', + '--json', + ]); + expect(result.exitCode, `Add prod endpoint failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'runtime-endpoint', + '--runtime', + agentName, + '--endpoint', + 'staging', + '--version', + '1', + '--json', + ]); + expect(result.exitCode, `Add staging endpoint failed: ${result.stdout}`).toBe(0); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'adds evaluator and per-variant online eval configs', + async () => { + let result = await run([ + 'add', + 'evaluator', + '--name', + evalName, + '--level', + 'SESSION', + '--model', + 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', + '--instructions', + 'Evaluate quality. 
Context: {context}', + '--json', + ]); + expect(result.exitCode, `Add evaluator failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'online-eval', + '--name', + controlEvalName, + '--runtime', + agentName, + '--evaluator', + evalName, + '--sampling-rate', + '100', + '--endpoint', + 'prod', + '--enable-on-create', + '--json', + ]); + expect(result.exitCode, `Add control online-eval failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'online-eval', + '--name', + treatmentEvalName, + '--runtime', + agentName, + '--evaluator', + evalName, + '--sampling-rate', + '100', + '--endpoint', + 'staging', + '--enable-on-create', + '--json', + ]); + expect(result.exitCode, `Add treatment online-eval failed: ${result.stdout}`).toBe(0); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'adds target-based AB test with 90/10 split', + async () => { + const result = await run([ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + abTestName, + '--runtime', + agentName, + '--gateway', + `${abTestName}-gw`, + '--control-endpoint', + 'prod', + '--treatment-endpoint', + 'staging', + '--control-weight', + '90', + '--treatment-weight', + '10', + '--control-online-eval', + controlEvalName, + '--treatment-online-eval', + treatmentEvalName, + '--enable', + '--json', + ]); + expect(result.exitCode, `Add AB test failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean; abTestName: string }; + expect(json.success).toBe(true); + expect(json.abTestName).toBe(abTestName); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'deploys project (creates gateway, targets, AB test, eval configs)', + async () => { + await retry( + async () => { + const result = await run(['deploy', '--yes', '--json']); + if (result.exitCode !== 0) { + console.log('Deploy stdout:', result.stdout); + console.log('Deploy stderr:', result.stderr); + } + expect(result.exitCode, `Deploy failed (stderr: ${result.stderr})`).toBe(0); + const json 
= parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 2, + 30000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'status shows all resources deployed', + async () => { + await retry( + async () => { + const result = await run(['status', '--json']); + expect(result.exitCode, `Status failed: ${result.stderr}`).toBe(0); + + const json = parseJsonOutput(result.stdout) as { + success: boolean; + resources: { resourceType: string; name: string; deploymentState: string }[]; + }; + expect(json.success).toBe(true); + + // Agent should be deployed + const agent = json.resources.find(r => r.resourceType === 'agent' && r.name === agentName); + expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined(); + expect(agent!.deploymentState).toBe('deployed'); + + // Gateway should be deployed + const gateway = json.resources.find(r => r.resourceType === 'http-gateway' && r.name === `${abTestName}-gw`); + expect(gateway, 'HTTP gateway should appear in status').toBeDefined(); + }, + 3, + 15000 + ); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'pauses AB test', + async () => { + await retry( + async () => { + const result = await run(['pause', 'ab-test', abTestName, '--json']); + expect(result.exitCode, `Pause failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('executionStatus', 'PAUSED'); + }, + 3, + 10000 + ); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'resumes AB test', + async () => { + await retry( + async () => { + const result = await run(['resume', 'ab-test', abTestName, '--json']); + expect(result.exitCode, `Resume failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('executionStatus', 'RUNNING'); + }, + 3, + 10000 + ); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'promotes AB 
test (updates agentcore.json)', + async () => { + const result = await run(['promote', 'ab-test', abTestName, '--json']); + expect(result.exitCode, `Promote failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('promoted', true); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'removes AB test from config', + async () => { + const result = await run(['remove', 'ab-test', '--name', abTestName, '--delete-gateway', '--json']); + expect(result.exitCode, `Remove failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + }, + 60000 + ); +}); diff --git a/e2e-tests/byo-custom-jwt.test.ts b/e2e-tests/byo-custom-jwt.test.ts index b7391a522..64e534e20 100644 --- a/e2e-tests/byo-custom-jwt.test.ts +++ b/e2e-tests/byo-custom-jwt.test.ts @@ -48,7 +48,7 @@ const region = process.env.AWS_REGION ?? 'us-east-1'; * Run the local CLI build without skipping install (needed for deploy). */ function runLocalCLI(args: string[], cwd: string): Promise { - return runCLI(args, cwd, /* skipInstall */ false); + return runCLI(args, cwd, { skipInstall: false }); } describe.sequential('e2e: BYO agent with CUSTOM_JWT auth', () => { diff --git a/e2e-tests/config-bundle-eval-rec.test.ts b/e2e-tests/config-bundle-eval-rec.test.ts new file mode 100644 index 000000000..8151ac586 --- /dev/null +++ b/e2e-tests/config-bundle-eval-rec.test.ts @@ -0,0 +1,633 @@ +/** + * E2E tests for Config Bundles, Batch Evaluation, and Recommendations. 
+ * + * Flow: create project → add config bundle → add evaluator → deploy → + * invoke → test config-bundle CLI → run batch-evaluation → run recommendation + * + * Prerequisites: + * - AWS credentials + * - npm, git, uv installed + */ +import { parseJsonOutput, retry } from '../src/test-utils/index.js'; +import { + baseCanRun, + hasAws, + installCdkTarball, + runAgentCoreCLI, + teardownE2EProject, + writeAwsTargets, +} from './e2e-helper.js'; +import { randomUUID } from 'node:crypto'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +const canRun = baseCanRun && hasAws; + +describe.sequential('e2e: config bundles, batch evaluation, and recommendations', () => { + let testDir: string; + let projectPath: string; + const agentName = `E2eCbEr${String(Date.now()).slice(-8)}`; + const bundleName = 'E2eTestBundle'; + const evalName = 'E2eCustomEval'; + + beforeAll(async () => { + if (!canRun) return; + + testDir = join(tmpdir(), `agentcore-e2e-cb-eval-rec-${randomUUID()}`); + await mkdir(testDir, { recursive: true }); + + // Create project with agent + const result = await runAgentCoreCLI( + [ + 'create', + '--name', + agentName, + '--language', + 'Python', + '--framework', + 'Strands', + '--model-provider', + 'Bedrock', + '--memory', + 'none', + '--json', + ], + testDir + ); + expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0); + projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath; + + await writeAwsTargets(projectPath); + installCdkTarball(projectPath); + }, 300000); + + afterAll(async () => { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); + }, 600000); + + const run = (args: string[]) => runAgentCoreCLI(args, projectPath); 
+ + // ════════════════════════════════════════════════════════════════════════ + // Config Bundle — add to project + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'adds a config bundle to the project', + async () => { + const components = JSON.stringify({ + [`{{runtime:${agentName}}}`]: { + configuration: { + systemPrompt: 'You are a helpful e2e test assistant.', + temperature: 0.7, + }, + }, + }); + + const result = await run([ + 'add', + 'config-bundle', + '--name', + bundleName, + '--description', + 'E2E test config bundle', + '--components', + components, + '--branch', + 'mainline', + '--commit-message', + 'Initial e2e bundle', + '--json', + ]); + + expect(result.exitCode, `Add config-bundle failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(true); + expect(json.bundleName).toBe(bundleName); + }, + 60000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Evaluator — add to project + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'adds a custom evaluator to the project', + async () => { + const result = await run([ + 'add', + 'evaluator', + '--name', + evalName, + '--level', + 'SESSION', + '--model', + 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', + '--instructions', + 'Evaluate the overall quality of this session. 
Context: {context}', + '--json', + ]); + + expect(result.exitCode, `Add evaluator failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(true); + expect(json.evaluatorName).toBe(evalName); + }, + 60000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Deploy + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'deploys the project with config bundle and evaluator', + async () => { + const result = await run(['deploy', '--yes', '--json']); + + if (result.exitCode !== 0) { + console.log('Deploy stdout:', result.stdout); + console.log('Deploy stderr:', result.stderr); + } + + expect(result.exitCode, 'Deploy failed').toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 600000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Invoke — generate traces for evaluation + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'invokes the deployed agent to generate traces', + async () => { + await retry( + async () => { + const result = await run(['invoke', '--prompt', 'Say hello', '--runtime', agentName, '--json']); + expect(result.exitCode, `Invoke failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 3, + 15000 + ); + }, + 180000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Status — verify config bundle and evaluator deployed + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'status shows deployed config bundle and evaluator', + async () => { + const result = await run(['status', '--json']); + + expect(result.exitCode, `Status failed: ${result.stderr}`).toBe(0); + const json 
= parseJsonOutput(result.stdout) as { + success: boolean; + resources: { resourceType: string; name: string; deploymentState: string }[]; + }; + expect(json.success).toBe(true); + + const bundle = json.resources.find(r => r.resourceType === 'configBundle' && r.name === bundleName); + expect(bundle, `Config bundle "${bundleName}" should appear in status`).toBeDefined(); + + const evaluator = json.resources.find(r => r.resourceType === 'evaluator' && r.name === evalName); + expect(evaluator, `Evaluator "${evalName}" should appear in status`).toBeDefined(); + }, + 120000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Config Bundle — versions and diff via CLI + // ════════════════════════════════════════════════════════════════════════ + + let initialVersionId: string; + + it.skipIf(!canRun)( + 'config-bundle versions lists the deployed version', + async () => { + const result = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + + expect(result.exitCode, `cb versions failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { + versions: { versionId: string; lineageMetadata?: { branchName?: string; commitMessage?: string } }[]; + bundleName: string; + }; + + expect(json.bundleName).toBe(bundleName); + expect(json.versions.length).toBeGreaterThanOrEqual(1); + initialVersionId = json.versions[0]!.versionId; + expect(initialVersionId).toBeTruthy(); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'config-bundle versions supports --branch filter', + async () => { + const result = await run(['config-bundle', 'versions', '--bundle', bundleName, '--branch', 'mainline', '--json']); + + expect(result.exitCode, `cb versions --branch failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { + versions: { versionId: string; lineageMetadata?: { branchName?: string } }[]; + }; + + for (const v of json.versions) { + 
expect(v.lineageMetadata?.branchName).toBe('mainline'); + } + }, + 120000 + ); + + it.skipIf(!canRun)( + 'updates config bundle by redeploying with changed components', + async () => { + // Update the config bundle in agentcore.json with new component values + const components = JSON.stringify({ + [`{{runtime:${agentName}}}`]: { + configuration: { + systemPrompt: 'You are an UPDATED e2e test assistant.', + temperature: 0.9, + maxTokens: 2048, + }, + }, + }); + + // Remove old bundle, add new one with same name but different components + let result = await run(['remove', 'config-bundle', '--name', bundleName, '--json']); + expect(result.exitCode, `Remove config-bundle failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'config-bundle', + '--name', + bundleName, + '--description', + 'E2E test config bundle - updated', + '--components', + components, + '--branch', + 'mainline', + '--commit-message', + 'Update system prompt and add maxTokens', + '--json', + ]); + expect(result.exitCode, `Re-add config-bundle failed: ${result.stdout}`).toBe(0); + + // Redeploy to push the updated bundle + result = await run(['deploy', '--yes', '--json']); + expect(result.exitCode, `Redeploy failed: ${result.stdout}`).toBe(0); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'config-bundle versions shows both versions after update', + async () => { + const result = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + + expect(result.exitCode, `cb versions failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { + versions: { versionId: string }[]; + }; + + expect(json.versions.length).toBeGreaterThanOrEqual(2); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'config-bundle diff shows changes between versions', + async () => { + // Get the latest two versions + const versionsResult = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + const versionsJson = parseJsonOutput(versionsResult.stdout) as 
{ + versions: { versionId: string }[]; + }; + + expect(versionsJson.versions.length).toBeGreaterThanOrEqual(2); + const newestVersion = versionsJson.versions[0]!.versionId; + const oldestVersion = versionsJson.versions[versionsJson.versions.length - 1]!.versionId; + + const result = await run([ + 'config-bundle', + 'diff', + '--bundle', + bundleName, + '--from', + oldestVersion, + '--to', + newestVersion, + '--json', + ]); + + expect(result.exitCode, `cb diff failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('fromVersion'); + expect(json).toHaveProperty('toVersion'); + expect(json.diffs).toBeInstanceOf(Array); + expect((json.diffs as unknown[]).length).toBeGreaterThan(0); + }, + 120000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Batch Evaluation — run through CLI + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'runs batch evaluation with Builtin evaluator via CLI', + async () => { + await retry( + async () => { + const result = await run([ + 'run', + 'batch-evaluation', + '--runtime', + agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--lookback-days', + '1', + '--json', + ]); + + expect(result.exitCode, `batch-evaluation failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe( + 0 + ); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('batchEvaluateId'); + expect(json.status).toBeDefined(); + expect(json.status).not.toBe('FAILED'); + }, + 6, + 15000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'runs batch evaluation with ground truth file', + async () => { + // Invoke to get a real session ID for ground truth + const invokeResult = await run(['invoke', '--prompt', 'What is 2+2?', '--runtime', agentName, '--json']); + expect(invokeResult.exitCode).toBe(0); + const invokeJson = 
parseJsonOutput(invokeResult.stdout) as { sessionId: string }; + expect(invokeJson.sessionId).toBeTruthy(); + + // Create ground truth file using the real session ID + const gtData = [ + { + sessionId: invokeJson.sessionId, + groundTruth: { + inline: { + assertions: [{ text: 'Agent should provide a numerical answer' }], + }, + }, + }, + ]; + const gtPath = join(projectPath, 'ground-truth.json'); + await writeFile(gtPath, JSON.stringify(gtData)); + + await retry( + async () => { + const result = await run([ + 'run', + 'batch-evaluation', + '--runtime', + agentName, + '--evaluator', + 'Builtin.Correctness', + '--ground-truth', + gtPath, + '--lookback-days', + '1', + '--json', + ]); + + expect(result.exitCode, `batch-evaluation with GT failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + }, + 6, + 15000 + ); + }, + 600000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // On-demand Eval — run eval via CLI (existing pattern) + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'runs on-demand eval with Builtin evaluator via CLI', + async () => { + // Retries needed: traces from invoke take time to propagate to CloudWatch + await retry( + async () => { + const result = await run([ + 'run', + 'eval', + '--runtime', + agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--lookback', + '1', + '--json', + ]); + + expect(result.exitCode, `run eval failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('run'); + expect(json).toHaveProperty('filePath'); + }, + 10, + 15000 + ); + }, + 300000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Recommendation — run through CLI + // 
════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'runs system prompt recommendation with inline content via CLI', + async () => { + await retry( + async () => { + const result = await run([ + 'run', + 'recommendation', + '--runtime', + agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'You are a helpful assistant for testing.', + '--lookback', + '1', + '--json', + ]); + + expect(result.exitCode, `recommendation failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('recommendationId'); + expect(json.result).toBeDefined(); + expect(json.result).not.toBe(''); + expect(json.result).not.toBeNull(); + }, + 6, + 30000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'runs system prompt recommendation with prompt file via CLI', + async () => { + const promptFile = join(projectPath, 'system-prompt.txt'); + await writeFile(promptFile, 'You are a helpful customer support assistant. 
Answer politely.'); + + await retry( + async () => { + const result = await run([ + 'run', + 'recommendation', + '--runtime', + agentName, + '--evaluator', + 'Builtin.Helpfulness', + '--prompt-file', + promptFile, + '--lookback', + '1', + '--json', + ]); + + expect(result.exitCode, `recommendation from file failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('recommendationId'); + }, + 6, + 30000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'runs tool description recommendation via CLI', + async () => { + await retry( + async () => { + const result = await run([ + 'run', + 'recommendation', + '--type', + 'tool-description', + '--runtime', + agentName, + '--tools', + 'search:Searches the web for information', + '--tools', + 'calculator:Performs mathematical calculations', + '--lookback', + '1', + '--json', + ]); + + expect(result.exitCode, `tool-desc recommendation failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('recommendationId'); + }, + 6, + 30000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'runs recommendation with config bundle source via CLI', + async () => { + // Get the latest version ID for the bundle + const versionsResult = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + const versionsJson = parseJsonOutput(versionsResult.stdout) as { + versions: { versionId: string }[]; + }; + const latestVersion = versionsJson.versions[0]!.versionId; + + await retry( + async () => { + const result = await run([ + 'run', + 'recommendation', + '--runtime', + agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--bundle-name', + bundleName, + '--bundle-version', + latestVersion, + '--system-prompt-json-path', + 'systemPrompt', + '--lookback', + '1', + '--json', + ]); + + 
expect(result.exitCode, `bundle recommendation failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json).toHaveProperty('success', true); + expect(json).toHaveProperty('recommendationId'); + }, + 6, + 30000 + ); + }, + 600000 + ); + + // ════════════════════════════════════════════════════════════════════════ + // Cleanup — remove config bundle from project + // ════════════════════════════════════════════════════════════════════════ + + it.skipIf(!canRun)( + 'removes config bundle from project and redeploys (reconciliation deletes it)', + async () => { + let result = await run(['remove', 'config-bundle', '--name', bundleName, '--json']); + expect(result.exitCode, `Remove config-bundle failed: ${result.stdout}`).toBe(0); + + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(true); + + // Redeploy triggers reconciliation (orphaned bundle deleted server-side) + result = await run(['deploy', '--yes', '--json']); + expect(result.exitCode, `Final deploy failed: ${result.stdout}`).toBe(0); + }, + 600000 + ); +}); diff --git a/e2e-tests/fixtures/import/cleanup_resources.py b/e2e-tests/fixtures/import/cleanup_resources.py index 0728b711e..120ced18f 100644 --- a/e2e-tests/fixtures/import/cleanup_resources.py +++ b/e2e-tests/fixtures/import/cleanup_resources.py @@ -51,10 +51,6 @@ def main(): rid = val.get("id") if not rid: continue - # Gateway targets are deleted automatically when the parent gateway is deleted - if "gateway" in key and "target" in key: - print(f"Skipping {key} (deleted with parent gateway)") - continue try: if "runtime" in key: client.delete_agent_runtime(agentRuntimeId=rid) @@ -62,8 +58,6 @@ def main(): client.delete_memory(memoryId=rid) elif "evaluator" in key: client.delete_evaluator(evaluatorId=rid) - elif "gateway" in key: - client.delete_gateway(gatewayIdentifier=rid) print(f"Deleted {key}: {rid}") except Exception as e: print(f"Could not delete {key} ({rid}): {e}") 
diff --git a/e2e-tests/fixtures/import/common.py b/e2e-tests/fixtures/import/common.py index 369ec0bb0..3573ed519 100644 --- a/e2e-tests/fixtures/import/common.py +++ b/e2e-tests/fixtures/import/common.py @@ -2,15 +2,20 @@ import json import os import time +import uuid import zipfile import tempfile import boto3 REGION = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") or "us-east-1" +RESOURCE_SUFFIX = os.environ.get("RESOURCE_SUFFIX", "") +# Unique suffix for resource names — avoids collisions across parallel CI shards. +NAME_SUFFIX = RESOURCE_SUFFIX or uuid.uuid4().hex[:12] SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) APP_DIR = os.path.join(SCRIPT_DIR, "app") -RESOURCES_FILE = os.path.join(SCRIPT_DIR, "bugbash-resources.json") +_resources_name = f"bugbash-resources-{RESOURCE_SUFFIX}.json" if RESOURCE_SUFFIX else "bugbash-resources.json" +RESOURCES_FILE = os.path.join(SCRIPT_DIR, _resources_name) INLINE_POLICY_NAME = "bugbash-agentcore-permissions" @@ -35,6 +40,8 @@ def upload_code(prefix="bugbash"): """Zip APP_DIR and upload to S3. 
Returns (bucket, s3_key).""" bucket_name = get_code_bucket() s3 = boto3.client("s3", region_name=REGION) + if RESOURCE_SUFFIX: + prefix = f"{prefix}-{RESOURCE_SUFFIX}" # Create zip of app directory with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp: diff --git a/e2e-tests/fixtures/import/setup_evaluator.py b/e2e-tests/fixtures/import/setup_evaluator.py index d49787d0e..e4573da45 100644 --- a/e2e-tests/fixtures/import/setup_evaluator.py +++ b/e2e-tests/fixtures/import/setup_evaluator.py @@ -8,10 +8,10 @@ import os sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import time from common import ( get_control_client, save_resource, tag_resource, wait_for_evaluator, print_import_command, + NAME_SUFFIX, ) DEFAULT_EVALUATOR_MODEL = os.environ.get("DEFAULT_EVALUATOR_MODEL", "us.anthropic.claude-sonnet-4-5-20250929-v1:0") @@ -19,8 +19,7 @@ def main(): client = get_control_client() - ts = int(time.time()) - evaluator_name = f"bugbash_eval_{ts}" + evaluator_name = f"bugbash_eval_{NAME_SUFFIX}" print(f"Creating evaluator: {evaluator_name}") resp = client.create_evaluator( diff --git a/e2e-tests/fixtures/import/setup_gateway.py b/e2e-tests/fixtures/import/setup_gateway.py index e190d0dfc..a846617aa 100644 --- a/e2e-tests/fixtures/import/setup_gateway.py +++ b/e2e-tests/fixtures/import/setup_gateway.py @@ -12,18 +12,17 @@ import os sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import time from common import ( REGION, get_control_client, ensure_role, save_resource, tag_resource, wait_for_gateway, wait_for_gateway_target, + NAME_SUFFIX, ) def main(): role_arn = ensure_role() client = get_control_client() - ts = int(time.time()) - gateway_name = f"bugbashGw{ts}" + gateway_name = f"bugbashGw{NAME_SUFFIX}" # ------------------------------------------------------------------ # 1. 
Create gateway diff --git a/e2e-tests/fixtures/import/setup_memory_full.py b/e2e-tests/fixtures/import/setup_memory_full.py index 5df196524..277179cfb 100644 --- a/e2e-tests/fixtures/import/setup_memory_full.py +++ b/e2e-tests/fixtures/import/setup_memory_full.py @@ -8,22 +8,22 @@ import os sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import time from common import ( ensure_role, get_control_client, wait_for_memory, save_resource, print_import_command, tag_resource, + NAME_SUFFIX, ) def main(): role_arn = ensure_role() client = get_control_client() - memory_name = f"bugbash_memory_{int(time.time())}" + memory_name = f"bugbash_memory_{NAME_SUFFIX}" print(f"Creating memory: {memory_name}") resp = client.create_memory( name=memory_name, - clientToken=f"bugbash-{int(time.time())}", + clientToken=f"bugbash-{NAME_SUFFIX}", eventExpiryDuration=30, memoryExecutionRoleArn=role_arn, memoryStrategies=[ diff --git a/e2e-tests/fixtures/import/setup_runtime_basic.py b/e2e-tests/fixtures/import/setup_runtime_basic.py index 65e1585a1..d29ddbd1c 100644 --- a/e2e-tests/fixtures/import/setup_runtime_basic.py +++ b/e2e-tests/fixtures/import/setup_runtime_basic.py @@ -7,20 +7,19 @@ import os sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import time from common import ( ensure_role, get_control_client, wait_for_runtime, save_resource, print_import_command, upload_code, + NAME_SUFFIX, ) def main(): role_arn = ensure_role() client = get_control_client() - ts = int(time.time()) - runtime_name = f"bugbash_basic_{ts}" + runtime_name = f"bugbash_basic_{NAME_SUFFIX}" - bucket, s3_key = upload_code(f"bugbash-basic-{ts}") + bucket, s3_key = upload_code(f"bugbash-basic-{NAME_SUFFIX}") print(f"Creating basic runtime: {runtime_name}") resp = client.create_agent_runtime( diff --git a/e2e-tests/http-gateway-targets.test.ts b/e2e-tests/http-gateway-targets.test.ts new file mode 100644 index 000000000..c2bef22fb --- /dev/null +++ 
b/e2e-tests/http-gateway-targets.test.ts @@ -0,0 +1,228 @@ +import { parseJsonOutput, retry } from '../src/test-utils/index.js'; +import { + baseCanRun, + hasAws, + installCdkTarball, + runAgentCoreCLI, + teardownE2EProject, + writeAwsTargets, +} from './e2e-helper.js'; +import { randomUUID } from 'node:crypto'; +import { mkdir, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +const canRun = baseCanRun && hasAws; + +describe.sequential('e2e: HTTP gateway with targets lifecycle', () => { + let testDir: string; + let projectPath: string; + const agentName = `E2eGwTgt${String(Date.now()).slice(-8)}`; + const gatewayName = 'e2e-target-gw'; + + beforeAll(async () => { + if (!canRun) return; + + testDir = join(tmpdir(), `agentcore-e2e-gw-targets-${randomUUID()}`); + await mkdir(testDir, { recursive: true }); + + const result = await runAgentCoreCLI( + [ + 'create', + '--name', + agentName, + '--language', + 'Python', + '--framework', + 'Strands', + '--model-provider', + 'Bedrock', + '--memory', + 'none', + '--json', + ], + testDir + ); + expect(result.exitCode, `Create failed: ${result.stderr}`).toBe(0); + projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath; + + await writeAwsTargets(projectPath); + installCdkTarball(projectPath); + }, 300000); + + afterAll(async () => { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); + }, 600000); + + const run = (args: string[]) => runAgentCoreCLI(args, projectPath); + + it.skipIf(!canRun)( + 'adds runtime endpoints (prod, staging)', + async () => { + let result = await run([ + 'add', + 'runtime-endpoint', + '--runtime', + agentName, + '--endpoint', + 'prod', + '--version', + '1', + '--json', + ]); + expect(result.exitCode, `Add prod 
endpoint failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'runtime-endpoint', + '--runtime', + agentName, + '--endpoint', + 'staging', + '--version', + '1', + '--json', + ]); + expect(result.exitCode, `Add staging endpoint failed: ${result.stdout}`).toBe(0); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'adds HTTP gateway with name', + async () => { + const result = await run(['add', 'gateway', '--name', gatewayName, '--json']); + expect(result.exitCode, `Add gateway failed: ${result.stdout}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'adds gateway targets for prod and staging endpoints', + async () => { + let result = await run([ + 'add', + 'gateway-target', + '--name', + `${agentName}-prod`, + '--type', + 'mcp-server', + '--endpoint', + 'https://placeholder-prod.example.com', + '--gateway', + gatewayName, + '--json', + ]); + expect(result.exitCode, `Add prod target failed: ${result.stdout}`).toBe(0); + + result = await run([ + 'add', + 'gateway-target', + '--name', + `${agentName}-staging`, + '--type', + 'mcp-server', + '--endpoint', + 'https://placeholder-staging.example.com', + '--gateway', + gatewayName, + '--json', + ]); + expect(result.exitCode, `Add staging target failed: ${result.stdout}`).toBe(0); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'deploys project with gateway and targets', + async () => { + await retry( + async () => { + const result = await run(['deploy', '--yes', '--json']); + if (result.exitCode !== 0) { + console.log('Deploy stdout:', result.stdout); + console.log('Deploy stderr:', result.stderr); + } + expect(result.exitCode, `Deploy failed (stderr: ${result.stderr})`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 2, + 30000 + ); + }, + 600000 + ); + + it.skipIf(!canRun)( + 'status shows gateway deployed', + async () => { + await 
retry( + async () => { + const result = await run(['status', '--json']); + expect(result.exitCode, `Status failed: ${result.stderr}`).toBe(0); + + const json = parseJsonOutput(result.stdout) as { + success: boolean; + resources: { resourceType: string; name: string; deploymentState: string; identifier?: string }[]; + }; + expect(json.success).toBe(true); + + // Agent should be deployed + const agent = json.resources.find(r => r.resourceType === 'agent' && r.name === agentName); + expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined(); + expect(agent!.deploymentState).toBe('deployed'); + }, + 3, + 15000 + ); + }, + 120000 + ); + + it.skipIf(!canRun)( + 'invokes the deployed agent directly', + async () => { + await retry( + async () => { + const result = await run(['invoke', '--prompt', 'Say hello', '--runtime', agentName, '--json']); + expect(result.exitCode, `Invoke failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 3, + 15000 + ); + }, + 180000 + ); + + it.skipIf(!canRun)( + 'removes gateway targets', + async () => { + let result = await run(['remove', 'gateway-target', '--name', `${agentName}-prod`, '--json']); + expect(result.exitCode, `Remove prod target failed: ${result.stderr}`).toBe(0); + + result = await run(['remove', 'gateway-target', '--name', `${agentName}-staging`, '--json']); + expect(result.exitCode, `Remove staging target failed: ${result.stderr}`).toBe(0); + }, + 60000 + ); + + it.skipIf(!canRun)( + 'removes gateway', + async () => { + const result = await run(['remove', 'gateway', '--name', gatewayName, '--json']); + expect(result.exitCode, `Remove gateway failed: ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { success: boolean }; + expect(json.success).toBe(true); + }, + 60000 + ); +}); diff --git a/e2e-tests/import-gateway.test.ts b/e2e-tests/import-gateway.test.ts index 2aea04f02..fd80ae967 100644 
--- a/e2e-tests/import-gateway.test.ts +++ b/e2e-tests/import-gateway.test.ts @@ -43,6 +43,7 @@ describe.sequential('e2e: import gateway', () => { const result = await spawnAndCollect('uv', ['run', '--with', 'boto3', 'python3', 'setup_gateway.py'], fixtureDir, { AWS_REGION: region, + RESOURCE_SUFFIX: suffix, }); if (result.exitCode !== 0) { throw new Error( @@ -50,7 +51,7 @@ describe.sequential('e2e: import gateway', () => { ); } - const resourcesPath = join(fixtureDir, 'bugbash-resources.json'); + const resourcesPath = join(fixtureDir, `bugbash-resources-${suffix}.json`); const resources = JSON.parse(await readFile(resourcesPath, 'utf-8')) as Record; gatewayArn = resources.gateway!.arn; @@ -80,6 +81,7 @@ describe.sequential('e2e: import gateway', () => { try { await spawnAndCollect('uv', ['run', '--with', 'boto3', 'python3', 'cleanup_resources.py'], fixtureDir, { AWS_REGION: region, + RESOURCE_SUFFIX: suffix, }); } catch { /* ignore — resources may already be deleted by CFN teardown */ diff --git a/e2e-tests/import-resources.test.ts b/e2e-tests/import-resources.test.ts index d51cbffac..72d9c253a 100644 --- a/e2e-tests/import-resources.test.ts +++ b/e2e-tests/import-resources.test.ts @@ -54,6 +54,7 @@ describe.sequential('e2e: import runtime/memory/evaluator', () => { const result = await spawnAndCollect('uv', ['run', '--with', 'boto3', 'python3', script], fixtureDir, { AWS_REGION: region, DEFAULT_EVALUATOR_MODEL, + RESOURCE_SUFFIX: suffix, }); if (result.exitCode !== 0) { throw new Error( @@ -63,7 +64,7 @@ describe.sequential('e2e: import runtime/memory/evaluator', () => { } // 2. 
Read resource ARNs from bugbash-resources.json - const resourcesPath = join(fixtureDir, 'bugbash-resources.json'); + const resourcesPath = join(fixtureDir, `bugbash-resources-${suffix}.json`); const resources = JSON.parse(await readFile(resourcesPath, 'utf-8')) as Record; runtimeArn = resources['runtime-basic']!.arn; memoryArn = resources['memory-full']!.arn; @@ -102,6 +103,7 @@ describe.sequential('e2e: import runtime/memory/evaluator', () => { try { await spawnAndCollect('uv', ['run', '--with', 'boto3', 'python3', 'cleanup_resources.py'], fixtureDir, { AWS_REGION: region, + RESOURCE_SUFFIX: suffix, }); } catch { /* ignore — resources may already be deleted by CFN teardown */ diff --git a/esbuild.config.mjs b/esbuild.config.mjs index 91e557270..2cbd5b81f 100644 --- a/esbuild.config.mjs +++ b/esbuild.config.mjs @@ -51,7 +51,7 @@ await esbuild.build({ jsx: 'automatic', // Inject require shim for ESM compatibility with CommonJS dependencies banner: { - js: `import { createRequire } from 'module'; const require = createRequire(import.meta.url);`, + js: `import { createRequire } from 'module'; import { fileURLToPath as __ef } from 'url'; import { dirname as __ed } from 'path'; const require = createRequire(import.meta.url); const __filename = __ef(import.meta.url); const __dirname = __ed(__filename);`, }, external: ['fsevents', '@aws-cdk/toolkit-lib'], plugins: [optionalDepsPlugin, textLoaderPlugin], diff --git a/integ-tests/add-remove-ab-test-target-based.test.ts b/integ-tests/add-remove-ab-test-target-based.test.ts new file mode 100644 index 000000000..8a77b1f06 --- /dev/null +++ b/integ-tests/add-remove-ab-test-target-based.test.ts @@ -0,0 +1,461 @@ +import { + type TestProject, + createTestProject, + parseJsonOutput, + readProjectConfig, + runCLI, +} from '../src/test-utils/index.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +async function runSuccess(args: string[], cwd: string) { + const result = await runCLI(args, cwd); + 
expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', true); + return json as Record; +} + +async function runFailure(args: string[], cwd: string) { + const result = await runCLI(args, cwd); + expect(result.exitCode).toBe(1); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', false); + expect(json).toHaveProperty('error'); + return json as Record; +} + +describe('integration: add and remove target-based ab-test', () => { + let project: TestProject; + const gatewayName = 'my-test-gw'; + + beforeAll(async () => { + project = await createTestProject({ + name: 'TargetABTest', + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + + // Add runtime endpoints (prod and staging) for the agent + await runSuccess( + ['add', 'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'prod', '--version', '1', '--json'], + project.projectPath + ); + await runSuccess( + ['add', 'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'staging', '--version', '1', '--json'], + project.projectPath + ); + + // Add an evaluator and two online eval configs (one per variant) + await runSuccess( + [ + 'add', + 'evaluator', + '--name', + 'TestEval', + '--level', + 'SESSION', + '--model', + 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', + '--instructions', + 'Evaluate quality. 
Context: {context}', + '--json', + ], + project.projectPath + ); + await runSuccess( + [ + 'add', + 'online-eval', + '--name', + 'ControlEval', + '--runtime', + project.agentName, + '--evaluator', + 'TestEval', + '--sampling-rate', + '100', + '--endpoint', + 'prod', + '--json', + ], + project.projectPath + ); + await runSuccess( + [ + 'add', + 'online-eval', + '--name', + 'TreatmentEval', + '--runtime', + project.agentName, + '--evaluator', + 'TestEval', + '--sampling-rate', + '100', + '--endpoint', + 'staging', + '--json', + ], + project.projectPath + ); + }, 120000); + + afterAll(async () => { + await project.cleanup(); + }); + + it('adds target-based AB test with --control-endpoint and --treatment-endpoint', async () => { + const json = await runSuccess( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'TargetTest1', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--control-endpoint', + 'prod', + '--treatment-endpoint', + 'staging', + '--control-weight', + '90', + '--treatment-weight', + '10', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.abTestName).toBe('TargetTest1'); + + // Verify agentcore.json has correct mode, targets, gateway auto-created + const spec = await readProjectConfig(project.projectPath); + const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'TargetTest1'); + expect(abTest).toBeDefined(); + expect(abTest!.mode).toBe('target-based'); + expect(abTest!.variants).toHaveLength(2); + expect(abTest!.variants[0]!.name).toBe('C'); + expect(abTest!.variants[0]!.weight).toBe(90); + expect(abTest!.variants[0]!.variantConfiguration.target).toBeDefined(); + expect(abTest!.variants[0]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-prod`); + expect(abTest!.variants[1]!.name).toBe('T1'); + expect(abTest!.variants[1]!.weight).toBe(10); + 
expect(abTest!.variants[1]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-staging`); + expect(abTest!.gatewayRef).toBe(`{{gateway:${gatewayName}}}`); + + // Verify gateway was auto-created with targets + const gw = spec.httpGateways?.find((g: { name: string }) => g.name === gatewayName); + expect(gw, 'HTTP gateway should have been auto-created').toBeDefined(); + expect(gw!.targets).toBeDefined(); + expect(gw!.targets!.length).toBeGreaterThanOrEqual(2); + + const controlTarget = gw!.targets!.find((t: { name: string }) => t.name === `${project.agentName}-prod`); + expect(controlTarget).toBeDefined(); + expect(controlTarget!.qualifier).toBe('prod'); + + const treatmentTarget = gw!.targets!.find((t: { name: string }) => t.name === `${project.agentName}-staging`); + expect(treatmentTarget).toBeDefined(); + expect(treatmentTarget!.qualifier).toBe('staging'); + + // Verify per-variant evaluation config + const evalConfig = abTest!.evaluationConfig; + expect('perVariantOnlineEvaluationConfig' in evalConfig).toBe(true); + if ('perVariantOnlineEvaluationConfig' in evalConfig) { + expect(evalConfig.perVariantOnlineEvaluationConfig).toHaveLength(2); + const controlEval = evalConfig.perVariantOnlineEvaluationConfig.find( + (p: { treatmentName: string }) => p.treatmentName === 'C' + ); + expect(controlEval?.onlineEvaluationConfigArn).toBe('ControlEval'); + const treatmentEval = evalConfig.perVariantOnlineEvaluationConfig.find( + (p: { treatmentName: string }) => p.treatmentName === 'T1' + ); + expect(treatmentEval?.onlineEvaluationConfigArn).toBe('TreatmentEval'); + } + }); + + it('adds target-based AB test with existing gateway', async () => { + // TargetTest1 already created the gateway — reuse it + const json = await runSuccess( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'TargetTest2', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--control-endpoint', + 'prod', + '--treatment-endpoint', + 'staging', + 
'--control-weight', + '50', + '--treatment-weight', + '50', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.abTestName).toBe('TargetTest2'); + + const spec = await readProjectConfig(project.projectPath); + // Gateway should still exist (reused, not duplicated) + const gateways = spec.httpGateways?.filter((g: { name: string }) => g.name === gatewayName); + expect(gateways).toHaveLength(1); + }); + + it('rejects duplicate AB test name', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'TargetTest1', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--control-endpoint', + 'prod', + '--treatment-endpoint', + 'staging', + '--control-weight', + '50', + '--treatment-weight', + '50', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.error).toContain('already exists'); + }); + + it('rejects weights that do not sum to 100', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'BadWeights', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--control-endpoint', + 'prod', + '--treatment-endpoint', + 'staging', + '--control-weight', + '80', + '--treatment-weight', + '80', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.error).toBeDefined(); + }); + + it('errors when --control-endpoint is missing in target-based mode', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'MissingControl', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--treatment-endpoint', + 'staging', + '--control-weight', + '50', + '--treatment-weight', + 
'50', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.error).toContain('--control-endpoint'); + }); + + it('errors when --runtime is missing in target-based mode', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'MissingRuntime', + '--gateway', + gatewayName, + '--control-endpoint', + 'prod', + '--treatment-endpoint', + 'staging', + '--control-weight', + '50', + '--treatment-weight', + '50', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.error).toContain('--runtime'); + }); + + it('errors when endpoint does not exist on runtime', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'BadEndpoint', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--control-endpoint', + 'nonexistent', + '--treatment-endpoint', + 'staging', + '--control-weight', + '50', + '--treatment-weight', + '50', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.error).toContain('nonexistent'); + }); + + it('deprecated --control-qualifier still works as alias for --control-endpoint', async () => { + const json = await runSuccess( + [ + 'add', + 'ab-test', + '--mode', + 'target-based', + '--name', + 'QualifierAlias', + '--runtime', + project.agentName, + '--gateway', + gatewayName, + '--control-qualifier', + 'prod', + '--treatment-qualifier', + 'staging', + '--control-weight', + '60', + '--treatment-weight', + '40', + '--control-online-eval', + 'ControlEval', + '--treatment-online-eval', + 'TreatmentEval', + '--json', + ], + project.projectPath + ); + + expect(json.abTestName).toBe('QualifierAlias'); + + const spec = await 
readProjectConfig(project.projectPath); + const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'QualifierAlias'); + expect(abTest).toBeDefined(); + expect(abTest!.mode).toBe('target-based'); + expect(abTest!.variants[0]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-prod`); + expect(abTest!.variants[1]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-staging`); + }); + + it('removes target-based AB test without --delete-gateway', async () => { + const json = await runSuccess(['remove', 'ab-test', '--name', 'TargetTest2', '--json'], project.projectPath); + expect(json.success).toBe(true); + + // Verify removal from agentcore.json + const spec = await readProjectConfig(project.projectPath); + const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'TargetTest2'); + expect(abTest).toBeUndefined(); + + // Gateway should still exist (other AB tests reference it) + const gw = spec.httpGateways?.find((g: { name: string }) => g.name === gatewayName); + expect(gw, 'Gateway should still exist when other AB tests reference it').toBeDefined(); + }); + + it('removes target-based AB test with --delete-gateway flag', async () => { + // First remove QualifierAlias so only TargetTest1 is left referencing the gateway + await runSuccess(['remove', 'ab-test', '--name', 'QualifierAlias', '--json'], project.projectPath); + + // Now remove TargetTest1 with --delete-gateway + const json = await runSuccess( + ['remove', 'ab-test', '--name', 'TargetTest1', '--delete-gateway', '--json'], + project.projectPath + ); + expect(json.success).toBe(true); + + // Verify gateway was also removed (no other AB tests reference it) + const spec = await readProjectConfig(project.projectPath); + const gw = spec.httpGateways?.find((g: { name: string }) => g.name === gatewayName); + expect(gw, 'Gateway should be removed with --delete-gateway when no other AB tests reference it').toBeUndefined(); + }); + + it('remove returns error for 
non-existent test', async () => { + const json = await runFailure(['remove', 'ab-test', '--name', 'DoesNotExist', '--json'], project.projectPath); + expect(json.error).toContain('not found'); + }); +}); diff --git a/integ-tests/add-remove-ab-test.test.ts b/integ-tests/add-remove-ab-test.test.ts new file mode 100644 index 000000000..551c86010 --- /dev/null +++ b/integ-tests/add-remove-ab-test.test.ts @@ -0,0 +1,170 @@ +import { + type TestProject, + createTestProject, + parseJsonOutput, + readProjectConfig, + runCLI, +} from '../src/test-utils/index.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +async function runSuccess(args: string[], cwd: string) { + const result = await runCLI(args, cwd); + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', true); + return json as Record; +} + +async function runFailure(args: string[], cwd: string) { + const result = await runCLI(args, cwd); + expect(result.exitCode).toBe(1); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', false); + expect(json).toHaveProperty('error'); + return json as Record; +} + +describe('integration: add and remove ab-test', () => { + let project: TestProject; + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + }); + + it('requires --name for JSON mode', async () => { + const json = await runFailure(['add', 'ab-test', '--json'], project.projectPath); + expect(json.error).toContain('--name'); + }); + + it('requires --runtime when --name is provided', async () => { + const json = await runFailure(['add', 'ab-test', '--name', 'Test1', '--json'], project.projectPath); + expect(json.error).toContain('--runtime'); + }); + + it('adds 
ab-test with all required flags', async () => { + const json = await runSuccess( + [ + 'add', + 'ab-test', + '--name', + 'MyIntegTest', + '--runtime', + project.agentName, + '--control-bundle', + 'arn:bundle:control', + '--control-version', + 'v1', + '--treatment-bundle', + 'arn:bundle:treatment', + '--treatment-version', + 'v1', + '--control-weight', + '80', + '--treatment-weight', + '20', + '--online-eval', + 'arn:eval:config', + '--json', + ], + project.projectPath + ); + + expect(json.abTestName).toBe('MyIntegTest'); + + // Verify it's in agentcore.json with correct structure + const spec = await readProjectConfig(project.projectPath); + const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'MyIntegTest'); + expect(abTest).toBeDefined(); + expect(abTest!.variants).toHaveLength(2); + expect(abTest!.variants[0]!.name).toBe('C'); + expect(abTest!.variants[0]!.weight).toBe(80); + expect(abTest!.variants[1]!.name).toBe('T1'); + expect(abTest!.variants[1]!.weight).toBe(20); + }); + + it('rejects duplicate AB test name', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--name', + 'MyIntegTest', + '--runtime', + project.agentName, + '--control-bundle', + 'arn:cb', + '--control-version', + 'v1', + '--treatment-bundle', + 'arn:tb', + '--treatment-version', + 'v1', + '--control-weight', + '50', + '--treatment-weight', + '50', + '--online-eval', + 'arn:eval', + '--json', + ], + project.projectPath + ); + + expect(json.error).toContain('already exists'); + }); + + it('rejects weights that do not sum to 100', async () => { + const json = await runFailure( + [ + 'add', + 'ab-test', + '--name', + 'BadWeights', + '--runtime', + project.agentName, + '--control-bundle', + 'arn:cb', + '--control-version', + 'v1', + '--treatment-bundle', + 'arn:tb', + '--treatment-version', + 'v1', + '--control-weight', + '80', + '--treatment-weight', + '80', + '--online-eval', + 'arn:eval', + '--json', + ], + project.projectPath + ); + + 
expect(json.error).toBeDefined(); + }); + + it('removes ab-test', async () => { + const json = await runSuccess(['remove', 'ab-test', '--name', 'MyIntegTest', '--json'], project.projectPath); + expect(json.success).toBe(true); + + // Verify removal from agentcore.json + const spec = await readProjectConfig(project.projectPath); + const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'MyIntegTest'); + expect(abTest).toBeUndefined(); + }); + + it('remove returns error for non-existent test', async () => { + const json = await runFailure(['remove', 'ab-test', '--name', 'DoesNotExist', '--json'], project.projectPath); + expect(json.error).toContain('not found'); + }); +}); diff --git a/integ-tests/add-remove-config-bundle.test.ts b/integ-tests/add-remove-config-bundle.test.ts new file mode 100644 index 000000000..bd53e7f31 --- /dev/null +++ b/integ-tests/add-remove-config-bundle.test.ts @@ -0,0 +1,312 @@ +import { + type TestProject, + createTestProject, + parseJsonOutput, + readProjectConfig, + runCLI, + runFailure, + runSuccess, +} from '../src/test-utils/index.js'; +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +describe('integration: add and remove config-bundle', () => { + let project: TestProject; + + beforeAll(async () => { + project = await createTestProject({ noAgent: true }); + }); + + afterAll(async () => { + await project.cleanup(); + }); + + // ── Add lifecycle ───────────────────────────────────────────────────── + + describe('add config-bundle', () => { + it('adds a config bundle with inline --components', async () => { + const components = JSON.stringify({ + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-abc': { + configuration: { systemPrompt: 'You are a helpful assistant.' 
}, + }, + }); + + const json = await runSuccess( + ['add', 'config-bundle', '--name', 'InlineBundle', '--components', components, '--json'], + project.projectPath + ); + + expect(json.bundleName).toBe('InlineBundle'); + + const config = await readProjectConfig(project.projectPath); + const bundle = config.configBundles!.find(b => b.name === 'InlineBundle'); + expect(bundle).toBeDefined(); + expect(bundle!.type).toBe('ConfigurationBundle'); + expect(bundle!.branchName).toBe('mainline'); + expect(Object.keys(bundle!.components)).toHaveLength(1); + }); + + it('adds a config bundle with --components-file', async () => { + const componentsData = { + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-def': { + configuration: { temperature: 0.7, maxTokens: 1024 }, + }, + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:gateway/gw-xyz': { + configuration: { rateLimit: 100 }, + }, + }; + + const filePath = join(project.projectPath, 'test-components.json'); + await writeFile(filePath, JSON.stringify(componentsData)); + + const json = await runSuccess( + ['add', 'config-bundle', '--name', 'FileBundle', '--components-file', filePath, '--json'], + project.projectPath + ); + + expect(json.bundleName).toBe('FileBundle'); + + const config = await readProjectConfig(project.projectPath); + const bundle = config.configBundles!.find(b => b.name === 'FileBundle'); + expect(bundle).toBeDefined(); + expect(Object.keys(bundle!.components)).toHaveLength(2); + }); + + it('adds a config bundle with optional description, branch, and commit message', async () => { + const components = JSON.stringify({ + '{{runtime:MyAgent}}': { + configuration: { systemPrompt: 'Placeholder-based bundle' }, + }, + }); + + const json = await runSuccess( + [ + 'add', + 'config-bundle', + '--name', + 'FullOptsBundle', + '--description', + 'A bundle with all optional fields', + '--components', + components, + '--branch', + 'feature-branch', + '--commit-message', + 'initial config', + '--json', + ], + 
project.projectPath + ); + + expect(json.bundleName).toBe('FullOptsBundle'); + + const config = await readProjectConfig(project.projectPath); + const bundle = config.configBundles!.find(b => b.name === 'FullOptsBundle'); + expect(bundle).toBeDefined(); + expect(bundle!.description).toBe('A bundle with all optional fields'); + expect(bundle!.branchName).toBe('feature-branch'); + expect(bundle!.commitMessage).toBe('initial config'); + }); + + it('adds a config bundle with placeholder component keys', async () => { + const components = JSON.stringify({ + '{{runtime:AgentA}}': { + configuration: { systemPrompt: 'Runtime placeholder' }, + }, + '{{gateway:GatewayB}}': { + configuration: { rateLimitPerSecond: 50 }, + }, + }); + + const json = await runSuccess( + ['add', 'config-bundle', '--name', 'PlaceholderBundle', '--components', components, '--json'], + project.projectPath + ); + + expect(json.bundleName).toBe('PlaceholderBundle'); + + const config = await readProjectConfig(project.projectPath); + const bundle = config.configBundles!.find(b => b.name === 'PlaceholderBundle'); + expect(bundle).toBeDefined(); + const keys = Object.keys(bundle!.components); + expect(keys).toContain('{{runtime:AgentA}}'); + expect(keys).toContain('{{gateway:GatewayB}}'); + }); + }); + + // ── Validation / error cases ────────────────────────────────────────── + + describe('validation errors', () => { + it('rejects duplicate config bundle name', async () => { + const components = JSON.stringify({ + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-dup': { + configuration: { foo: 'bar' }, + }, + }); + + const json = await runFailure( + ['add', 'config-bundle', '--name', 'InlineBundle', '--components', components, '--json'], + project.projectPath + ); + + expect(json.error).toContain('already exists'); + }); + + it('requires --name in non-interactive (JSON) mode', async () => { + const result = await runCLI( + ['add', 'config-bundle', '--components', '{"arn:test": 
{"configuration": {}}}', '--json'], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('--name'); + }); + + it('requires --components or --components-file when --name is provided', async () => { + const json = await runFailure(['add', 'config-bundle', '--name', 'NoComponents', '--json'], project.projectPath); + + expect(json.error).toContain('--components'); + }); + + it('rejects invalid JSON in --components', async () => { + const result = await runCLI( + ['add', 'config-bundle', '--name', 'BadJson', '--components', '{not valid json}', '--json'], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + }); + + it('rejects --components-file with non-existent file', async () => { + const result = await runCLI( + [ + 'add', + 'config-bundle', + '--name', + 'MissingFile', + '--components-file', + '/tmp/does-not-exist-xyz.json', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + }); + + it('rejects bundle name with invalid characters', async () => { + const components = JSON.stringify({ + 'arn:test': { configuration: {} }, + }); + + const json = await runFailure( + ['add', 'config-bundle', '--name', 'invalid-name!', '--components', components, '--json'], + project.projectPath + ); + + expect(json.error).toBeDefined(); + }); + + it('rejects bundle name starting with a number', async () => { + const components = JSON.stringify({ + 'arn:test': { configuration: {} }, + }); + + const json = await runFailure( + ['add', 'config-bundle', '--name', '1BadName', '--components', components, '--json'], + project.projectPath + ); + + expect(json.error).toBeDefined(); + }); + }); + + // ── Remove lifecycle ────────────────────────────────────────────────── + + describe('remove config-bundle', () => { + it('removes an existing config bundle', async () => { + const json = await runSuccess( + ['remove', 
'config-bundle', '--name', 'InlineBundle', '--json'], + project.projectPath + ); + + expect(json.success).toBe(true); + + const config = await readProjectConfig(project.projectPath); + const bundle = config.configBundles!.find(b => b.name === 'InlineBundle'); + expect(bundle).toBeUndefined(); + }); + + it('returns error for non-existent bundle', async () => { + const json = await runFailure( + ['remove', 'config-bundle', '--name', 'DoesNotExist', '--json'], + project.projectPath + ); + + expect(json.error).toContain('not found'); + }); + + it('removes all remaining config bundles one by one', async () => { + const configBefore = await readProjectConfig(project.projectPath); + const remaining = configBefore.configBundles!.map(b => b.name); + + for (const name of remaining) { + await runSuccess(['remove', 'config-bundle', '--name', name, '--json'], project.projectPath); + } + + const configAfter = await readProjectConfig(project.projectPath); + expect(configAfter.configBundles!).toHaveLength(0); + }); + }); + + // ── Multiple bundles coexistence ────────────────────────────────────── + + describe('multiple bundles coexistence', () => { + const bundleNames = ['BundleAlpha', 'BundleBeta', 'BundleGamma']; + + it('can add multiple config bundles to the same project', async () => { + for (const name of bundleNames) { + const components = JSON.stringify({ + [`arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/${name}`]: { + configuration: { bundleId: name }, + }, + }); + + await runSuccess( + ['add', 'config-bundle', '--name', name, '--components', components, '--json'], + project.projectPath + ); + } + + const config = await readProjectConfig(project.projectPath); + expect(config.configBundles!).toHaveLength(bundleNames.length); + + for (const name of bundleNames) { + expect(config.configBundles!.find(b => b.name === name)).toBeDefined(); + } + }); + + it('removing one bundle does not affect others', async () => { + await runSuccess(['remove', 'config-bundle', 
'--name', 'BundleBeta', '--json'], project.projectPath); + + const config = await readProjectConfig(project.projectPath); + expect(config.configBundles!).toHaveLength(2); + expect(config.configBundles!.find(b => b.name === 'BundleAlpha')).toBeDefined(); + expect(config.configBundles!.find(b => b.name === 'BundleGamma')).toBeDefined(); + expect(config.configBundles!.find(b => b.name === 'BundleBeta')).toBeUndefined(); + }); + + afterAll(async () => { + for (const name of bundleNames) { + try { + await runCLI(['remove', 'config-bundle', '--name', name, '--json'], project.projectPath); + } catch { + // already removed + } + } + }); + }); +}); diff --git a/integ-tests/add-remove-online-eval-endpoint.test.ts b/integ-tests/add-remove-online-eval-endpoint.test.ts new file mode 100644 index 000000000..cb2a614c8 --- /dev/null +++ b/integ-tests/add-remove-online-eval-endpoint.test.ts @@ -0,0 +1,199 @@ +import { + type TestProject, + createTestProject, + parseJsonOutput, + readProjectConfig, + runCLI, +} from '../src/test-utils/index.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +async function runSuccess(args: string[], cwd: string) { + const result = await runCLI(args, cwd); + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', true); + return json as Record; +} + +async function runFailure(args: string[], cwd: string) { + const result = await runCLI(args, cwd); + expect(result.exitCode).toBe(1); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', false); + expect(json).toHaveProperty('error'); + return json as Record; +} + +describe('integration: add and remove online-eval with endpoint', () => { + let project: TestProject; + + beforeAll(async () => { + project = await createTestProject({ + name: 'OnlineEvalEP', + language: 'Python', + framework: 'Strands', + 
modelProvider: 'Bedrock', + memory: 'none', + }); + + // Add runtime endpoints (prod and staging) for the agent + await runSuccess( + ['add', 'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'prod', '--version', '1', '--json'], + project.projectPath + ); + await runSuccess( + ['add', 'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'staging', '--version', '1', '--json'], + project.projectPath + ); + + // Add an evaluator to reference in online eval configs + await runSuccess( + [ + 'add', + 'evaluator', + '--name', + 'QualityEval', + '--level', + 'SESSION', + '--model', + 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', + '--instructions', + 'Evaluate quality. Context: {context}', + '--json', + ], + project.projectPath + ); + }, 120000); + + afterAll(async () => { + await project.cleanup(); + }); + + it('adds online eval with --endpoint prod', async () => { + const json = await runSuccess( + [ + 'add', + 'online-eval', + '--name', + 'ProdEval', + '--runtime', + project.agentName, + '--evaluator', + 'QualityEval', + '--sampling-rate', + '100', + '--endpoint', + 'prod', + '--json', + ], + project.projectPath + ); + + expect(json.configName).toBe('ProdEval'); + + // Verify agentcore.json has endpoint field + const spec = await readProjectConfig(project.projectPath); + const evalConfig = spec.onlineEvalConfigs?.find((c: { name: string }) => c.name === 'ProdEval'); + expect(evalConfig).toBeDefined(); + expect(evalConfig!.endpoint).toBe('prod'); + expect(evalConfig!.agent).toBe(project.agentName); + expect(evalConfig!.evaluators).toContain('QualityEval'); + expect(evalConfig!.samplingRate).toBe(100); + }); + + it('adds online eval with --endpoint staging', async () => { + const json = await runSuccess( + [ + 'add', + 'online-eval', + '--name', + 'StagingEval', + '--runtime', + project.agentName, + '--evaluator', + 'QualityEval', + '--sampling-rate', + '50', + '--endpoint', + 'staging', + '--json', + ], + project.projectPath + ); + + 
expect(json.configName).toBe('StagingEval'); + + const spec = await readProjectConfig(project.projectPath); + const evalConfig = spec.onlineEvalConfigs?.find((c: { name: string }) => c.name === 'StagingEval'); + expect(evalConfig).toBeDefined(); + expect(evalConfig!.endpoint).toBe('staging'); + }); + + it('adds online eval without --endpoint (no endpoint field in config)', async () => { + const json = await runSuccess( + [ + 'add', + 'online-eval', + '--name', + 'NoEndpointEval', + '--runtime', + project.agentName, + '--evaluator', + 'QualityEval', + '--sampling-rate', + '100', + '--json', + ], + project.projectPath + ); + + expect(json.configName).toBe('NoEndpointEval'); + + const spec = await readProjectConfig(project.projectPath); + const evalConfig = spec.onlineEvalConfigs?.find((c: { name: string }) => c.name === 'NoEndpointEval'); + expect(evalConfig).toBeDefined(); + expect(evalConfig!.endpoint).toBeUndefined(); + }); + + it('errors when endpoint does not exist on runtime', async () => { + const json = await runFailure( + [ + 'add', + 'online-eval', + '--name', + 'BadEndpointEval', + '--runtime', + project.agentName, + '--evaluator', + 'QualityEval', + '--sampling-rate', + '100', + '--endpoint', + 'nonexistent', + '--json', + ], + project.projectPath + ); + + expect(json.error).toContain('nonexistent'); + }); + + it('removes online eval config', async () => { + const json = await runSuccess(['remove', 'online-eval', '--name', 'ProdEval', '--json'], project.projectPath); + expect(json.success).toBe(true); + + // Verify removal from agentcore.json + const spec = await readProjectConfig(project.projectPath); + const evalConfig = spec.onlineEvalConfigs?.find((c: { name: string }) => c.name === 'ProdEval'); + expect(evalConfig).toBeUndefined(); + + // Other eval configs should remain + const stagingEval = spec.onlineEvalConfigs?.find((c: { name: string }) => c.name === 'StagingEval'); + expect(stagingEval).toBeDefined(); + }); + + it('remove returns error for 
non-existent online eval', async () => { + const json = await runFailure(['remove', 'online-eval', '--name', 'DoesNotExist', '--json'], project.projectPath); + expect(json.error).toContain('not found'); + }); +}); diff --git a/integ-tests/add-remove-resources.test.ts b/integ-tests/add-remove-resources.test.ts index 57dd48483..a89c761dd 100644 --- a/integ-tests/add-remove-resources.test.ts +++ b/integ-tests/add-remove-resources.test.ts @@ -1,7 +1,10 @@ import { createTestProject, readProjectConfig, runCLI } from '../src/test-utils/index.js'; import type { TestProject } from '../src/test-utils/index.js'; +import { createTelemetryHelper } from '../src/test-utils/telemetry-helper.js'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +const telemetry = createTelemetryHelper(); + describe('integration: add and remove resources', () => { let project: TestProject; @@ -16,13 +19,16 @@ describe('integration: add and remove resources', () => { afterAll(async () => { await project.cleanup(); + telemetry.destroy(); }); describe('memory lifecycle', () => { const memoryName = `IntegMem${Date.now().toString().slice(-6)}`; it('adds a memory resource', async () => { - const result = await runCLI(['add', 'memory', '--name', memoryName, '--json'], project.projectPath); + const result = await runCLI(['add', 'memory', '--name', memoryName, '--json'], project.projectPath, { + env: telemetry.env, + }); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); const json = JSON.parse(result.stdout); @@ -34,13 +40,17 @@ describe('integration: add and remove resources', () => { expect(memories, 'memories should exist').toBeDefined(); const found = memories!.some((m: Record) => m.name === memoryName); expect(found, `Memory "${memoryName}" should be in config`).toBe(true); + + // Verify telemetry + telemetry.assertMetricEmitted({ command: 'add.memory', exit_reason: 'success' }); }); it('adds a memory with EPISODIC strategy and verifies 
reflectionNamespaces', async () => { const episodicMemName = `EpiMem${Date.now().toString().slice(-6)}`; const result = await runCLI( ['add', 'memory', '--name', episodicMemName, '--strategies', 'EPISODIC', '--json'], - project.projectPath + project.projectPath, + { env: telemetry.env } ); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); @@ -61,6 +71,14 @@ describe('integration: add and remove resources', () => { expect(episodic!.reflectionNamespaces, 'Should have reflectionNamespaces').toBeDefined(); expect(episodic!.reflectionNamespaces!.length).toBeGreaterThan(0); + // Verify telemetry + telemetry.assertMetricEmitted({ + command: 'add.memory', + exit_reason: 'success', + strategy_count: '1', + strategy_episodic: 'true', + }); + // Clean up await runCLI(['remove', 'memory', '--name', episodicMemName, '--json'], project.projectPath); }); @@ -86,7 +104,8 @@ describe('integration: add and remove resources', () => { it('adds a credential resource', async () => { const result = await runCLI( ['add', 'credential', '--name', credentialName, '--api-key', 'test-key-integ-123', '--json'], - project.projectPath + project.projectPath, + { env: telemetry.env } ); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); @@ -99,6 +118,13 @@ describe('integration: add and remove resources', () => { expect(credentials, 'credentials should exist').toBeDefined(); const found = credentials!.some((c: Record) => c.name === credentialName); expect(found, `Credential "${credentialName}" should be in config`).toBe(true); + + // Verify telemetry + telemetry.assertMetricEmitted({ + command: 'add.credential', + exit_reason: 'success', + credential_type: 'api-key', + }); }); it('removes the credential resource', async () => { @@ -115,4 +141,30 @@ describe('integration: add and remove resources', () => { expect(found, `Credential "${credentialName}" should be removed from config`).toBe(false); }); }); + + 
describe('policy-engine', () => { + const engineName = `TestEngine${Date.now().toString().slice(-6)}`; + + it('adds a policy engine resource', async () => { + const result = await runCLI(['add', 'policy-engine', '--name', engineName, '--json'], project.projectPath, { + env: telemetry.env, + }); + + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json = JSON.parse(result.stdout); + expect(json.success).toBe(true); + + telemetry.assertMetricEmitted({ + command: 'add.policy-engine', + exit_reason: 'success', + attach_gateway_count: '0', + }); + }); + + it('removes the policy engine resource', async () => { + const result = await runCLI(['remove', 'policy-engine', '--name', engineName, '--json'], project.projectPath); + + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + }); + }); }); diff --git a/integ-tests/create-frameworks.test.ts b/integ-tests/create-frameworks.test.ts index dee93cc1e..82bbc0871 100644 --- a/integ-tests/create-frameworks.test.ts +++ b/integ-tests/create-frameworks.test.ts @@ -1,4 +1,3 @@ -/* eslint-disable security/detect-non-literal-fs-filename */ import { exists, prereqs, readProjectConfig, runCLI } from '../src/test-utils/index.js'; import { randomUUID } from 'node:crypto'; import { mkdir, readFile, rm } from 'node:fs/promises'; diff --git a/integ-tests/create-memory.test.ts b/integ-tests/create-memory.test.ts index 35cd4436d..ac80f1ba4 100644 --- a/integ-tests/create-memory.test.ts +++ b/integ-tests/create-memory.test.ts @@ -1,4 +1,3 @@ -/* eslint-disable security/detect-non-literal-fs-filename */ import { prereqs, readProjectConfig, runCLI } from '../src/test-utils/index.js'; import { randomUUID } from 'node:crypto'; import { mkdir, rm } from 'node:fs/promises'; diff --git a/integ-tests/create-no-agent.test.ts b/integ-tests/create-no-agent.test.ts index 4bcca2690..bcdf80eaa 100644 --- a/integ-tests/create-no-agent.test.ts +++ 
b/integ-tests/create-no-agent.test.ts @@ -32,7 +32,7 @@ describe('integration: create without agent', () => { it.skipIf(!hasNpm || !hasGit)('creates project with real npm install and git init', async () => { const name = `NoAgent${Date.now().toString().slice(-6)}`; - const result = await runCLI(['create', '--name', name, '--no-agent', '--json'], testDir, false); + const result = await runCLI(['create', '--name', name, '--no-agent', '--json'], testDir, { skipInstall: false }); expect(result.exitCode, `stderr: ${result.stderr}`).toBe(0); diff --git a/integ-tests/create-protocols.test.ts b/integ-tests/create-protocols.test.ts index 30b707f8c..440050fdb 100644 --- a/integ-tests/create-protocols.test.ts +++ b/integ-tests/create-protocols.test.ts @@ -1,4 +1,3 @@ -/* eslint-disable security/detect-non-literal-fs-filename */ import { exists, prereqs, readProjectConfig, runCLI } from '../src/test-utils/index.js'; import { randomUUID } from 'node:crypto'; import { mkdir, readFile, rm } from 'node:fs/promises'; diff --git a/integ-tests/create-with-agent.test.ts b/integ-tests/create-with-agent.test.ts index 7fb20bdbf..69f0594b8 100644 --- a/integ-tests/create-with-agent.test.ts +++ b/integ-tests/create-with-agent.test.ts @@ -49,7 +49,7 @@ describe('integration: create with Python agent', () => { '--json', ], testDir, - false + { skipInstall: false } ); expect(result.exitCode, `stderr: ${result.stderr}`).toBe(0); diff --git a/integ-tests/dev-server.test.ts b/integ-tests/dev-server.test.ts index 5f60976e7..4b07b7284 100644 --- a/integ-tests/dev-server.test.ts +++ b/integ-tests/dev-server.test.ts @@ -60,7 +60,7 @@ describe('integration: dev server', () => { '--json', ], testDir, - false + { skipInstall: false } ); if (result.exitCode === 0) { diff --git a/integ-tests/help.test.ts b/integ-tests/help.test.ts index 052605c7a..7e2176e2f 100644 --- a/integ-tests/help.test.ts +++ b/integ-tests/help.test.ts @@ -1,10 +1,9 @@ import { spawnAndCollect } from 
'../src/test-utils/cli-runner.js'; import { runCLI } from '../src/test-utils/index.js'; +import { createTelemetryHelper } from '../src/test-utils/telemetry-helper.js'; import { readdirSync } from 'node:fs'; -import { mkdir, readFile, rm } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { afterAll, describe, expect, it } from 'vitest'; const COMMANDS = [ 'create', @@ -45,52 +44,46 @@ describe('CLI help', () => { }); describe('help modes telemetry', () => { - let testConfigDir: string; + const telemetry = createTelemetryHelper(); const cliPath = join(__dirname, '..', 'dist', 'cli', 'index.mjs'); - beforeAll(async () => { - testConfigDir = join(tmpdir(), `agentcore-help-telemetry-${Date.now()}`); - await mkdir(testConfigDir, { recursive: true }); - }); - afterAll(() => rm(testConfigDir, { recursive: true, force: true })); + afterAll(() => telemetry.destroy()); function run(args: string[], extraEnv: Record = {}) { - return spawnAndCollect('node', [cliPath, ...args], tmpdir(), { + return spawnAndCollect('node', [cliPath, ...args], process.cwd(), { AGENTCORE_SKIP_INSTALL: '1', - AGENTCORE_CONFIG_DIR: testConfigDir, + ...telemetry.env, ...extraEnv, }); } it('writes JSONL audit file when audit is enabled via env var', async () => { - const result = await run(['help', 'modes'], { AGENTCORE_TELEMETRY_AUDIT: '1' }); + const result = await run(['help', 'modes']); expect(result.exitCode).toBe(0); - const telemetryDir = join(testConfigDir, 'telemetry'); - const files = readdirSync(telemetryDir).filter(f => f.startsWith('help-')); - expect(files).toHaveLength(1); - - const content = await readFile(join(telemetryDir, files[0]!), 'utf-8'); - const entry = JSON.parse(content.trim()); - expect(entry.attrs).toMatchObject({ - 'service.name': 'agentcore-cli', - 'agentcore-cli.mode': 'cli', + const entries = telemetry.readEntries(); + expect(entries).toHaveLength(1); 
+ telemetry.assertMetricEmitted({ command_group: 'help', command: 'help.modes', exit_reason: 'success', }); - expect(entry.attrs['agentcore-cli.session_id']).toBeDefined(); - expect(entry.attrs['os.type']).toBeDefined(); - expect(entry.value).toBeGreaterThanOrEqual(0); + expect(entries[0]!.attrs['agentcore-cli.session_id']).toBeDefined(); + expect(entries[0]!.attrs['os.type']).toBeDefined(); + expect(entries[0]!.value).toBeGreaterThanOrEqual(0); }); it('does not write audit file when audit is not enabled', async () => { - const telemetryDir = join(testConfigDir, 'telemetry'); - await rm(telemetryDir, { recursive: true, force: true }); + telemetry.clearEntries(); - const result = await run(['help', 'modes']); + const noAuditCliPath = join(__dirname, '..', 'dist', 'cli', 'index.mjs'); + const result = await spawnAndCollect('node', [noAuditCliPath, 'help', 'modes'], process.cwd(), { + AGENTCORE_SKIP_INSTALL: '1', + AGENTCORE_CONFIG_DIR: telemetry.dir, + }); expect(result.exitCode).toBe(0); + const telemetryDir = join(telemetry.dir, 'telemetry'); try { const files = readdirSync(telemetryDir); expect(files).toHaveLength(0); diff --git a/integ-tests/recommendation.test.ts b/integ-tests/recommendation.test.ts new file mode 100644 index 000000000..dc3037a3e --- /dev/null +++ b/integ-tests/recommendation.test.ts @@ -0,0 +1,290 @@ +import { type TestProject, createTestProject, parseJsonOutput, runCLI } from '../src/test-utils/index.js'; +import { writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +describe('integration: run recommendation CLI validation', () => { + let project: TestProject; + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + }); + + describe('required flags', () => { + it('requires --runtime', 
async () => { + const result = await runCLI( + ['run', 'recommendation', '--evaluator', 'Builtin.Faithfulness', '--inline', 'test prompt', '--json'], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('--runtime'); + }); + + it('requires --evaluator for system-prompt type', async () => { + const result = await runCLI( + ['run', 'recommendation', '--runtime', project.agentName, '--inline', 'test prompt', '--json'], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('--evaluator'); + }); + + it('rejects invalid --type', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--type', + 'invalid-type', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'test prompt', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('--type'); + }); + }); + + describe('system-prompt recommendation input validation', () => { + it('fails when agent not deployed (inline input)', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'You are a helpful assistant.', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('deployed'); + }); + + it('fails when agent not deployed (file input)', async () => { + const promptFile = join(project.projectPath, 'system-prompt.txt'); + await writeFile(promptFile, 'You are a helpful assistant for 
testing.'); + + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--prompt-file', + promptFile, + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('deployed'); + }); + + it('fails with non-existent prompt file', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--prompt-file', + '/tmp/nonexistent-prompt-file-xyz.txt', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + }); + }); + + describe('tool-description recommendation input validation', () => { + it('fails when agent not deployed (tool-description type with --tools)', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--type', + 'tool-description', + '--runtime', + project.agentName, + '--tools', + 'search:Searches the web for information', + '--tools', + 'calculator:Performs math calculations', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('deployed'); + }); + }); + + describe('config bundle source validation', () => { + it('fails when bundle not found in deployed state', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--bundle-name', + 'NonExistentBundle', + '--bundle-version', + 'v1', + '--system-prompt-json-path', + 'systemPrompt', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + // Fails at agent 
resolution (not deployed) before bundle resolution + expect(json.error).toContain('deployed'); + }); + }); + + describe('spans file validation', () => { + it('fails when spans file does not exist', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'You are a helpful assistant.', + '--spans-file', + '/tmp/nonexistent-spans-xyz.json', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + }); + + it('fails when spans file contains invalid JSON', async () => { + const spansFile = join(project.projectPath, 'bad-spans.json'); + await writeFile(spansFile, 'not valid json'); + + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'You are a helpful assistant.', + '--spans-file', + spansFile, + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + }); + }); + + describe('lookback and session options', () => { + it('accepts --lookback flag (fails at deploy check, not parsing)', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'You are a helpful assistant.', + '--lookback', + '14', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.error).toContain('deployed'); + }); + + it('accepts --session-id flag (fails at deploy check, not parsing)', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--evaluator', + 'Builtin.Faithfulness', + '--inline', + 'You are a helpful assistant.', + '--session-id', + 'sess-001', + 'sess-002', + '--json', + ], + project.projectPath + ); + + expect(result.exitCode).toBe(1); + const json 
= parseJsonOutput(result.stdout) as Record; + expect(json.error).toContain('deployed'); + }); + }); +}); diff --git a/package-lock.json b/package-lock.json index 1db6a9d5f..7dc1e1b28 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@aws/agentcore", - "version": "1.0.0-preview.5", + "version": "0.13.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@aws/agentcore", - "version": "1.0.0-preview.5", + "version": "0.13.0", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { @@ -24,7 +24,7 @@ "@aws-sdk/client-sts": "^3.893.0", "@aws-sdk/client-xray": "^3.1003.0", "@aws-sdk/credential-providers": "^3.893.0", - "@aws/agent-inspector": "0.2.1", + "@aws/agent-inspector": "0.3.0", "@commander-js/extra-typings": "^14.0.0", "@opentelemetry/api": "^1.9.1", "@opentelemetry/exporter-metrics-otlp-http": "^0.214.0", @@ -2903,9 +2903,9 @@ } }, "node_modules/@aws/agent-inspector": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/@aws/agent-inspector/-/agent-inspector-0.2.1.tgz", - "integrity": "sha512-kyL6RBcTj1hYIchtrHDlDyeqm2viVYMBxhZKVn8wJn058YhI52GIDuUFlKD1avd57X+LJKlHr5VcKvBZp7Sg6A==", + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@aws/agent-inspector/-/agent-inspector-0.3.0.tgz", + "integrity": "sha512-xD7QPr1WWkT9QWRWo6e9kq8kYxJLQ8egGscgSZ6jCyW3wNV5fcQ6THcAR/71hxxMFF2aleNUc3D8MoqgiS4DVw==", "license": "Apache-2.0", "dependencies": { "@ag-ui/core": "^0.0.52", diff --git a/package.json b/package.json index 57d03d078..0ddeb810c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@aws/agentcore", - "version": "1.0.0-preview.5", + "version": "0.13.0", "description": "CLI for Amazon Bedrock AgentCore", "license": "Apache-2.0", "repository": { @@ -87,7 +87,7 @@ "@aws-sdk/client-sts": "^3.893.0", "@aws-sdk/client-xray": "^3.1003.0", "@aws-sdk/credential-providers": "^3.893.0", - "@aws/agent-inspector": "0.2.1", + "@aws/agent-inspector": "0.3.0", 
"@commander-js/extra-typings": "^14.0.0", "@opentelemetry/api": "^1.9.1", "@opentelemetry/exporter-metrics-otlp-http": "^0.214.0", diff --git a/scripts/bundle.mjs b/scripts/bundle.mjs index 303a83ba6..ec46df8cc 100644 --- a/scripts/bundle.mjs +++ b/scripts/bundle.mjs @@ -109,7 +109,8 @@ function resolveInspectorPath() { log('Starting bundle process...'); -const timestamp = Math.floor(Date.now() / 1000); +const now = new Date(); +const timestamp = now.toISOString().replace(/[-:T]/g, '').slice(0, 14); log(`Bundle timestamp: ${timestamp}`); // Helper to bump a package version with a unique e2e timestamp tag. @@ -119,7 +120,9 @@ function bumpVersion(pkgDir) { const pkg = JSON.parse(fs.readFileSync(pkgJsonPath, 'utf8')); const originalVersion = pkg.version; const baseVersion = originalVersion.split('-')[0]; - pkg.version = `${baseVersion}-${timestamp}`; + const prerelease = originalVersion.includes('-') ? originalVersion.split('-').slice(1).join('-') : ''; + const tag = prerelease ? `${prerelease}-${timestamp}` : timestamp; + pkg.version = `${baseVersion}-${tag}`; fs.writeFileSync(pkgJsonPath, JSON.stringify(pkg, null, 2) + '\n'); log(`Bumped ${pkg.name} version: ${originalVersion} -> ${pkg.version}`); return { pkgJsonPath, originalVersion, bumpedVersion: pkg.version }; diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap index 46e16e304..30e9be0f4 100644 --- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap +++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap @@ -443,6 +443,7 @@ test('AgentCoreStack synthesizes with empty spec', () => { credentials: [], evaluators: [], onlineEvalConfigs: [], + configBundles: [], policyEngines: [], agentCoreGateways: [], mcpRuntimeTools: [], @@ -3762,9 +3763,15 @@ Thumbs.db exports[`Assets Directory Snapshots > Python framework assets > python/python/http/langchain_langgraph/base/main.py should match snapshot 1`] = 
` "import os -from langchain_core.messages import HumanMessage +from typing import Any + +from langchain_core.messages import HumanMessage{{#if hasConfigBundle}}, SystemMessage{{/if}} from langgraph.prebuilt import create_react_agent from langchain.tools import tool +{{#if hasConfigBundle}} +from langchain_core.callbacks import BaseCallbackHandler +from bedrock_agentcore.runtime.context import BedrockAgentCoreContext +{{/if}} from opentelemetry.instrumentation.langchain import LangchainInstrumentor from bedrock_agentcore.runtime import BedrockAgentCoreApp from model.load import load_model @@ -3788,6 +3795,14 @@ def get_or_create_model(): return _llm +DEFAULT_SYSTEM_PROMPT = """ +You are a helpful assistant. Use tools when appropriate. +{{#if sessionStorageMountPath}} +You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. +{{/if}} +""" + + # Define a simple function tool @tool def add_numbers(a: int, b: int) -> int: @@ -3851,13 +3866,28 @@ def list_files(directory: str = "") -> str: tools.extend([file_read, file_write, list_files]) {{/if}} -SYSTEM_PROMPT = """ -You are a helpful assistant. Use tools when appropriate. -{{#if sessionStorageMountPath}} -You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. -{{/if}} -""" +{{#if hasConfigBundle}} +class ConfigBundleCallback(BaseCallbackHandler): + """Injects config bundle values into LangGraph agent at runtime. + + BedrockAgentCoreContext.get_config_bundle() fetches the component configuration + for the current runtime ARN from the config bundle service. The SDK caches the + result and refreshes on bundle version changes. 
+ """ + + def on_chain_start(self, serialized: dict, inputs: dict, **kwargs: Any) -> None: + config = BedrockAgentCoreContext.get_config_bundle() + prompt = config.get("systemPrompt", DEFAULT_SYSTEM_PROMPT) + + messages = inputs.get("messages", []) + if messages and isinstance(messages[0], SystemMessage): + messages[0] = SystemMessage(content=prompt) + else: + messages.insert(0, SystemMessage(content=prompt)) + inputs["messages"] = messages + +{{/if}} @app.entrypoint async def invoke(payload, context): @@ -3876,7 +3906,21 @@ async def invoke(payload, context): mcp_tools = await mcp_client.get_tools() # Define the agent using create_react_agent - graph = create_react_agent(get_or_create_model(), tools=mcp_tools + tools, prompt=SYSTEM_PROMPT) +{{#if hasConfigBundle}} + graph = create_react_agent(get_or_create_model(), tools=mcp_tools + tools, prompt=DEFAULT_SYSTEM_PROMPT) + callback = ConfigBundleCallback() + + # Process the user prompt + prompt = payload.get("prompt", "What can you help me with?") + log.info(f"Agent input: {prompt}") + + # Run the agent with config bundle callback + result = await graph.ainvoke( + {"messages": [HumanMessage(content=prompt)]}, + config={"callbacks": [callback]}, + ) +{{else}} + graph = create_react_agent(get_or_create_model(), tools=mcp_tools + tools, prompt=DEFAULT_SYSTEM_PROMPT) # Process the user prompt prompt = payload.get("prompt", "What can you help me with?") @@ -3884,6 +3928,7 @@ async def invoke(payload, context): # Run the agent result = await graph.ainvoke({"messages": [HumanMessage(content=prompt)]}) +{{/if}} # Return result output = result["messages"][-1].content @@ -4658,7 +4703,13 @@ Thumbs.db" `; exports[`Assets Directory Snapshots > Python framework assets > python/python/http/strands/base/main.py should match snapshot 1`] = ` -"from strands import Agent, tool +"from typing import Any + +from strands import Agent, tool +{{#if hasConfigBundle}} +from strands.hooks import HookProvider, HookRegistry, 
BeforeInvocationEvent, BeforeToolCallEvent +from bedrock_agentcore.runtime.context import BedrockAgentCoreContext +{{/if}} from bedrock_agentcore.runtime import BedrockAgentCoreApp from model.load import load_model {{#if hasGateway}} @@ -4683,11 +4734,26 @@ mcp_clients = get_all_gateway_mcp_clients() mcp_clients = [get_streamable_http_mcp_client()] {{/if}} +DEFAULT_SYSTEM_PROMPT = """ +You are a helpful assistant. Use tools when appropriate. +{{#if sessionStorageMountPath}} +You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. +{{/if}} +""" + +{{#if hasConfigBundle}} +DEFAULT_TOOL_DESC = "Return the sum of two numbers" +{{/if}} + # Define a collection of tools used by the model tools = [] # Define a simple function tool +{{#if hasConfigBundle}} +@tool(description=DEFAULT_TOOL_DESC) +{{else}} @tool +{{/if}} def add_numbers(a: int, b: int) -> int: """Return the sum of two numbers""" return a+b @@ -4751,12 +4817,39 @@ for mcp_client in mcp_clients: if mcp_client: tools.append(mcp_client) -SYSTEM_PROMPT = """ -You are a helpful assistant. Use tools when appropriate. -{{#if sessionStorageMountPath}} -You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. +{{#if hasConfigBundle}} + +class ConfigBundleHook(HookProvider): + """Injects config bundle values (system prompt, tool descriptions) before each invocation. + + BedrockAgentCoreContext.get_config_bundle() fetches the component configuration + for the current runtime ARN from the config bundle service. The SDK caches the + result and refreshes on bundle version changes. 
+ """ + + def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None: + registry.add_callback(BeforeInvocationEvent, self._inject_system_prompt) + registry.add_callback(BeforeToolCallEvent, self._override_tool_desc) + + def _inject_system_prompt(self, event: BeforeInvocationEvent) -> None: + config = BedrockAgentCoreContext.get_config_bundle() + prompt = config.get("systemPrompt", DEFAULT_SYSTEM_PROMPT) + + if prompt != event.agent.system_prompt: + event.agent.system_prompt = prompt + + def _override_tool_desc(self, event: BeforeToolCallEvent) -> None: + config = BedrockAgentCoreContext.get_config_bundle() + tool_descs = config.get("toolDescriptions", {}) + + tool_name = event.tool_use["name"] + override = tool_descs.get(tool_name) + if override and event.selected_tool: + spec = event.selected_tool.tool_spec + if spec and "description" in spec: + spec["description"] = override + {{/if}} -""" {{#if hasMemory}} def agent_factory(): @@ -4768,13 +4861,23 @@ def agent_factory(): cache[key] = Agent( model=load_model(), session_manager=get_memory_session_manager(session_id, user_id), - system_prompt=SYSTEM_PROMPT, - tools=tools + system_prompt=DEFAULT_SYSTEM_PROMPT, + tools=tools{{#if hasConfigBundle}}, + hooks=[ConfigBundleHook()]{{/if}} ) return cache[key] return get_or_create_agent get_or_create_agent = agent_factory() {{else}} +{{#if hasConfigBundle}} +def create_agent(): + return Agent( + model=load_model(), + system_prompt=DEFAULT_SYSTEM_PROMPT, + tools=tools, + hooks=[ConfigBundleHook()], + ) +{{else}} _agent = None def get_or_create_agent(): @@ -4782,11 +4885,12 @@ def get_or_create_agent(): if _agent is None: _agent = Agent( model=load_model(), - system_prompt=SYSTEM_PROMPT, + system_prompt=DEFAULT_SYSTEM_PROMPT, tools=tools ) return _agent {{/if}} +{{/if}} @app.entrypoint @@ -4797,8 +4901,12 @@ async def invoke(payload, context): session_id = getattr(context, 'session_id', 'default-session') user_id = getattr(context, 'user_id', 'default-user') 
agent = get_or_create_agent(session_id, user_id) +{{else}} +{{#if hasConfigBundle}} + agent = create_agent() {{else}} agent = get_or_create_agent() +{{/if}} {{/if}} # Execute and format response diff --git a/src/assets/cdk/test/cdk.test.ts b/src/assets/cdk/test/cdk.test.ts index df5c767f9..79282f729 100644 --- a/src/assets/cdk/test/cdk.test.ts +++ b/src/assets/cdk/test/cdk.test.ts @@ -14,6 +14,7 @@ test('AgentCoreStack synthesizes with empty spec', () => { credentials: [], evaluators: [], onlineEvalConfigs: [], + configBundles: [], policyEngines: [], agentCoreGateways: [], mcpRuntimeTools: [], diff --git a/src/assets/python/http/langchain_langgraph/base/main.py b/src/assets/python/http/langchain_langgraph/base/main.py index dcb9eb13c..773253da0 100644 --- a/src/assets/python/http/langchain_langgraph/base/main.py +++ b/src/assets/python/http/langchain_langgraph/base/main.py @@ -1,7 +1,13 @@ import os -from langchain_core.messages import HumanMessage +from typing import Any + +from langchain_core.messages import HumanMessage{{#if hasConfigBundle}}, SystemMessage{{/if}} from langgraph.prebuilt import create_react_agent from langchain.tools import tool +{{#if hasConfigBundle}} +from langchain_core.callbacks import BaseCallbackHandler +from bedrock_agentcore.runtime.context import BedrockAgentCoreContext +{{/if}} from opentelemetry.instrumentation.langchain import LangchainInstrumentor from bedrock_agentcore.runtime import BedrockAgentCoreApp from model.load import load_model @@ -25,6 +31,14 @@ def get_or_create_model(): return _llm +DEFAULT_SYSTEM_PROMPT = """ +You are a helpful assistant. Use tools when appropriate. +{{#if sessionStorageMountPath}} +You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. 
+{{/if}} +""" + + # Define a simple function tool @tool def add_numbers(a: int, b: int) -> int: @@ -88,13 +102,28 @@ def list_files(directory: str = "") -> str: tools.extend([file_read, file_write, list_files]) {{/if}} -SYSTEM_PROMPT = """ -You are a helpful assistant. Use tools when appropriate. -{{#if sessionStorageMountPath}} -You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. -{{/if}} -""" +{{#if hasConfigBundle}} + +class ConfigBundleCallback(BaseCallbackHandler): + """Injects config bundle values into LangGraph agent at runtime. + + BedrockAgentCoreContext.get_config_bundle() fetches the component configuration + for the current runtime ARN from the config bundle service. The SDK caches the + result and refreshes on bundle version changes. + """ + + def on_chain_start(self, serialized: dict, inputs: dict, **kwargs: Any) -> None: + config = BedrockAgentCoreContext.get_config_bundle() + prompt = config.get("systemPrompt", DEFAULT_SYSTEM_PROMPT) + messages = inputs.get("messages", []) + if messages and isinstance(messages[0], SystemMessage): + messages[0] = SystemMessage(content=prompt) + else: + messages.insert(0, SystemMessage(content=prompt)) + inputs["messages"] = messages + +{{/if}} @app.entrypoint async def invoke(payload, context): @@ -113,7 +142,21 @@ async def invoke(payload, context): mcp_tools = await mcp_client.get_tools() # Define the agent using create_react_agent - graph = create_react_agent(get_or_create_model(), tools=mcp_tools + tools, prompt=SYSTEM_PROMPT) +{{#if hasConfigBundle}} + graph = create_react_agent(get_or_create_model(), tools=mcp_tools + tools, prompt=DEFAULT_SYSTEM_PROMPT) + callback = ConfigBundleCallback() + + # Process the user prompt + prompt = payload.get("prompt", "What can you help me with?") + log.info(f"Agent input: {prompt}") + + # Run the agent with config bundle callback + result = await graph.ainvoke( + {"messages": 
[HumanMessage(content=prompt)]}, + config={"callbacks": [callback]}, + ) +{{else}} + graph = create_react_agent(get_or_create_model(), tools=mcp_tools + tools, prompt=DEFAULT_SYSTEM_PROMPT) # Process the user prompt prompt = payload.get("prompt", "What can you help me with?") @@ -121,6 +164,7 @@ async def invoke(payload, context): # Run the agent result = await graph.ainvoke({"messages": [HumanMessage(content=prompt)]}) +{{/if}} # Return result output = result["messages"][-1].content diff --git a/src/assets/python/http/strands/base/main.py b/src/assets/python/http/strands/base/main.py index f7b69d3e4..0cc8771ad 100644 --- a/src/assets/python/http/strands/base/main.py +++ b/src/assets/python/http/strands/base/main.py @@ -1,4 +1,10 @@ +from typing import Any + from strands import Agent, tool +{{#if hasConfigBundle}} +from strands.hooks import HookProvider, HookRegistry, BeforeInvocationEvent, BeforeToolCallEvent +from bedrock_agentcore.runtime.context import BedrockAgentCoreContext +{{/if}} from bedrock_agentcore.runtime import BedrockAgentCoreApp from model.load import load_model {{#if hasGateway}} @@ -23,11 +29,26 @@ mcp_clients = [get_streamable_http_mcp_client()] {{/if}} +DEFAULT_SYSTEM_PROMPT = """ +You are a helpful assistant. Use tools when appropriate. +{{#if sessionStorageMountPath}} +You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. +{{/if}} +""" + +{{#if hasConfigBundle}} +DEFAULT_TOOL_DESC = "Return the sum of two numbers" +{{/if}} + # Define a collection of tools used by the model tools = [] # Define a simple function tool +{{#if hasConfigBundle}} +@tool(description=DEFAULT_TOOL_DESC) +{{else}} @tool +{{/if}} def add_numbers(a: int, b: int) -> int: """Return the sum of two numbers""" return a+b @@ -91,12 +112,39 @@ def list_files(directory: str = "") -> str: if mcp_client: tools.append(mcp_client) -SYSTEM_PROMPT = """ -You are a helpful assistant. Use tools when appropriate. 
-{{#if sessionStorageMountPath}} -You have persistent storage at {{sessionStorageMountPath}}. Use file tools to read and write files. Data persists across sessions. +{{#if hasConfigBundle}} + +class ConfigBundleHook(HookProvider): + """Injects config bundle values (system prompt, tool descriptions) before each invocation. + + BedrockAgentCoreContext.get_config_bundle() fetches the component configuration + for the current runtime ARN from the config bundle service. The SDK caches the + result and refreshes on bundle version changes. + """ + + def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None: + registry.add_callback(BeforeInvocationEvent, self._inject_system_prompt) + registry.add_callback(BeforeToolCallEvent, self._override_tool_desc) + + def _inject_system_prompt(self, event: BeforeInvocationEvent) -> None: + config = BedrockAgentCoreContext.get_config_bundle() + prompt = config.get("systemPrompt", DEFAULT_SYSTEM_PROMPT) + + if prompt != event.agent.system_prompt: + event.agent.system_prompt = prompt + + def _override_tool_desc(self, event: BeforeToolCallEvent) -> None: + config = BedrockAgentCoreContext.get_config_bundle() + tool_descs = config.get("toolDescriptions", {}) + + tool_name = event.tool_use["name"] + override = tool_descs.get(tool_name) + if override and event.selected_tool: + spec = event.selected_tool.tool_spec + if spec and "description" in spec: + spec["description"] = override + {{/if}} -""" {{#if hasMemory}} def agent_factory(): @@ -108,13 +156,23 @@ def get_or_create_agent(session_id, user_id): cache[key] = Agent( model=load_model(), session_manager=get_memory_session_manager(session_id, user_id), - system_prompt=SYSTEM_PROMPT, - tools=tools + system_prompt=DEFAULT_SYSTEM_PROMPT, + tools=tools{{#if hasConfigBundle}}, + hooks=[ConfigBundleHook()]{{/if}} ) return cache[key] return get_or_create_agent get_or_create_agent = agent_factory() {{else}} +{{#if hasConfigBundle}} +def create_agent(): + return Agent( + 
model=load_model(), + system_prompt=DEFAULT_SYSTEM_PROMPT, + tools=tools, + hooks=[ConfigBundleHook()], + ) +{{else}} _agent = None def get_or_create_agent(): @@ -122,11 +180,12 @@ def get_or_create_agent(): if _agent is None: _agent = Agent( model=load_model(), - system_prompt=SYSTEM_PROMPT, + system_prompt=DEFAULT_SYSTEM_PROMPT, tools=tools ) return _agent {{/if}} +{{/if}} @app.entrypoint @@ -137,8 +196,12 @@ async def invoke(payload, context): session_id = getattr(context, 'session_id', 'default-session') user_id = getattr(context, 'user_id', 'default-user') agent = get_or_create_agent(session_id, user_id) +{{else}} +{{#if hasConfigBundle}} + agent = create_agent() {{else}} agent = get_or_create_agent() +{{/if}} {{/if}} # Execute and format response diff --git a/src/cli/__tests__/global-config.test.ts b/src/cli/__tests__/global-config.test.ts index 2851a13a4..6e2038973 100644 --- a/src/cli/__tests__/global-config.test.ts +++ b/src/cli/__tests__/global-config.test.ts @@ -1,4 +1,9 @@ -import { getOrCreateInstallationId, readGlobalConfig, updateGlobalConfig } from '../global-config'; +import { + getOrCreateInstallationId, + readGlobalConfig, + readGlobalConfigSync, + updateGlobalConfig, +} from '../../lib/schemas/io/global-config'; import { createTempConfig } from './helpers/temp-config'; import { readFile, writeFile } from 'fs/promises'; import { afterAll, beforeEach, describe, expect, it } from 'vitest'; @@ -21,10 +26,29 @@ describe('global-config', () => { it('returns empty object when file is missing or invalid', async () => { expect(await readGlobalConfig(tmp.testDir + '/nonexistent.json')).toEqual({}); - await writeFile(tmp.configFile, JSON.stringify({ telemetry: { enabled: 'false' } })); + await writeFile(tmp.configFile, 'not json'); expect(await readGlobalConfig(tmp.configFile)).toEqual({}); }); + it('drops invalid fields while preserving valid ones', async () => { + await writeFile( + tmp.configFile, + JSON.stringify({ + transactionSearchIndexPercentage: 
'not-a-number', + uvIndex: 'https://valid.url', + telemetry: { enabled: 'yes', endpoint: 'https://example.com' }, + }) + ); + + const config = await readGlobalConfig(tmp.configFile); + + expect(config).toEqual({ + transactionSearchIndexPercentage: undefined, + uvIndex: 'https://valid.url', + telemetry: { enabled: undefined, endpoint: 'https://example.com' }, + }); + }); + it('preserves unknown fields via passthrough', async () => { const full = { installationId: 'abc-123', @@ -39,6 +63,21 @@ describe('global-config', () => { }); }); + describe('readGlobalConfigSync', () => { + it('returns parsed config when file exists', async () => { + await writeFile(tmp.configFile, JSON.stringify({ telemetry: { enabled: false } })); + + expect(readGlobalConfigSync(tmp.configFile)).toEqual({ telemetry: { enabled: false } }); + }); + + it('returns empty object when file is missing or invalid', async () => { + expect(readGlobalConfigSync(tmp.testDir + '/nonexistent.json')).toEqual({}); + + await writeFile(tmp.configFile, 'not json'); + expect(readGlobalConfigSync(tmp.configFile)).toEqual({}); + }); + }); + describe('updateGlobalConfig', () => { it('creates directory and writes config when none exists', async () => { const fresh = createTempConfig('gc-fresh'); diff --git a/src/cli/aws/__tests__/agentcore-ab-tests.test.ts b/src/cli/aws/__tests__/agentcore-ab-tests.test.ts new file mode 100644 index 000000000..94dca3bdb --- /dev/null +++ b/src/cli/aws/__tests__/agentcore-ab-tests.test.ts @@ -0,0 +1,345 @@ +import { createABTest, deleteABTest, getABTest, listABTests, updateABTest } from '../agentcore-ab-tests.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +vi.mock('../account', () => ({ + getCredentialProvider: vi.fn().mockReturnValue({ + accessKeyId: 'AKID', + secretAccessKey: 'SECRET', + sessionToken: 'TOKEN', + }), +})); + +vi.mock('@smithy/signature-v4', () => ({ + SignatureV4: class { + // 
eslint-disable-next-line @typescript-eslint/require-await + async sign(request: { headers: Record }) { + return { headers: { ...request.headers, Authorization: 'signed' } }; + } + }, +})); + +vi.mock('@aws-crypto/sha256-js', () => ({ + Sha256: class {}, +})); + +vi.mock('@aws-sdk/credential-provider-node', () => ({ + defaultProvider: vi.fn(), +})); + +function mockJsonResponse(body: unknown, status = 200) { + return { + ok: status >= 200 && status < 300, + status, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + json: () => Promise.resolve(body), + text: () => Promise.resolve(JSON.stringify(body)), + }; +} + +describe('agentcore-ab-tests', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('createABTest', () => { + it('sends POST to /ab-tests with correct body', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + abTestId: 'abt-001', + abTestArn: 'arn:abt:001', + name: 'MyTest', + status: 'CREATED', + executionStatus: 'STOPPED', + createdAt: '2026-01-01T00:00:00Z', + }) + ); + + const result = await createABTest({ + region: 'us-east-1', + name: 'MyTest', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-1', + roleArn: 'arn:aws:iam::123:role/TestRole', + variants: [ + { + name: 'C', + weight: 80, + variantConfiguration: { configurationBundle: { bundleArn: 'arn:bundle:c', bundleVersion: 'v1' } }, + }, + { + name: 'T1', + weight: 20, + variantConfiguration: { configurationBundle: { bundleArn: 'arn:bundle:t', bundleVersion: 'v1' } }, + }, + ], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval:config' }, + }); + + expect(result.abTestId).toBe('abt-001'); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/ab-tests'), + expect.objectContaining({ method: 'POST' }) + ); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.name).toBe('MyTest'); + expect(body.gatewayArn).toBe('arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-1'); + 
expect(body.variants).toHaveLength(2); + expect(body.clientToken).toBeDefined(); + }); + + it('omits optional fields when not provided', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + abTestId: 'abt-002', + abTestArn: 'arn:abt:002', + status: 'CREATED', + executionStatus: 'STOPPED', + createdAt: '2026-01-01T00:00:00Z', + }) + ); + + await createABTest({ + region: 'us-east-1', + name: 'Test', + gatewayArn: 'arn:gw', + roleArn: 'arn:role', + variants: [], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval' }, + }); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.description).toBeUndefined(); + expect(body.trafficAllocationConfig).toBeUndefined(); + expect(body.maxDurationDays).toBeUndefined(); + expect(body.enableOnCreate).toBeUndefined(); + }); + + it('includes optional fields when provided', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + abTestId: 'abt-003', + abTestArn: 'arn:abt:003', + status: 'CREATED', + executionStatus: 'RUNNING', + createdAt: '2026-01-01T00:00:00Z', + }) + ); + + await createABTest({ + region: 'us-east-1', + name: 'Test', + description: 'A description', + gatewayArn: 'arn:gw', + roleArn: 'arn:role', + variants: [], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval' }, + trafficAllocationConfig: { routeOnHeader: { headerName: 'X-AB' } }, + maxDurationDays: 30, + enableOnCreate: true, + }); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.description).toBe('A description'); + expect(body.trafficAllocationConfig).toEqual({ routeOnHeader: { headerName: 'X-AB' } }); + expect(body.maxDurationDays).toBe(30); + expect(body.enableOnCreate).toBe(true); + }); + + it('throws on non-ok response', async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 400, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + text: () => Promise.resolve('Bad Request'), + }); + + await expect( + createABTest({ + region: 'us-east-1', + 
name: 'Test', + gatewayArn: 'arn:gw', + roleArn: 'arn:role', + variants: [], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval' }, + }) + ).rejects.toThrow('ABTest API error (400)'); + }); + }); + + describe('getABTest', () => { + it('sends GET to /ab-tests/{id}', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + abTestId: 'abt-123', + abTestArn: 'arn:abt:123', + name: 'MyTest', + status: 'ACTIVE', + executionStatus: 'RUNNING', + gatewayArn: 'arn:gw', + roleArn: 'arn:role', + variants: [], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval' }, + createdAt: '2026-01-01T00:00:00Z', + updatedAt: '2026-01-02T00:00:00Z', + results: { + analysisTimestamp: '2026-01-02T00:00:00Z', + evaluatorMetrics: [], + }, + }) + ); + + const result = await getABTest({ region: 'us-east-1', abTestId: 'abt-123' }); + + expect(result.abTestId).toBe('abt-123'); + expect(result.results).toBeDefined(); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/ab-tests/abt-123'), + expect.objectContaining({ method: 'GET' }) + ); + }); + }); + + describe('updateABTest', () => { + it('sends PUT to /ab-tests/{id} with only defined fields', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + abTestId: 'abt-123', + abTestArn: 'arn:abt:123', + status: 'ACTIVE', + executionStatus: 'PAUSED', + updatedAt: '2026-01-02T00:00:00Z', + }) + ); + + await updateABTest({ + region: 'us-east-1', + abTestId: 'abt-123', + executionStatus: 'PAUSED', + }); + + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/ab-tests/abt-123'), + expect.objectContaining({ method: 'PUT' }) + ); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.executionStatus).toBe('PAUSED'); + expect(body.clientToken).toBeDefined(); + expect(body.name).toBeUndefined(); + expect(body.description).toBeUndefined(); + expect(body.variants).toBeUndefined(); + }); + + it('includes all provided fields', async () => { + mockFetch.mockResolvedValue( + 
mockJsonResponse({ + abTestId: 'abt-123', + abTestArn: 'arn:abt:123', + status: 'ACTIVE', + executionStatus: 'RUNNING', + updatedAt: '2026-01-02T00:00:00Z', + }) + ); + + await updateABTest({ + region: 'us-east-1', + abTestId: 'abt-123', + name: 'Updated', + description: 'New desc', + maxDurationDays: 60, + roleArn: 'arn:new-role', + }); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.name).toBe('Updated'); + expect(body.description).toBe('New desc'); + expect(body.maxDurationDays).toBe(60); + expect(body.roleArn).toBe('arn:new-role'); + }); + }); + + describe('deleteABTest', () => { + it('sends DELETE to /ab-tests/{id} and returns success', async () => { + mockFetch.mockResolvedValue(mockJsonResponse({}, 204)); + + const result = await deleteABTest({ region: 'us-east-1', abTestId: 'abt-123' }); + + expect(result.success).toBe(true); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/ab-tests/abt-123'), + expect.objectContaining({ method: 'DELETE' }) + ); + }); + + it('returns error on 404', async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 404, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + text: () => Promise.resolve('Not Found'), + }); + + const result = await deleteABTest({ region: 'us-east-1', abTestId: 'abt-999' }); + + expect(mockFetch).toHaveBeenCalledTimes(1); + expect(mockFetch.mock.calls[0]![0]).toContain('/ab-tests/abt-999'); + expect(result.success).toBe(false); + expect(result.error).toContain('ABTest API error (404)'); + }); + + it('returns error on network failure', async () => { + mockFetch.mockRejectedValue(new Error('Network error')); + + const result = await deleteABTest({ region: 'us-east-1', abTestId: 'abt-123' }); + + expect(result.success).toBe(false); + expect(result.error).toBe('Network error'); + }); + }); + + describe('listABTests', () => { + it('sends GET to /ab-tests', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + abTests: [ + { + 
abTestId: 'abt-1', + abTestArn: 'arn:abt:1', + name: 'Test1', + status: 'ACTIVE', + executionStatus: 'RUNNING', + createdAt: '2026-01-01T00:00:00Z', + updatedAt: '2026-01-01T00:00:00Z', + }, + ], + }) + ); + + const result = await listABTests({ region: 'us-east-1' }); + + expect(result.abTests).toHaveLength(1); + expect(result.abTests[0]!.name).toBe('Test1'); + }); + + it('passes maxResults and nextToken as query params', async () => { + mockFetch.mockResolvedValue(mockJsonResponse({ abTests: [] })); + + await listABTests({ region: 'us-east-1', maxResults: 10, nextToken: 'abc' }); + + const url = mockFetch.mock.calls[0]![0] as string; + expect(url).toContain('maxResults=10'); + expect(url).toContain('nextToken=abc'); + }); + + it('returns empty array when response has no abTests', async () => { + mockFetch.mockResolvedValue(mockJsonResponse({})); + + const result = await listABTests({ region: 'us-east-1' }); + + expect(result.abTests).toEqual([]); + }); + }); +}); diff --git a/src/cli/aws/__tests__/agentcore-http-gateways.test.ts b/src/cli/aws/__tests__/agentcore-http-gateways.test.ts new file mode 100644 index 000000000..f9ace9a7a --- /dev/null +++ b/src/cli/aws/__tests__/agentcore-http-gateways.test.ts @@ -0,0 +1,235 @@ +import { createHttpGatewayTarget, getHttpGateway, listHttpGatewayTargets } from '../agentcore-http-gateways.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +vi.mock('../account', () => ({ + getCredentialProvider: vi.fn().mockReturnValue({ + accessKeyId: 'AKID', + secretAccessKey: 'SECRET', + sessionToken: 'TOKEN', + }), +})); + +vi.mock('@smithy/signature-v4', () => ({ + SignatureV4: class { + // eslint-disable-next-line @typescript-eslint/require-await + async sign(request: { headers: Record }) { + return { headers: { ...request.headers, Authorization: 'signed' } }; + } + }, +})); + +vi.mock('@aws-crypto/sha256-js', () => ({ + Sha256: class {}, +})); + 
+vi.mock('@aws-sdk/credential-provider-node', () => ({ + defaultProvider: vi.fn(), +})); + +function mockJsonResponse(body: unknown, status = 200) { + return { + ok: status >= 200 && status < 300, + status, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + json: () => Promise.resolve(body), + text: () => Promise.resolve(JSON.stringify(body)), + }; +} + +describe('agentcore-http-gateways', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('createHttpGatewayTarget', () => { + it('sends agentcoreRuntime in request body', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + targetId: 'tgt-001', + name: 'my-target', + status: 'CREATING', + }) + ); + + const result = await createHttpGatewayTarget({ + region: 'us-east-1', + gatewayId: 'gw-123', + targetName: 'my-target', + runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', + qualifier: 'DEFAULT', + }); + + expect(result.targetId).toBe('tgt-001'); + expect(result.name).toBe('my-target'); + expect(mockFetch).toHaveBeenCalledTimes(1); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.name).toBe('my-target'); + expect(body.targetConfiguration.http.agentcoreRuntime).toEqual({ + arn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', + qualifier: 'DEFAULT', + }); + expect(body.credentialProviderConfigurations).toEqual([{ credentialProviderType: 'GATEWAY_IAM_ROLE' }]); + expect(body.clientToken).toBeDefined(); + }); + + it('falls back to runtimeTargetConfiguration on ValidationException', async () => { + // First call fails with ValidationException + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 400, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + text: () => Promise.resolve('ValidationException: Unknown field agentcoreRuntime'), + }); + // Second call (fallback) succeeds + mockFetch.mockResolvedValueOnce( + mockJsonResponse({ + targetId: 'tgt-002', + name: 'my-target', + status: 'CREATING', + }) + ); + 
+ const result = await createHttpGatewayTarget({ + region: 'us-east-1', + gatewayId: 'gw-123', + targetName: 'my-target', + runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', + }); + + expect(result.targetId).toBe('tgt-002'); + expect(mockFetch).toHaveBeenCalledTimes(2); + + // Second call should use runtimeTargetConfiguration + const fallbackBody = JSON.parse(mockFetch.mock.calls[1]![1].body); + expect(fallbackBody.targetConfiguration.http.runtimeTargetConfiguration).toEqual({ + arn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', + qualifier: 'DEFAULT', + }); + }); + + it('falls back to runtimeTargetConfiguration on 400 status', async () => { + // First call fails with 400 + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 400, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + text: () => Promise.resolve('400 Bad Request'), + }); + // Second call (fallback) succeeds + mockFetch.mockResolvedValueOnce( + mockJsonResponse({ + targetId: 'tgt-003', + name: 'my-target', + status: 'CREATING', + }) + ); + + const result = await createHttpGatewayTarget({ + region: 'us-east-1', + gatewayId: 'gw-123', + targetName: 'my-target', + runtimeArn: 'arn:runtime', + }); + + expect(result.targetId).toBe('tgt-003'); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + + it('throws on non-validation errors (no fallback)', async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 500, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + text: () => Promise.resolve('Internal Server Error'), + }); + + await expect( + createHttpGatewayTarget({ + region: 'us-east-1', + gatewayId: 'gw-123', + targetName: 'my-target', + runtimeArn: 'arn:runtime', + }) + ).rejects.toThrow('Failed to create target'); + + // Only one call — no fallback attempt + expect(mockFetch).toHaveBeenCalledTimes(1); + }); + }); + + describe('getHttpGateway', () => { + it('returns gateway details', async () => { + mockFetch.mockResolvedValue( + 
mockJsonResponse({ + gatewayId: 'gw-123', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-123', + gatewayUrl: 'https://gw-123.example.com', + name: 'my-gateway', + status: 'READY', + authorizerType: 'AWS_IAM', + roleArn: 'arn:aws:iam::123:role/GwRole', + createdAt: '2026-01-01T00:00:00Z', + updatedAt: '2026-01-02T00:00:00Z', + }) + ); + + const result = await getHttpGateway({ region: 'us-east-1', gatewayId: 'gw-123' }); + + expect(result.gatewayId).toBe('gw-123'); + expect(result.name).toBe('my-gateway'); + expect(result.status).toBe('READY'); + expect(result.gatewayUrl).toBe('https://gw-123.example.com'); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/gateways/gw-123'), + expect.objectContaining({ method: 'GET' }) + ); + }); + }); + + describe('listHttpGatewayTargets', () => { + it('returns targets array', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + targets: [ + { targetId: 'tgt-1', name: 'target-1', status: 'READY' }, + { targetId: 'tgt-2', name: 'target-2', status: 'CREATING' }, + ], + }) + ); + + const result = await listHttpGatewayTargets({ + region: 'us-east-1', + gatewayId: 'gw-123', + }); + + expect(result.targets).toHaveLength(2); + expect(result.targets[0]!.targetId).toBe('tgt-1'); + expect(result.targets[0]!.name).toBe('target-1'); + expect(result.targets[1]!.targetId).toBe('tgt-2'); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/gateways/gw-123/targets'), + expect.objectContaining({ method: 'GET' }) + ); + }); + + it('handles response with items field instead of targets', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + items: [{ targetId: 'tgt-1', name: 'target-1', status: 'READY' }], + }) + ); + + const result = await listHttpGatewayTargets({ + region: 'us-east-1', + gatewayId: 'gw-123', + }); + + expect(result.targets).toHaveLength(1); + expect(result.targets[0]!.targetId).toBe('tgt-1'); + }); + }); +}); diff --git 
a/src/cli/aws/__tests__/agentcore-recommendation.test.ts b/src/cli/aws/__tests__/agentcore-recommendation.test.ts new file mode 100644 index 000000000..1b330cf30 --- /dev/null +++ b/src/cli/aws/__tests__/agentcore-recommendation.test.ts @@ -0,0 +1,295 @@ +import { + deleteRecommendation, + getRecommendation, + listRecommendations, + startRecommendation, +} from '../agentcore-recommendation.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +vi.mock('../account', () => ({ + getCredentialProvider: vi.fn().mockReturnValue({ + accessKeyId: 'AKID', + secretAccessKey: 'SECRET', + sessionToken: 'TOKEN', + }), +})); + +vi.mock('@smithy/signature-v4', () => ({ + SignatureV4: class { + // eslint-disable-next-line @typescript-eslint/require-await + async sign(request: { headers: Record }) { + return { headers: { ...request.headers, Authorization: 'signed' } }; + } + }, +})); + +vi.mock('@aws-crypto/sha256-js', () => ({ + Sha256: class {}, +})); + +vi.mock('@aws-sdk/credential-provider-node', () => ({ + defaultProvider: vi.fn(), +})); + +function mockJsonResponse(body: unknown, status = 200) { + return { + ok: status >= 200 && status < 300, + status, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + json: () => Promise.resolve(body), + text: () => Promise.resolve(JSON.stringify(body)), + }; +} + +describe('agentcore-recommendation', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('startRecommendation', () => { + it('sends POST to /recommendations with correct body', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + recommendationId: 'rec-123', + recommendationArn: 'arn:rec-123', + name: 'MyRecommendation', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }) + ); + + const result = await startRecommendation({ + region: 'us-west-2', + name: 'MyRecommendation', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + recommendationConfig: { + 
systemPromptRecommendationConfig: { + systemPrompt: { text: 'You are a helpful agent.' }, + agentTraces: { + cloudwatchLogs: { + logGroupArns: ['arn:log-group'], + serviceNames: ['bedrock-agentcore'], + startTime: '2026-03-23T00:00:00.000Z', + endTime: '2026-03-30T00:00:00.000Z', + }, + }, + evaluationConfig: { + evaluators: [{ evaluatorArn: 'arn:aws:bedrock-agentcore:::evaluator/Builtin.Helpfulness' }], + }, + }, + }, + }); + + expect(result.recommendationId).toBe('rec-123'); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/recommendations'), + expect.objectContaining({ method: 'POST' }) + ); + + const fetchCall = mockFetch.mock.calls[0]!; + const body = JSON.parse(fetchCall[1].body); + expect(body.name).toBe('MyRecommendation'); + expect(body.type).toBe('SYSTEM_PROMPT_RECOMMENDATION'); + expect(body.recommendationConfig.systemPromptRecommendationConfig).toBeDefined(); + }); + + it('omits description when not provided', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + recommendationId: 'r1', + recommendationArn: 'arn:1', + name: 'MyRec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }) + ); + + await startRecommendation({ + region: 'us-west-2', + name: 'MyRec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + recommendationConfig: { + systemPromptRecommendationConfig: { + systemPrompt: { text: '' }, + agentTraces: { + cloudwatchLogs: { + logGroupArns: [], + serviceNames: ['bedrock-agentcore'], + startTime: '2026-03-23T00:00:00.000Z', + endTime: '2026-03-30T00:00:00.000Z', + }, + }, + evaluationConfig: { + evaluators: [{ evaluatorArn: 'arn:aws:bedrock-agentcore:::evaluator/Builtin.Helpfulness' }], + }, + }, + }, + }); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.description).toBeUndefined(); + }); + + it('includes description when provided', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + recommendationId: 'r1', + recommendationArn: 'arn:1', + name: 'MyRec', + 
type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }) + ); + + await startRecommendation({ + region: 'us-west-2', + name: 'MyRec', + description: 'Test description', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + recommendationConfig: { + systemPromptRecommendationConfig: { + systemPrompt: { text: '' }, + agentTraces: { + cloudwatchLogs: { + logGroupArns: [], + serviceNames: ['bedrock-agentcore'], + startTime: '2026-03-23T00:00:00.000Z', + endTime: '2026-03-30T00:00:00.000Z', + }, + }, + evaluationConfig: { + evaluators: [{ evaluatorArn: 'arn:aws:bedrock-agentcore:::evaluator/Builtin.Helpfulness' }], + }, + }, + }, + }); + + const body = JSON.parse(mockFetch.mock.calls[0]![1].body); + expect(body.description).toBe('Test description'); + }); + + it('throws on non-ok response', async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 400, + headers: new Map([['x-amzn-requestid', 'test-request-id']]), + text: () => Promise.resolve('Bad Request'), + }); + + await expect( + startRecommendation({ + region: 'us-west-2', + name: 'MyRec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + recommendationConfig: {}, + }) + ).rejects.toThrow('Recommendation API error (400)'); + }); + }); + + describe('getRecommendation', () => { + it('sends GET to /recommendations/{id}', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + recommendationId: 'rec-123', + recommendationArn: 'arn:rec-123', + name: 'MyRec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'COMPLETED', + recommendationResult: { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'Optimized prompt', + explanation: 'Made it better', + }, + }, + }) + ); + + const result = await getRecommendation({ region: 'us-west-2', recommendationId: 'rec-123' }); + + expect(result.recommendationId).toBe('rec-123'); + expect(result.name).toBe('MyRec'); + expect(result.recommendationResult?.systemPromptRecommendationResult?.recommendedSystemPrompt).toBe( + 'Optimized prompt' + ); + 
expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/recommendations/rec-123'), + expect.objectContaining({ method: 'GET' }) + ); + }); + }); + + describe('deleteRecommendation', () => { + it('sends DELETE to /recommendations/{id}', async () => { + mockFetch.mockResolvedValue(mockJsonResponse({ recommendationId: 'rec-123', status: 'DELETING' }, 200)); + + const result = await deleteRecommendation({ region: 'us-west-2', recommendationId: 'rec-123' }); + + expect(result.recommendationId).toBe('rec-123'); + expect(result.status).toBe('DELETING'); + expect(mockFetch).toHaveBeenCalledWith( + expect.stringContaining('/recommendations/rec-123'), + expect.objectContaining({ method: 'DELETE' }) + ); + }); + + it('throws on failure', async () => { + mockFetch.mockRejectedValue(new Error('Network error')); + + await expect(deleteRecommendation({ region: 'us-west-2', recommendationId: 'rec-123' })).rejects.toThrow( + 'Network error' + ); + }); + }); + + describe('listRecommendations', () => { + it('sends GET to /recommendations', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + recommendationSummaries: [ + { + recommendationId: 'r1', + recommendationArn: 'arn:r1', + name: 'Rec1', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'COMPLETED', + }, + { + recommendationId: 'r2', + recommendationArn: 'arn:r2', + name: 'Rec2', + type: 'TOOL_DESCRIPTION_RECOMMENDATION', + status: 'COMPLETED', + }, + ], + }) + ); + + const result = await listRecommendations({ region: 'us-west-2' }); + + expect(result.recommendationSummaries).toHaveLength(2); + expect(result.recommendationSummaries[0]!.name).toBe('Rec1'); + }); + + it('passes maxResults and nextToken as query params', async () => { + mockFetch.mockResolvedValue(mockJsonResponse({ recommendationSummaries: [] })); + + await listRecommendations({ region: 'us-west-2', maxResults: 10, nextToken: 'abc' }); + + const url = mockFetch.mock.calls[0]![0] as string; + expect(url).toContain('maxResults=10'); + 
expect(url).toContain('nextToken=abc'); + }); + + it('returns empty array when response has no recommendationSummaries', async () => { + mockFetch.mockResolvedValue(mockJsonResponse({})); + + const result = await listRecommendations({ region: 'us-west-2' }); + + expect(result.recommendationSummaries).toEqual([]); + }); + }); +}); diff --git a/src/cli/aws/__tests__/agentcore.test.ts b/src/cli/aws/__tests__/agentcore.test.ts index e26e4324e..f23ff5c83 100644 --- a/src/cli/aws/__tests__/agentcore.test.ts +++ b/src/cli/aws/__tests__/agentcore.test.ts @@ -1,4 +1,4 @@ -import { extractResult, parseA2AResponse, parseSSE, parseSSELine } from '../agentcore.js'; +import { buildBearerInvokeHeaders, extractResult, parseA2AResponse, parseSSE, parseSSELine } from '../agentcore.js'; import { describe, expect, it } from 'vitest'; describe('parseSSELine', () => { @@ -176,3 +176,43 @@ describe('parseA2AResponse', () => { expect(parseA2AResponse('not json')).toBe('not json'); }); }); + +describe('buildBearerInvokeHeaders', () => { + it('includes custom headers from options.headers', () => { + const headers = buildBearerInvokeHeaders( + { + bearerToken: 'tok', + headers: { + 'x-amzn-bedrock-agentcore-runtime-custom-foo': 'bar', + 'x-amzn-bedrock-agentcore-runtime-custom-baz': 'qux', + }, + }, + 'application/json' + ); + expect(headers['x-amzn-bedrock-agentcore-runtime-custom-foo']).toBe('bar'); + expect(headers['x-amzn-bedrock-agentcore-runtime-custom-baz']).toBe('qux'); + }); + + it('sets Authorization, Content-Type, Accept, and default user ID', () => { + const headers = buildBearerInvokeHeaders({ bearerToken: 'tok' }, 'application/json'); + expect(headers.Authorization).toBe('Bearer tok'); + expect(headers['Content-Type']).toBe('application/json'); + expect(headers.Accept).toBe('application/json'); + expect(headers['X-Amzn-Bedrock-AgentCore-Runtime-User-Id']).toBe('default-user'); + }); + + it('sets session ID header when provided', () => { + const headers = 
buildBearerInvokeHeaders({ bearerToken: 'tok', sessionId: 's1' }, 'application/json'); + expect(headers['X-Amzn-Bedrock-AgentCore-Runtime-Session-Id']).toBe('s1'); + }); + + it('omits session ID header when not provided', () => { + const headers = buildBearerInvokeHeaders({ bearerToken: 'tok' }, 'application/json'); + expect(headers).not.toHaveProperty('X-Amzn-Bedrock-AgentCore-Runtime-Session-Id'); + }); + + it('returns correct headers when options.headers is undefined', () => { + const headers = buildBearerInvokeHeaders({ bearerToken: 'tok' }, 'application/json'); + expect(Object.keys(headers)).toHaveLength(4); // Authorization, Content-Type, Accept, User-Id + }); +}); diff --git a/src/cli/aws/agentcore-ab-tests.ts b/src/cli/aws/agentcore-ab-tests.ts new file mode 100644 index 000000000..4bcf0ce16 --- /dev/null +++ b/src/cli/aws/agentcore-ab-tests.ts @@ -0,0 +1,360 @@ +/** + * AWS client wrappers for AB Test data plane operations. + * + * Uses the AgentCore Evaluation DataPlane API (bedrock-agentcore) + * with direct HTTP requests and SigV4 signing. 
+ */ +import { getCredentialProvider } from './account'; +import { dnsSuffix } from './partition'; +import { Sha256 } from '@aws-crypto/sha256-js'; +import { defaultProvider } from '@aws-sdk/credential-provider-node'; +import { HttpRequest } from '@smithy/protocol-http'; +import { SignatureV4 } from '@smithy/signature-v4'; +import { randomUUID } from 'node:crypto'; + +// ============================================================================ +// Types +// ============================================================================ + +export interface ABTestVariant { + name: 'C' | 'T1'; + weight: number; + variantConfiguration: { + configurationBundle?: { + bundleArn: string; + bundleVersion: string; + }; + target?: { + name: string; + }; + }; +} + +export type ABTestEvaluationConfig = + | { onlineEvaluationConfigArn: string } + | { + perVariantOnlineEvaluationConfig: { + name: 'C' | 'T1'; + onlineEvaluationConfigArn: string; + }[]; + }; + +export interface GatewayFilter { + targetPaths: string[]; +} + +export interface TrafficAllocationConfig { + routeOnHeader: { + headerName: string; + }; +} + +export interface ConfidenceInterval { + lower?: number; + upper?: number; +} + +export interface ControlStats { + treatmentName: string; + sampleSize: number; + mean: number; +} + +export interface VariantResult { + treatmentName: string; + sampleSize: number; + mean: number; + absoluteChange?: number; + percentChange?: number; + pValue?: number; + confidenceInterval?: ConfidenceInterval; + isSignificant: boolean; +} + +export interface EvaluatorMetric { + evaluatorArn: string; + controlStats: ControlStats; + variantResults: VariantResult[]; +} + +export interface ABTestResults { + analysisTimestamp?: string; + evaluatorMetrics: EvaluatorMetric[]; +} + +// ── Create ────────────────────────────────────────────────────────────────── + +export interface CreateABTestOptions { + region: string; + name: string; + description?: string; + gatewayArn: string; + roleArn: 
string; + variants: ABTestVariant[]; + evaluationConfig: ABTestEvaluationConfig; + gatewayFilter?: GatewayFilter; + trafficAllocationConfig?: TrafficAllocationConfig; + maxDurationDays?: number; + enableOnCreate?: boolean; +} + +export interface CreateABTestResult { + abTestId: string; + abTestArn: string; + name?: string; + status: string; + executionStatus: string; + createdAt: string; +} + +// ── Get ───────────────────────────────────────────────────────────────────── + +export interface GetABTestOptions { + region: string; + abTestId: string; +} + +export interface GetABTestResult { + abTestId: string; + abTestArn: string; + name: string; + description?: string; + status: string; + executionStatus: string; + gatewayArn: string; + roleArn: string; + variants: ABTestVariant[]; + evaluationConfig: ABTestEvaluationConfig; + trafficAllocationConfig?: TrafficAllocationConfig; + maxDurationDays?: number; + currentRunId?: string; + stopReason?: string; + failureReason?: string; + startedAt?: string; + stoppedAt?: string; + maxDurationExpiresAt?: string; + createdAt: string; + updatedAt: string; + results?: ABTestResults; +} + +// ── Update ────────────────────────────────────────────────────────────────── + +export interface UpdateABTestOptions { + region: string; + abTestId: string; + name?: string; + description?: string; + variants?: ABTestVariant[]; + trafficAllocationConfig?: TrafficAllocationConfig; + evaluationConfig?: ABTestEvaluationConfig; + maxDurationDays?: number; + executionStatus?: 'PAUSED' | 'RUNNING' | 'STOPPED'; + roleArn?: string; +} + +export interface UpdateABTestResult { + abTestId: string; + abTestArn: string; + status: string; + executionStatus: string; + failureReason?: string; + updatedAt: string; +} + +// ── Delete ────────────────────────────────────────────────────────────────── + +export interface DeleteABTestOptions { + region: string; + abTestId: string; +} + +// ── List 
──────────────────────────────────────────────────────────────────── + +export interface ListABTestsOptions { + region: string; + maxResults?: number; + nextToken?: string; +} + +export interface ABTestSummary { + abTestId: string; + abTestArn: string; + name: string; + description?: string; + status: string; + executionStatus: string; + gatewayArn?: string; + createdAt: string; + updatedAt: string; +} + +export interface ListABTestsResult { + abTests: ABTestSummary[]; + nextToken?: string; +} + +// ============================================================================ +// HTTP signing helpers (SigV4; the query string is signed via HttpRequest.query, not the path) +// ============================================================================ + +function getDataPlaneEndpoint(region: string): string { + const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); + if (stage === 'beta') return `https://beta.${region}.elcapdp.genesis-primitives.aws.dev`; + if (stage === 'gamma') return `https://gamma.${region}.elcapdp.genesis-primitives.aws.dev`; + return `https://bedrock-agentcore.${region}.${dnsSuffix(region)}`; +} + +async function signedRequestToEndpoint( + endpoint: string, + options: { + region: string; + method: string; + path: string; + body?: string; + } +): Promise<unknown> { + const { region, method, path, body } = options; + const url = new URL(path, endpoint); + + const query: Record<string, string> = {}; + url.searchParams.forEach((value, key) => { + query[key] = value; + }); + + const request = new HttpRequest({ + method, + protocol: 'https:', + hostname: url.hostname, + path: url.pathname, + ...(Object.keys(query).length > 0 && { query }), + headers: { + 'Content-Type': 'application/json', + host: url.hostname, + }, + ...(body && { body }), + }); + + const credentials = getCredentialProvider() ?? 
defaultProvider(); + const service = 'bedrock-agentcore'; + const signer = new SignatureV4({ + service, + region, + credentials, + sha256: Sha256, + }); + + const signedReq = await signer.sign(request); + + const response = await fetch(`${endpoint}${path}`, { + method, + headers: signedReq.headers as Record<string, string>, + ...(body && { body }), + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error(`ABTest API error (${response.status}): ${errorBody}`); + } + + if (response.status === 204) return {}; + return response.json(); +} + +/** Data plane request — shared by every AB test operation in this module (create, get, update, delete, list). */ +async function dpRequest(options: { region: string; method: string; path: string; body?: string }): Promise<unknown> { + return signedRequestToEndpoint(getDataPlaneEndpoint(options.region), options); +} + +// ============================================================================ +// AB Test Operations (CRUD + list, all sent to the data plane endpoint) +// ============================================================================ + +export async function createABTest(options: CreateABTestOptions): Promise<CreateABTestResult> { + const body = JSON.stringify({ + name: options.name, + clientToken: randomUUID(), + gatewayArn: options.gatewayArn, + roleArn: options.roleArn, + variants: options.variants, + evaluationConfig: options.evaluationConfig, + ...(options.description && { description: options.description }), + ...(options.gatewayFilter && { gatewayFilter: options.gatewayFilter }), + ...(options.trafficAllocationConfig && { trafficAllocationConfig: options.trafficAllocationConfig }), + ...(options.maxDurationDays !== undefined && { maxDurationDays: options.maxDurationDays }), + ...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }), + }); + + const result = await dpRequest({ + region: options.region, + method: 'POST', + path: '/ab-tests', + body, + }); + + return result as CreateABTestResult; +} + +export async function getABTest(options: GetABTestOptions): Promise<GetABTestResult> 
{ + // Data plane includes results/metrics in the response; the ID is percent-encoded so it cannot alter the request path + const data = await dpRequest({ + region: options.region, + method: 'GET', + path: `/ab-tests/${encodeURIComponent(options.abTestId)}`, + }); + + return data as GetABTestResult; +} + +export async function updateABTest(options: UpdateABTestOptions): Promise<UpdateABTestResult> { + const body: Record<string, unknown> = { clientToken: randomUUID() }; + if (options.name !== undefined) body.name = options.name; + if (options.description !== undefined) body.description = options.description; + if (options.variants !== undefined) body.variants = options.variants; + if (options.trafficAllocationConfig !== undefined) body.trafficAllocationConfig = options.trafficAllocationConfig; + if (options.evaluationConfig !== undefined) body.evaluationConfig = options.evaluationConfig; + if (options.maxDurationDays !== undefined) body.maxDurationDays = options.maxDurationDays; + if (options.executionStatus !== undefined) body.executionStatus = options.executionStatus; + if (options.roleArn !== undefined) body.roleArn = options.roleArn; + + const data = await dpRequest({ + region: options.region, + method: 'PUT', + path: `/ab-tests/${encodeURIComponent(options.abTestId)}`, + body: JSON.stringify(body), + }); + + return data as UpdateABTestResult; +} + +export async function deleteABTest(options: DeleteABTestOptions): Promise<{ success: boolean; error?: string }> { + try { + await dpRequest({ + region: options.region, + method: 'DELETE', + path: `/ab-tests/${encodeURIComponent(options.abTestId)}`, + }); + return { success: true }; + } catch (err) { + return { success: false, error: err instanceof Error ? 
err.message : String(err) }; + } +} + +export async function listABTests(options: ListABTestsOptions): Promise<ListABTestsResult> { + const params = new URLSearchParams(); + if (options.maxResults) params.set('maxResults', String(options.maxResults)); + if (options.nextToken) params.set('nextToken', options.nextToken); + const query = params.toString(); + + const data = await dpRequest({ + region: options.region, + method: 'GET', + path: `/ab-tests${query ? `?${query}` : ''}`, + }); + + const result = data as ListABTestsResult; + return { + abTests: result.abTests ?? [], + nextToken: result.nextToken, + }; +} diff --git a/src/cli/aws/agentcore-batch-evaluation.ts b/src/cli/aws/agentcore-batch-evaluation.ts new file mode 100644 index 000000000..9b0923753 --- /dev/null +++ b/src/cli/aws/agentcore-batch-evaluation.ts @@ -0,0 +1,411 @@ +/** + * AWS client wrappers for BatchEvaluation operations. + * + * The BatchEvaluation API is a flat, stateless model — no persistent "job" resource. + * Each batch evaluation is started, polled, and optionally stopped or deleted. + * + * Endpoints: + * POST /evaluations/batch-evaluate → StartBatchEvaluation + * GET /evaluations/batch-evaluate/{batchEvaluationId} → GetBatchEvaluation + * GET /evaluations/batch-evaluate → ListBatchEvaluations + * POST /evaluations/batch-evaluate/{batchEvaluationId}/stop → StopBatchEvaluation + * DELETE /evaluations/batch-evaluate/{batchEvaluationId} → DeleteBatchEvaluation + * + * Uses direct HTTP requests with SigV4 signing (service: bedrock-agentcore). 
+ */ +import { getCredentialProvider } from './account'; +import { dnsSuffix } from './partition'; +import { Sha256 } from '@aws-crypto/sha256-js'; +import { defaultProvider } from '@aws-sdk/credential-provider-node'; +import { HttpRequest } from '@smithy/protocol-http'; +import { SignatureV4 } from '@smithy/signature-v4'; + +// ============================================================================ +// Types +// ============================================================================ + +export interface SessionFilterConfig { + startTime?: string; + endTime?: string; +} + +export interface CloudWatchFilterConfig { + sessionIds?: string[]; + timeRange?: SessionFilterConfig; +} + +export interface CloudWatchLogsSource { + serviceNames: string[]; + logGroupNames: string[]; + filterConfig?: CloudWatchFilterConfig; +} + +export interface DataSourceConfig { + cloudWatchLogs?: CloudWatchLogsSource; + onlineEvaluationConfigSource?: Record; +} + +export interface Evaluator { + evaluatorId: string; +} + +export interface GroundTruthAssertion { + text: string; +} + +export interface GroundTruthTurnInput { + prompt: string; +} + +export interface GroundTruthTurnExpectedResponse { + text: string; +} + +export interface GroundTruthTurn { + input: GroundTruthTurnInput; + expectedResponse: GroundTruthTurnExpectedResponse; +} + +export interface ExpectedTrajectory { + toolNames: string[]; +} + +export interface InlineGroundTruth { + assertions?: GroundTruthAssertion[]; + expectedTrajectory?: ExpectedTrajectory; + turns?: GroundTruthTurn[]; +} + +export interface GroundTruth { + inline: InlineGroundTruth; +} + +export interface SessionMetadataEntry { + sessionId: string; + testScenarioId?: string; + groundTruth?: GroundTruth; + metadata?: Record; +} + +export interface EvaluationMetadata { + sessionMetadata?: SessionMetadataEntry[]; +} + +export interface StartBatchEvaluationOptions { + region: string; + name: string; + evaluators: Evaluator[]; + dataSourceConfig: 
DataSourceConfig; + evaluationMetadata?: EvaluationMetadata; + description?: string; + clientToken?: string; +} + +export interface StartBatchEvaluationResult { + batchEvaluationId: string; + batchEvaluationArn: string; + name: string; + status: string; + createdAt?: string; +} + +export interface GetBatchEvaluationOptions { + region: string; + batchEvaluationId: string; +} + +export interface CloudWatchOutputConfig { + logGroupName: string; + logStreamName: string; +} + +export interface OutputConfig { + cloudWatchConfig?: CloudWatchOutputConfig; +} + +export interface EvaluatorSummary { + evaluatorId: string; + statistics?: { + averageScore?: number; + averageTokenUsage?: { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + }; + }; + totalEvaluated?: number; + totalFailed?: number; +} + +export interface EvaluationResults { + evaluatorSummaries?: EvaluatorSummary[]; + numberOfSessionsCompleted?: number; + numberOfSessionsFailed?: number; + numberOfSessionsInProgress?: number; + totalNumberOfSessions?: number; + numberOfSessionsIgnored?: number; +} + +export interface GetBatchEvaluationResult { + batchEvaluationId: string; + batchEvaluationArn: string; + name: string; + status: string; + createdAt?: string; + updatedAt?: string; + evaluators?: Evaluator[]; + dataSourceConfig?: DataSourceConfig; + outputConfig?: OutputConfig; + evaluationResults?: EvaluationResults; + errorDetails?: string[]; + description?: string; +} + +export interface BatchEvaluationResultEntry { + evaluatorId: string; + score?: number; + label?: string; + explanation?: string; + error?: string; +} + +export interface ListBatchEvaluationsOptions { + region: string; + maxResults?: number; + nextToken?: string; +} + +export interface BatchEvaluationSummary { + batchEvaluationId: string; + batchEvaluationArn: string; + name: string; + status: string; + createdAt?: string; + description?: string; + evaluators?: Evaluator[]; + evaluationResults?: EvaluationResults; + 
errorDetails?: string[]; +} + +export interface ListBatchEvaluationsResult { + batchEvaluations: BatchEvaluationSummary[]; + nextToken?: string; +} + +export interface StopBatchEvaluationOptions { + region: string; + batchEvaluationId: string; +} + +export interface StopBatchEvaluationResult { + batchEvaluationId: string; + batchEvaluationArn: string; + status: string; + description?: string; +} + +export interface DeleteBatchEvaluationOptions { + region: string; + batchEvaluationId: string; +} + +export interface DeleteBatchEvaluationResult { + batchEvaluationId: string; + batchEvaluationArn: string; + status: string; +} + +// ============================================================================ +// HTTP signing helper (the query string must be signed via HttpRequest.query, never appended to path) +// ============================================================================ + +function getEndpoint(region: string): string { + const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); + if (stage === 'beta') return `https://beta.${region}.elcapdp.genesis-primitives.aws.dev`; + if (stage === 'gamma') return `https://gamma.${region}.elcapdp.genesis-primitives.aws.dev`; + return `https://bedrock-agentcore.${region}.${dnsSuffix(region)}`; +} + +async function signedRequest(options: { + region: string; + method: string; + path: string; + body?: string; +}): Promise<{ data: unknown; status: number }> { + const { region, method, path, body } = options; + const endpoint = getEndpoint(region); + const url = new URL(path, endpoint); + + const request = new HttpRequest({ + method, + protocol: 'https:', + hostname: url.hostname, + path: url.pathname, query: Object.fromEntries(url.searchParams), + headers: { + 'Content-Type': 'application/json', + host: url.hostname, + }, + ...(body && { body }), + }); + + const credentials = getCredentialProvider() ?? 
defaultProvider(); + const signer = new SignatureV4({ + service: 'bedrock-agentcore', + region, + credentials, + sha256: Sha256, + }); + + const signedReq = await signer.sign(request); + + const response = await fetch(`${endpoint}${url.pathname}${url.search}`, { + method, + headers: signedReq.headers as Record<string, string>, + ...(body && { body }), + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error(`BatchEvaluation API error (${response.status}): ${errorBody}`); + } + + if (response.status === 204) return { data: {}, status: 204 }; + return { data: await response.json(), status: response.status }; +} + +// ============================================================================ +// API Operations +// ============================================================================ + +/** + * Start a batch evaluation (async — returns immediately with an ID to poll); missing response fields default to ''. + */ +export async function startBatchEvaluation(options: StartBatchEvaluationOptions): Promise<StartBatchEvaluationResult> { + const body: Record<string, unknown> = { + batchEvaluationName: options.name, + evaluators: options.evaluators, + dataSourceConfig: options.dataSourceConfig, + }; + if (options.evaluationMetadata) { + body.evaluationMetadata = options.evaluationMetadata; + } + if (options.description) { + body.description = options.description; + } + if (options.clientToken) { + body.clientToken = options.clientToken; + } + + const { data } = await signedRequest({ + region: options.region, + method: 'POST', + path: '/evaluations/batch-evaluate', + body: JSON.stringify(body), + }); + + const raw = data as Record<string, unknown>; + return { + batchEvaluationId: (raw.batchEvaluationId ?? '') as string, + batchEvaluationArn: (raw.batchEvaluationArn ?? '') as string, + name: (raw.batchEvaluationName ?? '') as string, + status: (raw.status ?? '') as string, + createdAt: raw.createdAt as string | undefined, + }; +} + +/** + * Get status and results of a batch evaluation. 
+ */ +export async function getBatchEvaluation(options: GetBatchEvaluationOptions): Promise<GetBatchEvaluationResult> { + const { data } = await signedRequest({ + region: options.region, + method: 'GET', + path: `/evaluations/batch-evaluate/${encodeURIComponent(options.batchEvaluationId)}`, + }); + + const raw = data as Record<string, unknown>; + return { + batchEvaluationId: (raw.batchEvaluationId ?? '') as string, + batchEvaluationArn: (raw.batchEvaluationArn ?? '') as string, + name: (raw.batchEvaluationName ?? '') as string, + status: (raw.status ?? '') as string, + createdAt: raw.createdAt as string | undefined, + updatedAt: raw.updatedAt as string | undefined, + evaluators: raw.evaluators as Evaluator[] | undefined, + dataSourceConfig: raw.dataSourceConfig as DataSourceConfig | undefined, + outputConfig: raw.outputConfig as OutputConfig | undefined, + evaluationResults: raw.evaluationResults as EvaluationResults | undefined, + errorDetails: raw.errorDetails as string[] | undefined, + description: raw.description as string | undefined, + }; +} + +/** + * List one page of batch evaluations; pass the returned nextToken back in to continue. + */ +export async function listBatchEvaluations(options: ListBatchEvaluationsOptions): Promise<ListBatchEvaluationsResult> { + const params = new URLSearchParams(); + if (options.maxResults) params.set('maxResults', String(options.maxResults)); + if (options.nextToken) params.set('nextToken', options.nextToken); + + const query = params.toString(); + const path = `/evaluations/batch-evaluate${query ? `?${query}` : ''}`; + + const { data } = await signedRequest({ + region: options.region, + method: 'GET', + path, + }); + + const result = data as ListBatchEvaluationsResult; + return { + batchEvaluations: result.batchEvaluations ?? [], + nextToken: result.nextToken, + }; +} + +/** + * Stop a running batch evaluation. 
+ */ +export async function stopBatchEvaluation(options: StopBatchEvaluationOptions): Promise<StopBatchEvaluationResult> { + const { data } = await signedRequest({ + region: options.region, + method: 'POST', + path: `/evaluations/batch-evaluate/${encodeURIComponent(options.batchEvaluationId)}/stop`, + }); + + const raw = data as Record<string, unknown>; + return { + batchEvaluationId: (raw.batchEvaluationId ?? '') as string, + batchEvaluationArn: (raw.batchEvaluationArn ?? '') as string, + status: (raw.status ?? '') as string, + description: raw.description as string | undefined, + }; +} + +/** + * Delete a batch evaluation; missing response fields default to ''. + */ +export async function deleteBatchEvaluation( + options: DeleteBatchEvaluationOptions +): Promise<DeleteBatchEvaluationResult> { + const { data } = await signedRequest({ + region: options.region, + method: 'DELETE', + path: `/evaluations/batch-evaluate/${encodeURIComponent(options.batchEvaluationId)}`, + }); + + const raw = data as Record<string, unknown>; + return { + batchEvaluationId: (raw.batchEvaluationId ?? '') as string, + batchEvaluationArn: (raw.batchEvaluationArn ?? '') as string, + status: (raw.status ?? '') as string, + }; +} + +/** + * Generate a client token for idempotency. + */ +export function generateClientToken(): string { + return crypto.randomUUID(); +} diff --git a/src/cli/aws/agentcore-config-bundles.ts b/src/cli/aws/agentcore-config-bundles.ts new file mode 100644 index 000000000..d890d95df --- /dev/null +++ b/src/cli/aws/agentcore-config-bundles.ts @@ -0,0 +1,368 @@ +/** + * AWS client wrappers for Configuration Bundle control plane operations. + * + * NOTE: The ConfigurationBundle API is not yet available in the + * @aws-sdk/client-bedrock-agentcore-control SDK. These wrappers use + * direct HTTP requests with SigV4 signing as an interim solution. + * When the SDK adds ConfigurationBundle commands, migrate to the SDK client. 
+ */ +import { getCredentialProvider } from './account'; +import { dnsSuffix } from './partition'; +import { Sha256 } from '@aws-crypto/sha256-js'; +import { defaultProvider } from '@aws-sdk/credential-provider-node'; +import { HttpRequest } from '@smithy/protocol-http'; +import { SignatureV4 } from '@smithy/signature-v4'; +import { randomUUID } from 'node:crypto'; + +// ============================================================================ +// Types +// ============================================================================ + +/** Freeform configuration for a component within a bundle. */ +export interface ComponentConfiguration { + configuration: Record; +} + +/** Map of component identifier (ARN) to its configuration. */ +export type ComponentConfigurationMap = Record; + +/** Version lineage metadata for git-like versioning. */ +export interface VersionLineageMetadata { + parentVersionIds?: string[]; + branchName?: string; + createdBy?: { name: string; arn?: string }; + commitMessage?: string; +} + +// ── Create ────────────────────────────────────────────────────────────────── + +export interface CreateConfigurationBundleOptions { + region: string; + bundleName: string; + description?: string; + components: ComponentConfigurationMap; + branchName?: string; + commitMessage?: string; + createdBy?: { name: string; arn?: string }; +} + +export interface CreateConfigurationBundleResult { + bundleArn: string; + bundleId: string; + versionId: string; + createdAt: string; +} + +// ── Get ───────────────────────────────────────────────────────────────────── + +export interface GetConfigurationBundleOptions { + region: string; + bundleId: string; + branchName?: string; +} + +export interface GetConfigurationBundleResult { + bundleArn: string; + bundleId: string; + bundleName: string; + description?: string; + versionId: string; + components: ComponentConfigurationMap; + lineageMetadata?: VersionLineageMetadata; + createdAt: string; + updatedAt: string; +} + 
+// ── Update ────────────────────────────────────────────────────────────────── + +export interface UpdateConfigurationBundleOptions { + region: string; + bundleId: string; + bundleName?: string; + description?: string; + components?: ComponentConfigurationMap; + parentVersionIds?: string[]; + branchName?: string; + commitMessage?: string; + createdBy?: { name: string; arn?: string }; +} + +export interface UpdateConfigurationBundleResult { + bundleArn: string; + bundleId: string; + versionId: string; + updatedAt: string; +} + +// ── Delete ────────────────────────────────────────────────────────────────── + +export interface DeleteConfigurationBundleOptions { + region: string; + bundleId: string; +} + +// ── List ──────────────────────────────────────────────────────────────────── + +export interface ListConfigurationBundlesOptions { + region: string; + maxResults?: number; + nextToken?: string; +} + +export interface ConfigurationBundleSummary { + bundleArn: string; + bundleId: string; + bundleName: string; + description?: string; +} + +export interface ListConfigurationBundlesResult { + bundles: ConfigurationBundleSummary[]; + nextToken?: string; +} + +// ── Get Version ───────────────────────────────────────────────────────────── + +export interface GetConfigurationBundleVersionOptions { + region: string; + bundleId: string; + versionId: string; +} + +export interface GetConfigurationBundleVersionResult { + bundleArn: string; + bundleId: string; + bundleName: string; + description?: string; + versionId: string; + components: ComponentConfigurationMap; + lineageMetadata?: VersionLineageMetadata; + createdAt: string; + versionCreatedAt: string; +} + +// ── List Versions ─────────────────────────────────────────────────────────── + +export interface ListConfigurationBundleVersionsFilter { + branchName?: string; + latestPerBranch?: boolean; + createdByName?: string; +} + +export interface ListConfigurationBundleVersionsOptions { + region: string; + bundleId: 
string; + maxResults?: number; + nextToken?: string; + filter?: ListConfigurationBundleVersionsFilter; +} + +export interface ConfigurationBundleVersionSummary { + bundleArn: string; + bundleId: string; + versionId: string; + lineageMetadata?: VersionLineageMetadata; + versionCreatedAt: string; +} + +export interface ListConfigurationBundleVersionsResult { + versions: ConfigurationBundleVersionSummary[]; + nextToken?: string; +} + +// ============================================================================ +// HTTP signing helper +// ============================================================================ + +// TODO: Remove beta/gamma endpoints before GA merge +function getControlPlaneEndpoint(region: string): string { + const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); + if (stage === 'beta') return `https://beta.${region}.elcapcp.genesis-primitives.aws.dev`; + if (stage === 'gamma') return `https://gamma.${region}.elcapcp.genesis-primitives.aws.dev`; + return `https://bedrock-agentcore-control.${region}.${dnsSuffix(region)}`; +} + +async function signedRequest(options: { + region: string; + method: string; + path: string; + body?: string; +}): Promise { + const { region, method, path, body } = options; + const endpoint = getControlPlaneEndpoint(region); + const url = new URL(path, endpoint); + + const query: Record = {}; + url.searchParams.forEach((value, key) => { + query[key] = value; + }); + + const request = new HttpRequest({ + method, + protocol: 'https:', + hostname: url.hostname, + path: url.pathname, + ...(Object.keys(query).length > 0 && { query }), + headers: { + 'Content-Type': 'application/json', + host: url.hostname, + }, + ...(body && { body }), + }); + + const credentials = getCredentialProvider() ?? 
defaultProvider(); + const service = 'bedrock-agentcore'; + const signer = new SignatureV4({ + service, + region, + credentials, + sha256: Sha256, + }); + + const signedReq = await signer.sign(request); + + const response = await fetch(`${endpoint}${path}`, { + method, + headers: signedReq.headers as Record, + ...(body && { body }), + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error(`ConfigurationBundle API error (${response.status}): ${errorBody}`); + } + + if (response.status === 204) return {}; + return response.json(); +} + +// ============================================================================ +// Control Plane Operations +// ============================================================================ + +export async function createConfigurationBundle( + options: CreateConfigurationBundleOptions +): Promise { + const body = JSON.stringify({ + bundleName: options.bundleName, + clientToken: randomUUID(), + ...(options.description && { description: options.description }), + components: options.components, + ...(options.branchName && { branchName: options.branchName }), + ...(options.commitMessage && { commitMessage: options.commitMessage }), + ...(options.createdBy && { createdBy: options.createdBy }), + }); + + const result = await signedRequest({ + region: options.region, + method: 'POST', + path: '/configuration-bundles/create', + body, + }); + + return result as CreateConfigurationBundleResult; +} + +export async function getConfigurationBundle( + options: GetConfigurationBundleOptions +): Promise { + const params = new URLSearchParams(); + if (options.branchName) params.set('branchName', options.branchName); + const query = params.toString(); + const path = `/configuration-bundles/${options.bundleId}${query ? 
`?${query}` : ''}`; + + const data = await signedRequest({ + region: options.region, + method: 'GET', + path, + }); + + return data as GetConfigurationBundleResult; +} + +export async function updateConfigurationBundle( + options: UpdateConfigurationBundleOptions +): Promise { + const body: Record = { clientToken: randomUUID() }; + if (options.bundleName !== undefined) body.bundleName = options.bundleName; + if (options.description !== undefined) body.description = options.description; + if (options.components !== undefined) body.components = options.components; + if (options.parentVersionIds !== undefined) body.parentVersionIds = options.parentVersionIds; + if (options.branchName !== undefined) body.branchName = options.branchName; + if (options.commitMessage !== undefined) body.commitMessage = options.commitMessage; + if (options.createdBy !== undefined) body.createdBy = options.createdBy; + + const data = await signedRequest({ + region: options.region, + method: 'PUT', + path: `/configuration-bundles/${options.bundleId}`, + body: JSON.stringify(body), + }); + + return data as UpdateConfigurationBundleResult; +} + +export async function deleteConfigurationBundle(options: DeleteConfigurationBundleOptions): Promise { + await signedRequest({ + region: options.region, + method: 'DELETE', + path: `/configuration-bundles/${options.bundleId}`, + }); +} + +export async function listConfigurationBundles( + options: ListConfigurationBundlesOptions +): Promise { + const params = new URLSearchParams(); + if (options.maxResults) params.set('maxResults', String(options.maxResults)); + if (options.nextToken) params.set('nextToken', options.nextToken); + const query = params.toString(); + + const data = await signedRequest({ + region: options.region, + method: 'POST', + path: `/configuration-bundles${query ? `?${query}` : ''}`, + }); + + const result = data as ListConfigurationBundlesResult; + return { + bundles: result.bundles ?? 
[], + nextToken: result.nextToken, + }; +} + +export async function getConfigurationBundleVersion( + options: GetConfigurationBundleVersionOptions +): Promise { + const data = await signedRequest({ + region: options.region, + method: 'GET', + path: `/configuration-bundles/${options.bundleId}/versions/${options.versionId}`, + }); + + return data as GetConfigurationBundleVersionResult; +} + +export async function listConfigurationBundleVersions( + options: ListConfigurationBundleVersionsOptions +): Promise { + const params = new URLSearchParams(); + if (options.maxResults) params.set('maxResults', String(options.maxResults)); + if (options.nextToken) params.set('nextToken', options.nextToken); + const query = params.toString(); + + const body = options.filter ? JSON.stringify({ filter: options.filter }) : undefined; + + const data = await signedRequest({ + region: options.region, + method: 'POST', + path: `/configuration-bundles/${options.bundleId}/versions${query ? `?${query}` : ''}`, + body, + }); + + const result = data as ListConfigurationBundleVersionsResult; + return { + versions: result.versions ?? [], + nextToken: result.nextToken, + }; +} diff --git a/src/cli/aws/agentcore-http-gateways.ts b/src/cli/aws/agentcore-http-gateways.ts new file mode 100644 index 000000000..674f090a0 --- /dev/null +++ b/src/cli/aws/agentcore-http-gateways.ts @@ -0,0 +1,519 @@ +/** + * AWS client wrappers for HTTP Gateway control plane operations. + * + * HTTP gateways are required for A/B testing because MCP gateways + * don't emit spans for treatment propagation. These wrappers use + * direct HTTP requests with SigV4 signing against the control plane. 
+ */ +import { getCredentialProvider } from './account'; +import { dnsSuffix } from './partition'; +import { Sha256 } from '@aws-crypto/sha256-js'; +import { defaultProvider } from '@aws-sdk/credential-provider-node'; +import { HttpRequest } from '@smithy/protocol-http'; +import { SignatureV4 } from '@smithy/signature-v4'; +import { randomUUID } from 'node:crypto'; + +// ============================================================================ +// Types +// ============================================================================ + +// ── Create Gateway ───────────────────────────────────────────────────────── + +export interface CreateHttpGatewayOptions { + region: string; + name: string; + roleArn: string; +} + +export interface CreateHttpGatewayResult { + gatewayId: string; + gatewayArn: string; + name: string; + status: string; +} + +// ── Create Gateway Target ────────────────────────────────────────────────── + +export interface CreateHttpGatewayTargetOptions { + region: string; + gatewayId: string; + targetName: string; + runtimeArn: string; + qualifier?: string; +} + +export interface CreateHttpGatewayTargetResult { + targetId: string; + name: string; + status: string; +} + +// ── Get Gateway ──────────────────────────────────────────────────────────── + +export interface GetHttpGatewayOptions { + region: string; + gatewayId: string; +} + +export interface GetHttpGatewayResult { + gatewayId: string; + gatewayArn: string; + gatewayUrl?: string; + name: string; + status: string; + authorizerType?: string; + roleArn?: string; + createdAt?: string; + updatedAt?: string; +} + +// ── Get Gateway Target ───────────────────────────────────────────────────── + +export interface GetHttpGatewayTargetOptions { + region: string; + gatewayId: string; + targetId: string; +} + +export interface GetHttpGatewayTargetResult { + targetId: string; + name: string; + status: string; + targetConfiguration?: unknown; + createdAt?: string; + updatedAt?: string; +} + +// ── 
List Gateways ────────────────────────────────────────────────────────── + +export interface ListHttpGatewaysOptions { + region: string; + maxResults?: number; + nextToken?: string; +} + +export interface HttpGatewaySummary { + gatewayId: string; + gatewayArn: string; + name: string; + status: string; +} + +export interface ListHttpGatewaysResult { + gateways: HttpGatewaySummary[]; + nextToken?: string; +} + +// ── List Gateway Targets ────────────────────────────────────────────────── + +export interface ListHttpGatewayTargetsOptions { + region: string; + gatewayId: string; + maxResults?: number; +} + +export interface HttpGatewayTargetSummary { + targetId: string; + name: string; + status: string; +} + +export interface ListHttpGatewayTargetsResult { + targets: HttpGatewayTargetSummary[]; +} + +// ── Delete Gateway Target ────────────────────────────────────────────────── + +export interface DeleteHttpGatewayTargetOptions { + region: string; + gatewayId: string; + targetId: string; +} + +// ── Delete Gateway ───────────────────────────────────────────────────────── + +export interface DeleteHttpGatewayOptions { + region: string; + gatewayId: string; +} + +// ── Wait for Target Ready ────────────────────────────────────────────────── + +export interface WaitForTargetReadyOptions { + region: string; + gatewayId: string; + targetId: string; + /** Maximum time to wait in milliseconds. Defaults to 120000 (120s). 
*/ + timeoutMs?: number; +} + +// ============================================================================ +// HTTP signing helper +// ============================================================================ + +function getControlPlaneEndpoint(region: string): string { + const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); + if (stage === 'beta') return `https://beta.${region}.elcapcp.genesis-primitives.aws.dev`; + if (stage === 'gamma') return `https://gamma.${region}.elcapcp.genesis-primitives.aws.dev`; + return `https://bedrock-agentcore-control.${region}.${dnsSuffix(region)}`; +} + +async function signedRequest(options: { + region: string; + method: string; + path: string; + body?: string; +}): Promise { + const { region, method, path, body } = options; + const endpoint = getControlPlaneEndpoint(region); + const url = new URL(path, endpoint); + + const query: Record = {}; + url.searchParams.forEach((value, key) => { + query[key] = value; + }); + + const request = new HttpRequest({ + method, + protocol: 'https:', + hostname: url.hostname, + path: url.pathname, + ...(Object.keys(query).length > 0 && { query }), + headers: { + 'Content-Type': 'application/json', + host: url.hostname, + }, + ...(body && { body }), + }); + + const credentials = getCredentialProvider() ?? 
defaultProvider(); + const service = 'bedrock-agentcore'; + const signer = new SignatureV4({ + service, + region, + credentials, + sha256: Sha256, + }); + + const signedReq = await signer.sign(request); + + const response = await fetch(`${endpoint}${path}`, { + method, + headers: signedReq.headers as Record, + ...(body && { body }), + }); + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error(`HttpGateway API error (${response.status}): ${errorBody}`); + } + + if (response.status === 204) return {}; + return response.json(); +} + +// ============================================================================ +// Control Plane Operations +// ============================================================================ + +export async function createHttpGateway(options: CreateHttpGatewayOptions): Promise { + const body = JSON.stringify({ + name: options.name, + authorizerType: 'AWS_IAM', + roleArn: options.roleArn, + clientToken: randomUUID(), + }); + + try { + return (await signedRequest({ + region: options.region, + method: 'POST', + path: '/gateways', + body, + })) as CreateHttpGatewayResult; + } catch (err) { + throw new Error( + `Failed to create HTTP gateway "${options.name}": ${err instanceof Error ? err.message : String(err)}` + ); + } +} + +export async function createHttpGatewayTarget( + options: CreateHttpGatewayTargetOptions +): Promise { + const body = JSON.stringify({ + name: options.targetName, + clientToken: randomUUID(), + targetConfiguration: { + http: { + agentcoreRuntime: { + arn: options.runtimeArn, + qualifier: options.qualifier ?? 
'DEFAULT', + }, + }, + }, + credentialProviderConfigurations: [{ credentialProviderType: 'GATEWAY_IAM_ROLE' }], + }); + + try { + return (await signedRequest({ + region: options.region, + method: 'POST', + path: `/gateways/${options.gatewayId}/targets`, + body, + })) as CreateHttpGatewayTargetResult; + } catch (err) { + // Fallback: retry with the legacy field name only when the service rejected the request shape (HTTP 400 / ValidationException) + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes('ValidationException') || msg.includes('(400)')) { + const legacyBody = JSON.stringify({ + name: options.targetName, + clientToken: randomUUID(), + targetConfiguration: { + http: { + runtimeTargetConfiguration: { + arn: options.runtimeArn, + qualifier: options.qualifier ?? 'DEFAULT', + }, + }, + }, + credentialProviderConfigurations: [{ credentialProviderType: 'GATEWAY_IAM_ROLE' }], + }); + try { + return (await signedRequest({ + region: options.region, + method: 'POST', + path: `/gateways/${options.gatewayId}/targets`, + body: legacyBody, + })) as CreateHttpGatewayTargetResult; + } catch { + // Legacy attempt also failed: fall through and report the original error + } + } + throw new Error(`Failed to create target "${options.targetName}" in gateway ${options.gatewayId}: ${msg}`); + } +} + +export async function getHttpGateway(options: GetHttpGatewayOptions): Promise { + const data = await signedRequest({ + region: options.region, + method: 'GET', + path: `/gateways/${options.gatewayId}`, + }); + + return data as GetHttpGatewayResult; +} + +export async function getHttpGatewayTarget(options: GetHttpGatewayTargetOptions): Promise { + const data = await signedRequest({ + region: options.region, + method: 'GET', + path: `/gateways/${options.gatewayId}/targets/${options.targetId}`, + }); + + return data as GetHttpGatewayTargetResult; +} + +export async function listHttpGateways(options: ListHttpGatewaysOptions): Promise { + const params = new URLSearchParams(); + if (options.maxResults) params.set('maxResults',
String(options.maxResults)); + if (options.nextToken) params.set('nextToken', options.nextToken); + const query = params.toString(); + + const data = await signedRequest({ + region: options.region, + method: 'GET', + path: `/gateways${query ? `?${query}` : ''}`, + }); + + const result = data as ListHttpGatewaysResult; + return { + gateways: result.gateways ?? [], + nextToken: result.nextToken, + }; +} + +/** + * List all HTTP gateways, paginating through all results. + */ +export async function listAllHttpGateways(options: { region: string }): Promise { + const all: HttpGatewaySummary[] = []; + let nextToken: string | undefined; + + do { + const result = await listHttpGateways({ region: options.region, maxResults: 100, nextToken }); + all.push(...result.gateways); + nextToken = result.nextToken; + } while (nextToken); + + return all; +} + +export async function listHttpGatewayTargets( + options: ListHttpGatewayTargetsOptions +): Promise { + const params = new URLSearchParams(); + if (options.maxResults) params.set('maxResults', String(options.maxResults)); + const query = params.toString(); + + const data = await signedRequest({ + region: options.region, + method: 'GET', + path: `/gateways/${options.gatewayId}/targets${query ? `?${query}` : ''}`, + }); + + const result = data as Record; + return { + targets: (result.items ?? result.targets ?? []) as HttpGatewayTargetSummary[], + }; +} + +export async function deleteHttpGatewayTarget( + options: DeleteHttpGatewayTargetOptions +): Promise<{ success: boolean; error?: string }> { + try { + await signedRequest({ + region: options.region, + method: 'DELETE', + path: `/gateways/${options.gatewayId}/targets/${options.targetId}`, + }); + + // Wait for target to be fully deleted before returning. + // Gateway deletion fails if targets still exist in DELETING state. 
+ const timeoutMs = 60_000; + const startTime = Date.now(); + let delayMs = 2_000; + + while (Date.now() - startTime < timeoutMs) { + try { + await getHttpGatewayTarget({ + region: options.region, + gatewayId: options.gatewayId, + targetId: options.targetId, + }); + // Target still exists — keep waiting + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + if (msg.includes('(404)') || msg.includes('not found')) { + return { success: true }; // Target confirmed deleted + } + // Transient error — keep polling + } + + const remaining = timeoutMs - (Date.now() - startTime); + if (remaining <= 0) break; + await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); + delayMs = Math.min(delayMs * 2, 8_000); + } + + // Polling timed out — target may still be deleting + return { success: false, error: `Timed out waiting for target ${options.targetId} to be fully deleted` }; + } catch (err) { + return { success: false, error: err instanceof Error ? err.message : String(err) }; + } +} + +export async function deleteHttpGateway( + options: DeleteHttpGatewayOptions +): Promise<{ success: boolean; error?: string }> { + try { + await signedRequest({ + region: options.region, + method: 'DELETE', + path: `/gateways/${options.gatewayId}`, + }); + return { success: true }; + } catch (err) { + return { success: false, error: err instanceof Error ? err.message : String(err) }; + } +} + +/** Terminal states that indicate a resource will never become READY. */ +const TERMINAL_FAILURE_STATES = ['FAILED', 'CREATE_FAILED', 'UPDATE_FAILED', 'DELETING', 'DELETED'] as const; + +export async function waitForGatewayReady(options: { + region: string; + gatewayId: string; + timeoutMs?: number; +}): Promise { + const timeoutMs = options.timeoutMs ?? 
120_000; + const startTime = Date.now(); + let delayMs = 2_000; + + while (Date.now() - startTime < timeoutMs) { + const gateway = await getHttpGateway({ + region: options.region, + gatewayId: options.gatewayId, + }); + + if (gateway.status === 'READY') return gateway; + + if ((TERMINAL_FAILURE_STATES as readonly string[]).includes(gateway.status)) { + throw new Error( + `Gateway ${options.gatewayId} reached terminal state '${gateway.status}' and will not become READY` + ); + } + + const remaining = timeoutMs - (Date.now() - startTime); + if (remaining <= 0) break; + + await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); + delayMs = Math.min(delayMs * 2, 16_000); + } + + throw new Error( + `Timed out waiting for gateway ${options.gatewayId} to become READY after ${Math.round(timeoutMs / 1000)}s` + ); +} + +export async function waitForTargetReady(options: WaitForTargetReadyOptions): Promise { + const timeoutMs = options.timeoutMs ?? 120_000; + const startTime = Date.now(); + let delayMs = 2_000; + + while (Date.now() - startTime < timeoutMs) { + let target; + try { + target = await getHttpGatewayTarget({ + region: options.region, + gatewayId: options.gatewayId, + targetId: options.targetId, + }); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + if (msg.includes('(404)')) { + throw new Error( + `Target ${options.targetId} not found during readiness poll — it may have been deleted externally` + ); + } + // Retry on transient server errors + if (/\(5\d\d\)/.test(msg)) { + // Continue polling — transient error; cap the sleep so the retry never overshoots timeoutMs + const remaining = timeoutMs - (Date.now() - startTime); + if (remaining <= 0) break; + await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); + delayMs = Math.min(delayMs * 2, 16_000); + continue; + } + throw err; + } + + if (target.status === 'READY') return target; + + if ((TERMINAL_FAILURE_STATES as readonly string[]).includes(target.status)) { + throw new Error( + `Target ${options.targetId} in gateway ${options.gatewayId} reached terminal state '${target.status}' and will not become READY` + ); + } + + const remaining = timeoutMs - (Date.now() - startTime); + if (remaining <= 0) break; + + await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); + delayMs = Math.min(delayMs * 2, 16_000); + } + + throw new Error( + `Timed out waiting for target ${options.targetId} to become READY after ${Math.round(timeoutMs / 1000)}s` + ); +} diff --git a/src/cli/aws/agentcore-recommendation.ts b/src/cli/aws/agentcore-recommendation.ts new file mode 100644 index 000000000..55242fdcd --- /dev/null +++ b/src/cli/aws/agentcore-recommendation.ts @@ -0,0 +1,371 @@ +/** + * AWS client wrappers for Recommendation API operations. + * + * NOTE: The Recommendation API is not yet available in the AWS SDK. + * These wrappers use direct HTTP requests with SigV4 signing as an + * interim solution. When the SDK adds Recommendation commands, migrate + * to the SDK client. + * + * TEMPORARY: All Recommendation endpoints are on the Data Plane (DP), + * not the Control Plane. This is the current API shape as of 2026-03-30. + * The API may move to CP in the future — update endpoints accordingly. + * + * Recommendations are one-shot, immutable resources.
There is no Update + * operation and no runs sub-resource. You start a recommendation with + * StartRecommendation, poll via GetRecommendation, and stop via + * DeleteRecommendation (stop-via-delete pattern). + */ +import { getCredentialProvider } from './account'; +import { dnsSuffix } from './partition'; +import { Sha256 } from '@aws-crypto/sha256-js'; +import { defaultProvider } from '@aws-sdk/credential-provider-node'; +import { HttpRequest } from '@smithy/protocol-http'; +import { SignatureV4 } from '@smithy/signature-v4'; + +// ============================================================================ +// Types — Recommendation Type Enum +// ============================================================================ + +export type RecommendationType = 'SYSTEM_PROMPT_RECOMMENDATION' | 'TOOL_DESCRIPTION_RECOMMENDATION'; + +// ============================================================================ +// Types — Input Config (tag-union per type) +// ============================================================================ + +/** System prompt source — either inline text or a ConfigBundle reference. */ +export interface SystemPromptSource { + text?: string; + configurationBundle?: { + bundleArn: string; + versionId?: string; + systemPromptJsonPath?: string; + }; +} + +/** A single OTEL-style span for inline session traces. */ +export interface SessionSpan { + scope?: { name: string }; + body?: { + input?: { messages?: { content: unknown; role: string }[] }; + output?: { messages?: { content: unknown; role: string }[] }; + }; + attributes?: Record; + traceId: string; + spanId: string; +} + +/** Agent trace source — inline spans or CloudWatch Logs. 
*/ +export interface AgentTracesSource { + sessionSpans?: SessionSpan[]; + cloudwatchLogs?: { + logGroupArns: string[]; + serviceNames: string[]; + startTime: string; + endTime: string; + limit?: number; + sessionIds?: string[]; + }; +} + +/** Evaluation config — exactly one evaluator as objective signal (API constraint: min 1, max 1). */ +export interface RecommendationEvaluationConfig { + evaluators: [{ evaluatorArn: string }]; +} + +/** Config for SYSTEM_PROMPT_RECOMMENDATION type. */ +export interface SystemPromptRecommendationConfig { + systemPrompt: SystemPromptSource; + agentTraces: AgentTracesSource; + evaluationConfig: RecommendationEvaluationConfig; +} + +/** Config for TOOL_DESCRIPTION_RECOMMENDATION type. */ +export interface ToolDescriptionRecommendationConfig { + toolDescription: { + toolDescriptionText?: { + tools: { toolName: string; toolDescription: { text: string } }[]; + }; + configurationBundle?: { + bundleArn: string; + versionId?: string; + tools: { toolName: string; toolDescriptionJsonPath: string }[]; + }; + }; + agentTraces: AgentTracesSource; +} + +/** Tag-union recommendation config — only populate the member matching the type. 
*/ +export interface RecommendationConfig { + systemPromptRecommendationConfig?: SystemPromptRecommendationConfig; + toolDescriptionRecommendationConfig?: ToolDescriptionRecommendationConfig; +} + +// ============================================================================ +// Types — Result (tag-union per type) +// ============================================================================ + +export interface RecommendationResultConfigurationBundle { + bundleArn: string; + versionId: string; +} + +export interface SystemPromptRecommendationResult { + recommendedSystemPrompt?: string; + configurationBundle?: RecommendationResultConfigurationBundle; + errorCode?: string; + errorMessage?: string; +} + +export interface ToolDescriptionRecommendationToolResult { + toolName: string; + recommendedToolDescription: string; +} + +export interface ToolDescriptionRecommendationResult { + tools?: ToolDescriptionRecommendationToolResult[]; + configurationBundle?: RecommendationResultConfigurationBundle; + errorCode?: string; + errorMessage?: string; +} + +export interface RecommendationResult { + systemPromptRecommendationResult?: SystemPromptRecommendationResult; + toolDescriptionRecommendationResult?: ToolDescriptionRecommendationResult; +} + +// ============================================================================ +// Types — API Options & Results +// ============================================================================ + +export interface StartRecommendationOptions { + region: string; + name: string; + description?: string; + type: RecommendationType; + recommendationConfig: RecommendationConfig; + kmsKeyArn?: string; + clientToken?: string; +} + +export interface StartRecommendationResult { + recommendationId: string; + recommendationArn: string; + name: string; + type: string; + status: string; + createdAt?: string; + updatedAt?: string; + requestId?: string; +} + +export interface GetRecommendationOptions { + region: string; + recommendationId: string; 
+} + +export interface GetRecommendationResult { + recommendationId: string; + recommendationArn: string; + name: string; + description?: string; + type: string; + recommendationConfig?: RecommendationConfig; + status: string; + statusReasons?: string[]; + createdAt?: string; + updatedAt?: string; + completedAt?: string; + recommendationResult?: RecommendationResult; + requestId?: string; +} + +export interface ListRecommendationsOptions { + region: string; + status?: string; + maxResults?: number; + nextToken?: string; +} + +export interface RecommendationSummary { + recommendationId: string; + recommendationArn: string; + name: string; + description?: string; + type: string; + status: string; + createdAt?: string; + updatedAt?: string; +} + +export interface ListRecommendationsResult { + recommendationSummaries: RecommendationSummary[]; + nextToken?: string; +} + +export interface DeleteRecommendationOptions { + region: string; + recommendationId: string; +} + +export interface DeleteRecommendationResult { + recommendationId: string; + status: string; +} + +// ============================================================================ +// HTTP signing helper +// ============================================================================ + +/** + * Resolve the DP endpoint for the Recommendation API. + * + * TEMPORARY: All Recommendation endpoints are on the Data Plane. + * Set AGENTCORE_STAGE=beta|gamma to target pre-release environments. 
+ */ +function getDataPlaneEndpoint(region: string): string { + const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); + if (stage === 'beta') return `https://beta.${region}.elcapdp.genesis-primitives.aws.dev`; + if (stage === 'gamma') return `https://gamma.${region}.elcapdp.genesis-primitives.aws.dev`; + return `https://bedrock-agentcore.${region}.${dnsSuffix(region)}`; +} + +async function signedRequest(options: { + region: string; + method: string; + path: string; + body?: string; +}): Promise<{ data: unknown; status: number; requestId?: string }> { + const { region, method, path, body } = options; + const endpoint = getDataPlaneEndpoint(region); + const url = new URL(path, endpoint); + + const query: Record = {}; + url.searchParams.forEach((value, key) => { + query[key] = value; + }); + + const request = new HttpRequest({ + method, + protocol: 'https:', + hostname: url.hostname, + path: url.pathname, + ...(Object.keys(query).length > 0 && { query }), + headers: { + 'Content-Type': 'application/json', + host: url.hostname, + }, + ...(body && { body }), + }); + + const credentials = getCredentialProvider() ?? defaultProvider(); + const signer = new SignatureV4({ + service: 'bedrock-agentcore', + region, + credentials, + sha256: Sha256, + }); + + const signedReq = await signer.sign(request); + + const response = await fetch(`${endpoint}${path}`, { + method, + headers: signedReq.headers as Record, + ...(body && { body }), + }); + + const requestId = response.headers.get('x-amzn-requestid') ?? 
'unknown'; + + if (!response.ok) { + const errorBody = await response.text(); + throw new Error(`Recommendation API error (${response.status}): ${errorBody} [requestId: ${requestId}]`); + } + + if (response.status === 204) return { data: {}, status: 204, requestId }; + return { data: await response.json(), status: response.status, requestId }; +} + +// ============================================================================ +// API Operations +// ============================================================================ + +/** + * Start a new recommendation (async — returns 202). + * Creates an ARN-able resource that progresses through: + * PENDING → IN_PROGRESS → COMPLETED | FAILED + */ +export async function startRecommendation(options: StartRecommendationOptions): Promise { + const body = JSON.stringify({ + name: options.name, + ...(options.description && { description: options.description }), + type: options.type, + recommendationConfig: options.recommendationConfig, + ...(options.kmsKeyArn && { kmsKeyArn: options.kmsKeyArn }), + ...(options.clientToken && { clientToken: options.clientToken }), + }); + + const { data, requestId } = await signedRequest({ + region: options.region, + method: 'POST', + path: '/recommendations', + body, + }); + + const result = data as StartRecommendationResult; + if (requestId) result.requestId = requestId; + return result; +} + +/** + * Get recommendation status and results. + * When status is COMPLETED, recommendationResult contains the optimized artifact. + */ +export async function getRecommendation(options: GetRecommendationOptions): Promise { + const { data, requestId } = await signedRequest({ + region: options.region, + method: 'GET', + path: `/recommendations/${options.recommendationId}`, + }); + + const result = data as GetRecommendationResult; + if (requestId) result.requestId = requestId; + return result; +} + +/** + * List recommendations with optional filtering and pagination. 
+ */ +export async function listRecommendations(options: ListRecommendationsOptions): Promise { + const params = new URLSearchParams(); + if (options.status) params.set('status', options.status); + if (options.maxResults) params.set('maxResults', String(options.maxResults)); + if (options.nextToken) params.set('nextToken', options.nextToken); + + const query = params.toString(); + const path = `/recommendations${query ? `?${query}` : ''}`; + + const { data } = await signedRequest({ + region: options.region, + method: 'GET', + path, + }); + + const result = data as ListRecommendationsResult; + return { + recommendationSummaries: result.recommendationSummaries ?? [], + nextToken: result.nextToken, + }; +} + +/** + * Delete a recommendation. Also stops in-progress recommendations + * (stop-via-delete pattern — no separate Stop API). + */ +export async function deleteRecommendation(options: DeleteRecommendationOptions): Promise { + const { data } = await signedRequest({ + region: options.region, + method: 'DELETE', + path: `/recommendations/${options.recommendationId}`, + }); + + return data as DeleteRecommendationResult; +} diff --git a/src/cli/aws/agentcore.ts b/src/cli/aws/agentcore.ts index 55085c23e..4c50a0330 100644 --- a/src/cli/aws/agentcore.ts +++ b/src/cli/aws/agentcore.ts @@ -6,7 +6,6 @@ import { serviceEndpoint } from './partition'; import { BedrockAgentCoreClient, EvaluateCommand, - type EvaluationReferenceInput, InvokeAgentRuntimeCommand, InvokeAgentRuntimeCommandCommand, StopRuntimeSessionCommand, @@ -14,6 +13,14 @@ import { import type { HttpRequest } from '@smithy/protocol-http'; import type { DocumentType } from '@smithy/types'; +/** Local definition — SDK does not yet export this type. 
*/ +export interface EvaluationReferenceInput { + context: { spanContext: { sessionId: string; traceId?: string } }; + expectedTrajectory?: { toolNames: string[] }; + assertions?: { text: string }[]; + expectedResponse?: { text: string }; +} + /** * Create a BedrockAgentCoreClient with optional custom header injection middleware. */ @@ -70,6 +77,8 @@ export interface InvokeAgentRuntimeOptions { headers?: Record; /** Bearer token for CUSTOM_JWT auth. When provided, uses raw HTTP with Authorization header instead of SigV4. */ bearerToken?: string; + /** W3C baggage header value (e.g. config bundle ref for runtime) */ + baggage?: string; } export interface InvokeAgentRuntimeResult { @@ -162,20 +171,40 @@ function buildInvokeUrl(region: string, runtimeArn: string): string { } /** - * Invoke an AgentCore Runtime using bearer token auth (raw HTTP, no SigV4). - * Used when the runtime has CUSTOM_JWT authorizer configured. + * Build headers for bearer-token invoke requests. + * Shared by both streaming and non-streaming invoke paths. */ -async function invokeWithBearerTokenStreaming(options: InvokeAgentRuntimeOptions): Promise { - const url = buildInvokeUrl(options.region, options.runtimeArn); +export function buildBearerInvokeHeaders( + options: Pick, + accept: string +): Record { const headers: Record = { Authorization: `Bearer ${options.bearerToken}`, 'Content-Type': 'application/json', - Accept: 'application/json, text/event-stream', + Accept: accept, }; if (options.sessionId) { headers['X-Amzn-Bedrock-AgentCore-Runtime-Session-Id'] = options.sessionId; } headers['X-Amzn-Bedrock-AgentCore-Runtime-User-Id'] = options.userId ?? DEFAULT_RUNTIME_USER_ID; + if (options.baggage) { + headers.baggage = options.baggage; + } + if (options.headers) { + for (const [name, value] of Object.entries(options.headers)) { + headers[name] = value; + } + } + return headers; +} + +/** + * Invoke an AgentCore Runtime using bearer token auth (raw HTTP, no SigV4). 
+ * Used when the runtime has CUSTOM_JWT authorizer configured. + */ +async function invokeWithBearerTokenStreaming(options: InvokeAgentRuntimeOptions): Promise { + const url = buildInvokeUrl(options.region, options.runtimeArn); + const headers = buildBearerInvokeHeaders(options, 'application/json, text/event-stream'); const res = await fetch(url, { method: 'POST', @@ -261,15 +290,7 @@ async function invokeWithBearerTokenStreaming(options: InvokeAgentRuntimeOptions */ async function invokeWithBearerToken(options: InvokeAgentRuntimeOptions): Promise { const url = buildInvokeUrl(options.region, options.runtimeArn); - const headers: Record = { - Authorization: `Bearer ${options.bearerToken}`, - 'Content-Type': 'application/json', - Accept: 'application/json', - }; - if (options.sessionId) { - headers['X-Amzn-Bedrock-AgentCore-Runtime-Session-Id'] = options.sessionId; - } - headers['X-Amzn-Bedrock-AgentCore-Runtime-User-Id'] = options.userId ?? DEFAULT_RUNTIME_USER_ID; + const headers = buildBearerInvokeHeaders(options, 'application/json'); const res = await fetch(url, { method: 'POST', @@ -311,6 +332,7 @@ export async function invokeAgentRuntimeStreaming(options: InvokeAgentRuntimeOpt accept: 'application/json', runtimeSessionId: options.sessionId, runtimeUserId: options.userId ?? DEFAULT_RUNTIME_USER_ID, + ...(options.baggage && { baggage: options.baggage }), }); const response = await client.send(command); @@ -406,6 +428,7 @@ export async function invokeAgentRuntime(options: InvokeAgentRuntimeOptions): Pr accept: 'application/json', runtimeSessionId: options.sessionId, runtimeUserId: options.userId ?? 
DEFAULT_RUNTIME_USER_ID, + ...(options.baggage && { baggage: options.baggage }), }); const response = await client.send(command); diff --git a/src/cli/aws/index.ts b/src/cli/aws/index.ts index 5373814b0..d306a80e1 100644 --- a/src/cli/aws/index.ts +++ b/src/cli/aws/index.ts @@ -78,6 +78,24 @@ export { type StopRuntimeSessionOptions, type StopRuntimeSessionResult, } from './agentcore'; +export { + startRecommendation, + getRecommendation, + listRecommendations, + deleteRecommendation, + type StartRecommendationOptions, + type StartRecommendationResult, + type GetRecommendationOptions, + type GetRecommendationResult, + type ListRecommendationsOptions, + type ListRecommendationsResult, + type DeleteRecommendationOptions, + type DeleteRecommendationResult, + type RecommendationSummary, + type RecommendationType, + type RecommendationConfig, + type RecommendationResult, +} from './agentcore-recommendation'; export { AguiEventType, AguiErrorCode, diff --git a/src/cli/cli.ts b/src/cli/cli.ts index 97c826486..98692639c 100644 --- a/src/cli/cli.ts +++ b/src/cli/cli.ts @@ -1,5 +1,8 @@ +import { getOrCreateInstallationId } from '../lib/schemas/io/global-config'; +import { registerABTestCommand } from './commands/abtest'; import { registerAdd } from './commands/add'; import { registerAddTool } from './commands/add/tool-command'; +import { registerConfigBundle } from './commands/config-bundle'; import { registerCreate } from './commands/create'; import { registerDeploy } from './commands/deploy'; import { registerDev } from './commands/dev'; @@ -10,18 +13,19 @@ import { registerImport } from './commands/import'; import { registerInvoke } from './commands/invoke'; import { registerLogs } from './commands/logs'; import { registerPackage } from './commands/package'; -import { registerPause } from './commands/pause'; +import { registerPause, registerPromote } from './commands/pause'; +import { registerRecommendations } from './commands/recommendations'; import { registerRemove } 
from './commands/remove'; import { registerRemoveTool } from './commands/remove/tool-command'; import { registerResume } from './commands/resume'; import { registerRun } from './commands/run'; import { registerStatus } from './commands/status'; +import { registerStop } from './commands/stop'; import { registerTelemetry } from './commands/telemetry'; import { registerTraces } from './commands/traces'; import { registerUpdate } from './commands/update'; import { registerValidate } from './commands/validate'; import { PACKAGE_VERSION } from './constants'; -import { getOrCreateInstallationId } from './global-config'; import { ALL_PRIMITIVES } from './primitives'; import { TelemetryClientAccessor } from './telemetry'; import { App } from './tui/App'; @@ -184,14 +188,18 @@ export function registerCommands(program: Command) { registerLogs(program); registerPackage(program); registerPause(program); + registerRecommendations(program); const removeCmd = registerRemove(program); registerResume(program); registerRun(program); registerStatus(program); + registerStop(program); + registerPromote(program); registerTelemetry(program); registerTraces(program); registerUpdate(program); registerValidate(program); + registerConfigBundle(program); // Register primitive subcommands (add agent, remove agent, add memory, etc.) for (const primitive of ALL_PRIMITIVES) { @@ -201,6 +209,9 @@ export function registerCommands(program: Command) { // Register standalone add/remove subcommands registerAddTool(addCmd); registerRemoveTool(removeCmd); + + // Register AB test detail command + registerABTestCommand(program); } export const main = async (argv: string[]) => { @@ -214,7 +225,7 @@ export const main = async (argv: string[]) => { const args = argv.slice(2); - // Fire off non-blocking update check (skip for `update` command) + // Fire off non-blocking update check (skip for `update` command itself) const isUpdateCommand = args[0] === 'update'; const updateCheck = isUpdateCommand ? 
Promise.resolve(null) : checkForUpdate(); diff --git a/src/cli/cloudformation/__tests__/outputs-extended.test.ts b/src/cli/cloudformation/__tests__/outputs-extended.test.ts index cbe82085e..1f48faa96 100644 --- a/src/cli/cloudformation/__tests__/outputs-extended.test.ts +++ b/src/cli/cloudformation/__tests__/outputs-extended.test.ts @@ -364,7 +364,7 @@ describe('parseOnlineEvalOutputs', () => { 'arn:aws:bedrock:us-east-1:123:online-evaluation-config/proj_TestConfig-xyz', }; - const result = parseOnlineEvalOutputs(outputs, ['TestConfig']); + const result = parseOnlineEvalOutputs(outputs, [{ name: 'TestConfig' }]); expect(result.TestConfig).toBeDefined(); expect(result.TestConfig!.onlineEvaluationConfigId).toBe('proj_TestConfig-xyz'); expect(result.TestConfig!.onlineEvaluationConfigArn).toBe( @@ -380,7 +380,7 @@ describe('parseOnlineEvalOutputs', () => { ApplicationOnlineEvalConfigBArnOutputD: 'arn:b', }; - const result = parseOnlineEvalOutputs(outputs, ['ConfigA', 'ConfigB']); + const result = parseOnlineEvalOutputs(outputs, [{ name: 'ConfigA' }, { name: 'ConfigB' }]); expect(Object.keys(result)).toHaveLength(2); expect(result.ConfigA!.onlineEvaluationConfigId).toBe('id-a'); expect(result.ConfigB!.onlineEvaluationConfigId).toBe('id-b'); @@ -391,12 +391,12 @@ describe('parseOnlineEvalOutputs', () => { ApplicationOnlineEvalTestConfigArnOutputDEF: 'arn:config', }; - const result = parseOnlineEvalOutputs(outputs, ['TestConfig']); + const result = parseOnlineEvalOutputs(outputs, [{ name: 'TestConfig' }]); expect(result.TestConfig).toBeUndefined(); }); it('returns empty record for empty outputs', () => { - const result = parseOnlineEvalOutputs({}, ['TestConfig']); + const result = parseOnlineEvalOutputs({}, [{ name: 'TestConfig' }]); expect(result).toEqual({}); }); }); diff --git a/src/cli/cloudformation/__tests__/outputs.test.ts b/src/cli/cloudformation/__tests__/outputs.test.ts index d12ddb689..24f39b451 100644 --- a/src/cli/cloudformation/__tests__/outputs.test.ts +++ 
b/src/cli/cloudformation/__tests__/outputs.test.ts @@ -469,3 +469,117 @@ describe('buildDeployedState with policy data', () => { expect(result.targets.default!.resources?.policyEngines).toBeUndefined(); }); }); + +describe('buildDeployedState carry-forward', () => { + it('carries forward abTests from existing state', () => { + const existingState = { + targets: { + default: { + resources: { + stackName: 'TestStack', + abTests: { + TestExperiment: { + abTestId: 'abt-123', + abTestArn: 'arn:aws:bedrock:us-east-1:123456789012:ab-test/abt-123', + }, + }, + }, + }, + }, + }; + + const result = buildDeployedState({ + targetName: 'default', + stackName: 'TestStack', + agents: {}, + gateways: {}, + existingState, + }); + + expect(result.targets.default!.resources?.abTests).toEqual({ + TestExperiment: { + abTestId: 'abt-123', + abTestArn: 'arn:aws:bedrock:us-east-1:123456789012:ab-test/abt-123', + }, + }); + }); + + it('carries forward httpGateways from existing state', () => { + const existingState = { + targets: { + default: { + resources: { + stackName: 'TestStack', + httpGateways: { + MyHttpGw: { + gatewayId: 'hgw-456', + gatewayArn: 'arn:aws:bedrock:us-east-1:123456789012:http-gateway/hgw-456', + }, + }, + }, + }, + }, + }; + + const result = buildDeployedState({ + targetName: 'default', + stackName: 'TestStack', + agents: {}, + gateways: {}, + existingState, + }); + + expect(result.targets.default!.resources?.httpGateways).toEqual({ + MyHttpGw: { + gatewayId: 'hgw-456', + gatewayArn: 'arn:aws:bedrock:us-east-1:123456789012:http-gateway/hgw-456', + }, + }); + }); + + it('does not carry forward empty abTests', () => { + const existingState = { + targets: { + default: { + resources: { + stackName: 'TestStack', + abTests: {}, + }, + }, + }, + }; + + const result = buildDeployedState({ + targetName: 'default', + stackName: 'TestStack', + agents: {}, + gateways: {}, + existingState, + }); + + expect(result.targets.default!.resources?.abTests).toBeUndefined(); + }); + + 
it('does not carry forward empty httpGateways', () => { + const existingState = { + targets: { + default: { + resources: { + stackName: 'TestStack', + httpGateways: {}, + }, + }, + }, + }; + + const result = buildDeployedState({ + targetName: 'default', + stackName: 'TestStack', + agents: {}, + gateways: {}, + existingState, + }); + + expect(result.targets.default!.resources?.httpGateways).toBeUndefined(); + }); +}); diff --git a/src/cli/cloudformation/outputs.ts b/src/cli/cloudformation/outputs.ts index f19d602e9..5f574aefe 100644 --- a/src/cli/cloudformation/outputs.ts +++ b/src/cli/cloudformation/outputs.ts @@ -251,13 +251,13 @@ export function parseEvaluatorOutputs( */ export function parseOnlineEvalOutputs( outputs: StackOutputs, - onlineEvalNames: string[] + onlineEvalSpecs: { name: string; agent?: string; endpoint?: string }[] ): Record { const configs: Record = {}; const outputKeys = Object.keys(outputs); - for (const configName of onlineEvalNames) { - const pascal = toPascalId('OnlineEval', configName); + for (const spec of onlineEvalSpecs) { + const pascal = toPascalId('OnlineEval', spec.name); const idPrefix = `Application${pascal}IdOutput`; const arnPrefix = `Application${pascal}ArnOutput`; @@ -265,9 +265,11 @@ export function parseOnlineEvalOutputs( const arnKey = outputKeys.find(k => k.startsWith(arnPrefix)); if (idKey && arnKey) { - configs[configName] = { + configs[spec.name] = { onlineEvaluationConfigId: outputs[idKey]!, onlineEvaluationConfigArn: outputs[arnKey]!, + ...(spec.agent && { agent: spec.agent }), + ...(spec.endpoint && { endpoint: spec.endpoint }), }; } } @@ -454,6 +456,24 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta targetState.resources!.runtimeEndpoints = runtimeEndpoints; } + // Carry forward config bundles from existing state (managed post-deploy, not via CFN outputs) + const existingConfigBundles = existingState?.targets?.[targetName]?.resources?.configBundles; + if (existingConfigBundles && 
Object.keys(existingConfigBundles).length > 0) { + targetState.resources!.configBundles = existingConfigBundles; + } + + // Carry forward AB tests from existing state (managed post-deploy, not via CFN outputs) + const existingABTests = existingState?.targets?.[targetName]?.resources?.abTests; + if (existingABTests && Object.keys(existingABTests).length > 0) { + targetState.resources!.abTests = existingABTests; + } + + // Carry forward HTTP gateways from existing state (managed post-deploy, not via CFN outputs) + const existingHttpGateways = existingState?.targets?.[targetName]?.resources?.httpGateways; + if (existingHttpGateways && Object.keys(existingHttpGateways).length > 0) { + targetState.resources!.httpGateways = existingHttpGateways; + } + return { targets: { ...existingState?.targets, diff --git a/src/cli/commands/abtest/command.ts b/src/cli/commands/abtest/command.ts new file mode 100644 index 000000000..cc236cdb3 --- /dev/null +++ b/src/cli/commands/abtest/command.ts @@ -0,0 +1,199 @@ +/** + * AB Test commands. + * + * `agentcore ab-test ` — fetches and displays full AB test details + * from the data plane API, including evaluation scores/metrics. 
+ */ +import { ConfigIO } from '../../../lib'; +import { getABTest, listABTests } from '../../aws/agentcore-ab-tests'; +import type { GetABTestResult } from '../../aws/agentcore-ab-tests'; +import { dnsSuffix } from '../../aws/partition'; +import { getErrorMessage } from '../../errors'; +import type { Command } from '@commander-js/extra-typings'; + +// ============================================================================ +// Helpers +// ============================================================================ + +async function getRegion(cliRegion?: string): Promise { + if (cliRegion) return cliRegion; + try { + const configIO = new ConfigIO(); + const targets = await configIO.resolveAWSDeploymentTargets(); + if (targets.length > 0) return targets[0]!.region; + } catch { + // Fall through to env vars + } + return process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 'us-east-1'; +} + +async function resolveABTestId( + testName: string, + region: string +): Promise<{ abTestId: string; region: string; error?: string }> { + let projectName: string | undefined; + try { + const configIO = new ConfigIO(); + const deployedState = await configIO.readDeployedState(); + const awsTargets = await configIO.readAWSDeploymentTargets(); + + try { + const projectSpec = await configIO.readProjectSpec(); + projectName = projectSpec.name; + } catch { + // Project spec unavailable + } + + for (const [targetName, target] of Object.entries(deployedState.targets ?? {})) { + const abTests = target.resources?.abTests; + if (abTests?.[testName]) { + const targetConfig = awsTargets.find(t => t.name === targetName); + const resolvedRegion = targetConfig?.region ?? 
region; + return { abTestId: abTests[testName].abTestId, region: resolvedRegion }; + } + } + } catch { + // No deployed state available + } + + try { + const result = await listABTests({ region, maxResults: 100 }); + // Match against both prefixed name ({projectName}_{testName}) and bare testName (backwards compat) + const prefixedName = projectName ? `${projectName}_${testName}` : undefined; + // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- boolean OR, not nullish coalescing + const match = result.abTests.find(t => (prefixedName && t.name === prefixedName) || t.name === testName); + if (match) { + return { abTestId: match.abTestId, region }; + } + } catch { + // API call failed + } + + return { abTestId: '', region, error: `AB test "${testName}" not found in deployed state or API.` }; +} + +function gatewayUrlFromArn(arn: string): string { + const parts = arn.split(':'); + const region = parts[3]; + const gatewayId = parts[5]?.split('/')[1]; + if (region && gatewayId) { + return `https://${gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}`; + } + return arn; +} + +function formatABTestDetails(test: GetABTestResult): string { + const lines: string[] = []; + lines.push(`AB Test: ${test.name}`); + lines.push(` Status: ${test.status}`); + lines.push(` Execution: ${test.executionStatus}`); + lines.push(` Invocation URL: ${gatewayUrlFromArn(test.gatewayArn)}//invocations`); + lines.push( + ` Online Eval: ${'onlineEvaluationConfigArn' in test.evaluationConfig ? 
test.evaluationConfig.onlineEvaluationConfigArn : 'per-variant'}` + ); + if (test.description) lines.push(` Description: ${test.description}`); + + for (const variant of test.variants) { + const bundleRef = variant.variantConfiguration.configurationBundle; + const targetRef = variant.variantConfiguration.target; + if (targetRef) { + lines.push(` Variant ${variant.name}: weight=${variant.weight}, target=${targetRef.name}`); + } else if (bundleRef) { + lines.push( + ` Variant ${variant.name}: weight=${variant.weight}, bundle=${bundleRef.bundleArn}, version=${bundleRef.bundleVersion}` + ); + } + } + + // TODO(post-preview): Re-enable max duration display once configurable duration is launched. + // if (test.maxDurationDays) lines.push(` Max Duration: ${test.maxDurationDays} days`); + if (test.startedAt) lines.push(` Started: ${test.startedAt}`); + if (test.stoppedAt) lines.push(` Stopped: ${test.stoppedAt}`); + if (test.failureReason) lines.push(` Failure: ${test.failureReason}`); + + if (test.results) { + lines.push(' Results:'); + if (test.results.analysisTimestamp) { + lines.push(` Analysis Time: ${test.results.analysisTimestamp}`); + } + for (const metric of test.results.evaluatorMetrics) { + lines.push(` Evaluator: ${metric.evaluatorArn}`); + lines.push( + ` Control: samples=${metric.controlStats.sampleSize}, mean=${metric.controlStats.mean.toFixed(4)}` + ); + for (const vr of metric.variantResults) { + lines.push( + ` ${vr.treatmentName}: samples=${vr.sampleSize}, mean=${vr.mean.toFixed(4)}, significant=${vr.isSignificant}` + ); + if (vr.absoluteChange !== undefined) + lines.push(` Change: ${vr.absoluteChange.toFixed(4)} (${(vr.percentChange ?? 
0).toFixed(2)}%)`); + if (vr.pValue !== undefined) lines.push(` p-value: ${vr.pValue.toFixed(6)}`); + if (vr.confidenceInterval) { + lines.push( + ` CI: [${vr.confidenceInterval.lower?.toFixed(4)}, ${vr.confidenceInterval.upper?.toFixed(4)}]` + ); + } + } + } + } + + return lines.join('\n'); +} + +// ============================================================================ +// Command registration +// ============================================================================ + +export function registerABTestCommand(program: Command): void { + program + .command('ab-test') + .description('[preview] View A/B test details and results') + .argument('', 'AB test name') + .option('--region ', 'AWS region') + .option('--json', 'Output as JSON') + .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { + try { + const region = await getRegion(cliOptions.region); + const { abTestId, region: resolvedRegion, error } = await resolveABTestId(name, region); + if (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + console.error(error); + } + process.exit(1); + } + const result = await getABTest({ region: resolvedRegion, abTestId }); // region where the test is deployed + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + process.exit(0); + } else if (process.stdout.isTTY) { + // Render TUI detail screen with key bindings + const [{ render }, { default: React }, { ABTestDetailScreen }] = await Promise.all([ + import('ink'), + import('react'), + import('../../tui/screens/ab-test'), + ]); + render( + React.createElement(ABTestDetailScreen, { + abTestId, + region: resolvedRegion, + onExit: () => process.exit(0), + }) + ); + return; + } else { + console.log(formatABTestDetails(result)); + process.exit(0); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + }); +} diff --git a/src/cli/commands/abtest/index.ts
b/src/cli/commands/abtest/index.ts new file mode 100644 index 000000000..0ff25efc5 --- /dev/null +++ b/src/cli/commands/abtest/index.ts @@ -0,0 +1 @@ +export { registerABTestCommand } from './command'; diff --git a/src/cli/commands/add/types.ts b/src/cli/commands/add/types.ts index 5692ef00e..6f161e38d 100644 --- a/src/cli/commands/add/types.ts +++ b/src/cli/commands/add/types.ts @@ -37,6 +37,7 @@ export interface AddAgentOptions extends VpcOptions { idleTimeout?: number | string; maxLifetime?: number | string; sessionStorageMountPath?: string; + withConfigBundle?: boolean; json?: boolean; } diff --git a/src/cli/commands/config-bundle/command.tsx b/src/cli/commands/config-bundle/command.tsx new file mode 100644 index 000000000..ce72f2a4b --- /dev/null +++ b/src/cli/commands/config-bundle/command.tsx @@ -0,0 +1,347 @@ +import { + getConfigurationBundleVersion, + listConfigurationBundleVersions, + updateConfigurationBundle, +} from '../../aws/agentcore-config-bundles'; +import type { + ConfigurationBundleVersionSummary, + ListConfigurationBundleVersionsFilter, +} from '../../aws/agentcore-config-bundles'; +import { getErrorMessage } from '../../errors'; +import { deepDiff } from '../../operations/config-bundle/diff-versions'; +import { resolveBundleByName } from '../../operations/config-bundle/resolve-bundle'; +import { requireProject } from '../../tui/guards'; +import type { Command } from '@commander-js/extra-typings'; +import { Box, Text, render } from 'ink'; + +// ============================================================================ +// Helpers +// ============================================================================ + +function formatTimestamp(ts: string): string { + const num = Number(ts); + if (isNaN(num)) return ts; + // API returns epoch seconds; convert to ms if needed + const ms = num < 1e12 ? 
num * 1000 : num; + return new Date(ms) + .toISOString() + .replace('T', ' ') + .replace(/\.\d+Z$/, 'Z'); +} + +async function resolveRegion(): Promise { + const { ConfigIO } = await import('../../../lib'); + const configIO = new ConfigIO(); + const targets = await configIO.resolveAWSDeploymentTargets(); + if (targets.length === 0) { + throw new Error('No AWS deployment targets configured. Run `agentcore deploy` first.'); + } + return targets[0]!.region; +} + +// ============================================================================ +// Version list +// ============================================================================ + +async function handleVersions(options: { + bundle: string; + branch?: string; + latestPerBranch?: boolean; + createdBy?: string; + region?: string; + json?: boolean; +}) { + const region = options.region ?? (await resolveRegion()); + const resolved = await resolveBundleByName(options.bundle, region); + + const filter: ListConfigurationBundleVersionsFilter = {}; + if (options.branch) filter.branchName = options.branch; + if (options.latestPerBranch) filter.latestPerBranch = true; + if (options.createdBy) filter.createdByName = options.createdBy; + const hasFilter = Object.keys(filter).length > 0; + + // Paginate to collect all versions + const allVersions: ConfigurationBundleVersionSummary[] = []; + let nextToken: string | undefined; + do { + const result = await listConfigurationBundleVersions({ + region, + bundleId: resolved.bundleId, + maxResults: 50, + nextToken, + ...(hasFilter && { filter }), + }); + allVersions.push(...result.versions); + nextToken = result.nextToken; + } while (nextToken); + + // Sort by creation time, newest first + allVersions.sort((a, b) => Number(b.versionCreatedAt) - Number(a.versionCreatedAt)); + + return { versions: allVersions, bundleName: options.bundle, bundleId: resolved.bundleId }; +} + +// ============================================================================ +// Diff +// 
============================================================================ + +async function handleDiff(options: { bundle: string; from: string; to: string; region?: string }) { + const region = options.region ?? (await resolveRegion()); + const resolved = await resolveBundleByName(options.bundle, region); + + const [fromVersion, toVersion] = await Promise.all([ + getConfigurationBundleVersion({ region, bundleId: resolved.bundleId, versionId: options.from }), + getConfigurationBundleVersion({ region, bundleId: resolved.bundleId, versionId: options.to }), + ]); + + const diffs = deepDiff(fromVersion.components, toVersion.components); + + return { fromVersion, toVersion, diffs }; +} + +// ============================================================================ +// Command registration +// ============================================================================ + +export const registerConfigBundle = (program: Command) => { + const cmd = program + .command('config-bundle') + .alias('cb') + .description('[preview] Manage configuration bundles (use bundle name from agentcore.json, not the ID)'); + + // --- versions --- + cmd + .command('versions') + .description('List version history for a configuration bundle') + .requiredOption('--bundle ', 'Bundle name as defined in agentcore.json (e.g. "MyBundle")') + .option('--branch ', 'Filter by branch name') + .option('--latest-per-branch', 'Show only the latest version per branch') + .option('--created-by ', 'Filter by creator name (e.g. 
"user", "recommendation")') + .option('--region ', 'AWS region override') + .option('--json', 'Output as JSON') + .action( + async (cliOptions: { + bundle: string; + branch?: string; + latestPerBranch?: boolean; + createdBy?: string; + region?: string; + json?: boolean; + }) => { + requireProject(); + try { + const result = await handleVersions(cliOptions); + + if (cliOptions.json) { + console.log(JSON.stringify(result, null, 2)); + return; + } + + if (result.versions.length === 0) { + render(No versions found for bundle "{cliOptions.bundle}".); + return; + } + + // Group by branch + const byBranch = new Map(); + for (const v of result.versions) { + const branch = v.lineageMetadata?.branchName ?? 'unknown'; + if (!byBranch.has(branch)) byBranch.set(branch, []); + byBranch.get(branch)!.push(v); + } + + render( + + + {result.bundleName} — {result.versions.length} version(s) + + + {[...byBranch.entries()].map(([branch, versions]) => ( + + + Branch: {branch} + + {versions.map((v, i) => { + const meta = v.lineageMetadata; + const creator = meta?.createdBy?.name ?? 'unknown'; + const message = meta?.commitMessage ?? ''; + const isLast = i === versions.length - 1; + const connector = isLast ? '└' : '├'; + return ( + + + {connector} {v.versionId}{' '} + {formatTimestamp(v.versionCreatedAt)}{' '} + {message && "{message}"} + + + {isLast ? ' ' : '│'} by: {creator} + {meta?.parentVersionIds?.length ? ( + (parent: {meta.parentVersionIds.join(', ')}) + ) : null} + + + ); + })} + + ))} + Use --json for complete output + + ); + } catch (error) { + render(Error: {getErrorMessage(error)}); + process.exit(1); + } + } + ); + + // --- diff --- + cmd + .command('diff') + .description('Diff two versions of a configuration bundle (get version IDs from `cb versions`)') + .requiredOption('--bundle ', 'Bundle name as defined in agentcore.json (e.g. 
"MyBundle")') + .requiredOption('--from ', 'Source version ID (from `config-bundle versions --json`)') + .requiredOption('--to ', 'Target version ID (from `config-bundle versions --json`)') + .option('--region ', 'AWS region override') + .option('--json', 'Output as JSON') + .action(async (cliOptions: { bundle: string; from: string; to: string; region?: string; json?: boolean }) => { + requireProject(); + try { + const result = await handleDiff(cliOptions); + + if (cliOptions.json) { + console.log(JSON.stringify(result, null, 2)); + return; + } + + const fromMeta = result.fromVersion.lineageMetadata; + const toMeta = result.toVersion.lineageMetadata; + + render( + + + Diff: {result.fromVersion.versionId} → {result.toVersion.versionId} + + + From: {fromMeta?.commitMessage ?? '(no message)'} ({formatTimestamp(result.fromVersion.versionCreatedAt)}) + + + To: {toMeta?.commitMessage ?? '(no message)'} ({formatTimestamp(result.toVersion.versionCreatedAt)}) + + + {result.diffs.length === 0 ? ( + No differences found. + ) : ( + <> + {result.diffs.length} change(s): + + {result.diffs.map((d, i) => ( + + {d.path} + {d.type === 'added' && + {JSON.stringify(d.newValue)}} + {d.type === 'removed' && - {JSON.stringify(d.oldValue)}} + {d.type === 'changed' && ( + <> + - {JSON.stringify(d.oldValue)} + + {JSON.stringify(d.newValue)} + + )} + + ))} + + )} + + ); + } catch (error) { + render(Error: {getErrorMessage(error)}); + process.exit(1); + } + }); + + // --- create-branch --- + cmd + .command('create-branch') + .description('Create a new branch on an existing configuration bundle') + .requiredOption('--bundle ', 'Bundle name as defined in agentcore.json (e.g. 
"MyBundle")') + .requiredOption('--branch ', 'Name for the new branch') + .option('--from ', 'Parent version ID to branch from (defaults to latest version)') + .option('--commit-message ', 'Commit message for the branch point') + .option('--region ', 'AWS region override') + .option('--json', 'Output as JSON') + .action( + async (cliOptions: { + bundle: string; + branch: string; + from?: string; + commitMessage?: string; + region?: string; + json?: boolean; + }) => { + requireProject(); + try { + const region = cliOptions.region ?? (await resolveRegion()); + const resolved = await resolveBundleByName(cliOptions.bundle, region); + + // Determine parent version + let parentVersionId = cliOptions.from; + if (!parentVersionId) { + const versions = await listConfigurationBundleVersions({ + region, + bundleId: resolved.bundleId, + maxResults: 50, + }); + if (versions.versions.length === 0) { + throw new Error(`No versions found for bundle "${cliOptions.bundle}".`); + } + // Sort descending by creation time to get the latest version + const sorted = [...versions.versions].sort( + (a, b) => new Date(b.versionCreatedAt).getTime() - new Date(a.versionCreatedAt).getTime() + ); + parentVersionId = sorted[0]!.versionId; + } + + // Get the parent version's components to carry forward + const parentVersion = await getConfigurationBundleVersion({ + region, + bundleId: resolved.bundleId, + versionId: parentVersionId, + }); + + const result = await updateConfigurationBundle({ + region, + bundleId: resolved.bundleId, + components: parentVersion.components, + parentVersionIds: [parentVersionId], + branchName: cliOptions.branch, + commitMessage: cliOptions.commitMessage ?? 
`Create branch ${cliOptions.branch}`, + }); + + if (cliOptions.json) { + console.log(JSON.stringify(result, null, 2)); + return; + } + + render( + + + Branch "{cliOptions.branch}" created on bundle "{cliOptions.bundle}" + + + Version: {result.versionId} + + Parent: {parentVersionId} + + ); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + render(Error: {getErrorMessage(error)}); + } + process.exit(1); + } + } + ); + + return cmd; +}; diff --git a/src/cli/commands/config-bundle/index.ts b/src/cli/commands/config-bundle/index.ts new file mode 100644 index 000000000..2ebcc4c68 --- /dev/null +++ b/src/cli/commands/config-bundle/index.ts @@ -0,0 +1 @@ +export { registerConfigBundle } from './command'; diff --git a/src/cli/commands/create/action.ts b/src/cli/commands/create/action.ts index dbfc215d7..a00397f38 100644 --- a/src/cli/commands/create/action.ts +++ b/src/cli/commands/create/action.ts @@ -11,6 +11,7 @@ import type { import { getErrorMessage } from '../../errors'; import { checkCreateDependencies } from '../../external-requirements'; import { initGitRepo, setupPythonProject, writeEnvFile, writeGitignore } from '../../operations'; +import { createConfigBundleForAgent } from '../../operations/agent/config-bundle-defaults'; import { mapGenerateConfigToRenderConfig, mapModelProviderToIdentityProviders, @@ -131,6 +132,7 @@ export interface CreateWithAgentOptions { idleTimeout?: number; maxLifetime?: number; sessionStorageMountPath?: string; + withConfigBundle?: boolean; skipGit?: boolean; skipInstall?: boolean; skipPythonSetup?: boolean; @@ -156,6 +158,7 @@ export async function createProjectWithAgent(options: CreateWithAgentOptions): P idleTimeout, maxLifetime: maxLifetimeOpt, sessionStorageMountPath, + withConfigBundle, skipGit, skipInstall, skipPythonSetup, @@ -245,6 +248,7 @@ export async function createProjectWithAgent(options: CreateWithAgentOptions): P ...(idleTimeout 
!== undefined && { idleRuntimeSessionTimeout: idleTimeout }), ...(maxLifetimeOpt !== undefined && { maxLifetime: maxLifetimeOpt }), ...(sessionStorageMountPath && { sessionStorageMountPath }), + ...(withConfigBundle && { withConfigBundle }), }; // Resolve credential strategy FIRST (new project has no existing credentials) @@ -286,6 +290,11 @@ export async function createProjectWithAgent(options: CreateWithAgentOptions): P } onProgress?.('Add agent to project', 'done'); + // Auto-create config bundle when opted in + if (withConfigBundle) { + await createConfigBundleForAgent(agentName, configBaseDir); + } + // Set up Python environment if needed (unless skipped) if (language === 'Python' && !skipPythonSetup && !skipInstall) { onProgress?.('Set up Python environment', 'start'); diff --git a/src/cli/commands/create/command.tsx b/src/cli/commands/create/command.tsx index ec70fdf85..9be7e7bf0 100644 --- a/src/cli/commands/create/command.tsx +++ b/src/cli/commands/create/command.tsx @@ -284,6 +284,7 @@ async function handleCreateAgentCLI(options: CreateOptions): Promise { idleTimeout: options.idleTimeout ? Number(options.idleTimeout) : undefined, maxLifetime: options.maxLifetime ? Number(options.maxLifetime) : undefined, sessionStorageMountPath: options.sessionStorageMountPath, + withConfigBundle: options.withConfigBundle, skipGit: options.skipGit, skipInstall: options.skipInstall, skipPythonSetup: options.skipPythonSetup, @@ -346,6 +347,7 @@ export const registerCreate = (program: Command) => { '--session-storage-mount-path ', 'Absolute mount path for session filesystem storage under /mnt (e.g. 
/mnt/data) [non-interactive]' ) + .option('--with-config-bundle', 'Create a config bundle wired into the agent template [preview] [non-interactive]') .option('--output-dir ', 'Output directory (default: current directory) [non-interactive]') .option('--skip-git', 'Skip git repository initialization [non-interactive]') .option('--skip-python-setup', 'Skip Python virtual environment setup [non-interactive]') diff --git a/src/cli/commands/create/types.ts b/src/cli/commands/create/types.ts index 0f4211eb3..a46a85885 100644 --- a/src/cli/commands/create/types.ts +++ b/src/cli/commands/create/types.ts @@ -19,6 +19,7 @@ export interface CreateOptions extends VpcOptions { idleTimeout?: number | string; maxLifetime?: number | string; sessionStorageMountPath?: string; + withConfigBundle?: boolean; outputDir?: string; skipGit?: boolean; skipPythonSetup?: boolean; diff --git a/src/cli/commands/deploy/actions.ts b/src/cli/commands/deploy/actions.ts index 521d59a7c..410e3c2b2 100644 --- a/src/cli/commands/deploy/actions.ts +++ b/src/cli/commands/deploy/actions.ts @@ -428,8 +428,12 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise c.name); - const onlineEvalConfigs = parseOnlineEvalOutputs(outputs, onlineEvalNames); + const onlineEvalSpecs = (context.projectSpec.onlineEvalConfigs ?? []).map(c => ({ + name: c.name, + agent: c.agent, + endpoint: c.endpoint, + })); + const onlineEvalConfigs = parseOnlineEvalOutputs(outputs, onlineEvalSpecs); // Parse policy engine outputs const policyEngineSpecs = context.projectSpec.policyEngines ?? 
[]; diff --git a/src/cli/commands/deploy/command.tsx b/src/cli/commands/deploy/command.tsx index ad04726c3..fc235ac51 100644 --- a/src/cli/commands/deploy/command.tsx +++ b/src/cli/commands/deploy/command.tsx @@ -77,6 +77,13 @@ async function handleDeployCLI(options: DeployOptions): Promise { } } + if (result.postDeployWarnings && result.postDeployWarnings.length > 0) { + console.log('\n⚠ Post-deploy warnings:'); + for (const warning of result.postDeployWarnings) { + console.log(` ${warning}`); + } + } + if (result.notes && result.notes.length > 0) { for (const note of result.notes) { console.log(`\nNote: ${note}`); @@ -98,7 +105,8 @@ async function handleDeployCLI(options: DeployOptions): Promise { } } - process.exit(result.success ? 0 : 1); + const hasPostDeployWarnings = result.success && result.postDeployWarnings && result.postDeployWarnings.length > 0; + process.exit(result.success ? (hasPostDeployWarnings ? 2 : 0) : 1); } export const registerDeploy = (program: Command) => { diff --git a/src/cli/commands/deploy/types.ts b/src/cli/commands/deploy/types.ts index 16d4f39a2..44cdc7847 100644 --- a/src/cli/commands/deploy/types.ts +++ b/src/cli/commands/deploy/types.ts @@ -16,6 +16,7 @@ export interface DeployResult { logPath?: string; nextSteps?: string[]; notes?: string[]; + postDeployWarnings?: string[]; error?: string; } diff --git a/src/cli/commands/import/command.ts b/src/cli/commands/import/command.ts index df4c3c0c7..381167aaa 100644 --- a/src/cli/commands/import/command.ts +++ b/src/cli/commands/import/command.ts @@ -13,7 +13,7 @@ const { green, yellow, cyan, dim, reset } = ANSI; export const registerImport = (program: Command) => { const importCmd = program .command('import') - .description('Import a runtime, memory, gateway, or starter toolkit into this project.'); + .description('Import a runtime, memory, or starter toolkit into this project.'); // Existing YAML flow: agentcore import --source importCmd diff --git 
a/src/cli/commands/import/import-evaluator.ts b/src/cli/commands/import/import-evaluator.ts index be85829f3..7c6c8b0d2 100644 --- a/src/cli/commands/import/import-evaluator.ts +++ b/src/cli/commands/import/import-evaluator.ts @@ -10,6 +10,7 @@ import { ANSI } from './constants'; import { failResult, parseAndValidateArn } from './import-utils'; import { executeResourceImport } from './resource-import'; import type { ImportResourceOptions, ImportResourceResult, ResourceImportDescriptor } from './types'; +import { ResourceNotFoundException } from '@aws-sdk/client-bedrock-agentcore-control'; import type { Command } from '@commander-js/extra-typings'; /** @@ -92,11 +93,18 @@ const evaluatorDescriptor: ResourceImportDescriptor 0) { - const oecDetails = await Promise.all( - oecSummaries.map(s => - getOnlineEvaluationConfig({ region: target.region, configId: s.onlineEvaluationConfigId }) + // Configs can be deleted between list and get (TOCTOU race). + // Skip ResourceNotFoundException — a deleted config can't be locking our evaluator. 
+ const oecDetails = ( + await Promise.all( + oecSummaries.map(s => + getOnlineEvaluationConfig({ region: target.region, configId: s.onlineEvaluationConfigId }).catch(err => { + if (err instanceof ResourceNotFoundException) return null; + throw err; + }) + ) ) - ); + ).filter(r => r !== null); const referencingOec = oecDetails.find(oec => oec.evaluatorIds?.includes(detail.evaluatorId)); diff --git a/src/cli/commands/index.ts b/src/cli/commands/index.ts index c8c1bd68b..a7eda0787 100644 --- a/src/cli/commands/index.ts +++ b/src/cli/commands/index.ts @@ -11,6 +11,7 @@ export { registerPause } from './pause'; export { registerRemove } from './remove'; export { registerResume } from './resume'; export { registerRun } from './run'; +export { registerStop } from './stop'; export { registerStatus } from './status'; export { registerTraces } from './traces'; export { registerUpdate } from './update'; diff --git a/src/cli/commands/invoke/action.ts b/src/cli/commands/invoke/action.ts index 2635371e7..07537202d 100644 --- a/src/cli/commands/invoke/action.ts +++ b/src/cli/commands/invoke/action.ts @@ -128,6 +128,20 @@ export async function handleInvoke(context: InvokeContext, options: InvokeOption return { success: false, error: `Agent '${agentSpec.name}' is not deployed to target '${selectedTargetName}'` }; } + // Build config bundle baggage if a bundle is associated with this agent + const deployedBundles = targetState?.resources?.configBundles ?? {}; + let baggage: string | undefined; + const bundleSpec = project.configBundles?.find(b => { + const keys = Object.keys(b.components ?? 
{}); + return keys.some(k => k === `{{runtime:${agentSpec.name}}}`); + }); + if (bundleSpec) { + const bundleState = deployedBundles[bundleSpec.name]; + if (bundleState?.bundleArn && bundleState?.versionId) { + baggage = `aws.agentcore.configbundle_arn=${encodeURIComponent(bundleState.bundleArn)},aws.agentcore.configbundle_version=${encodeURIComponent(bundleState.versionId)}`; + } + } + // Auto-fetch bearer token for CUSTOM_JWT agents when not provided if (agentSpec.authorizerType === 'CUSTOM_JWT' && !options.bearerToken) { const canFetch = await canFetchRuntimeToken(agentSpec.name); @@ -264,6 +278,7 @@ export async function handleInvoke(context: InvokeContext, options: InvokeOption userId: options.userId, headers: options.headers, bearerToken: options.bearerToken, + baggage, }; // list-tools: list available MCP tools @@ -444,6 +459,7 @@ export async function handleInvoke(context: InvokeContext, options: InvokeOption logger, headers: options.headers, bearerToken: options.bearerToken, + baggage, }); for await (const chunk of result.stream) { @@ -477,6 +493,7 @@ export async function handleInvoke(context: InvokeContext, options: InvokeOption userId: options.userId, headers: options.headers, bearerToken: options.bearerToken, + baggage, }); logger.logResponse(response.content); diff --git a/src/cli/commands/logs/__tests__/action.test.ts b/src/cli/commands/logs/__tests__/action.test.ts index dbeddb534..842fa333a 100644 --- a/src/cli/commands/logs/__tests__/action.test.ts +++ b/src/cli/commands/logs/__tests__/action.test.ts @@ -61,6 +61,9 @@ describe('resolveAgentContext', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }, deployedState: { targets: { @@ -123,6 +126,9 @@ describe('resolveAgentContext', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }, }); const result = await resolveAgentContext(context, {}); @@ -165,6 +171,9 
@@ describe('resolveAgentContext', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }, deployedState: { targets: { @@ -217,6 +226,9 @@ describe('resolveAgentContext', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }, }); const result = await resolveAgentContext(context, {}); diff --git a/src/cli/commands/pause/command.tsx b/src/cli/commands/pause/command.tsx index 5a3183ea9..82a79bccf 100644 --- a/src/cli/commands/pause/command.tsx +++ b/src/cli/commands/pause/command.tsx @@ -1,3 +1,6 @@ +import { ConfigIO } from '../../../lib'; +import { listABTests, updateABTest } from '../../aws/agentcore-ab-tests'; +import { stopBatchEvaluation } from '../../aws/agentcore-batch-evaluation'; import { getErrorMessage } from '../../errors'; import { handlePauseResume } from '../../operations/eval'; import type { OnlineEvalActionOptions } from '../../operations/eval'; @@ -67,12 +70,267 @@ function registerOnlineEvalSubcommand(parent: Command, action: 'pause' | 'resume }); } +async function getRegion(cliRegion?: string): Promise { + if (cliRegion) return cliRegion; + try { + const configIO = new ConfigIO(); + const targets = await configIO.resolveAWSDeploymentTargets(); + if (targets.length > 0) return targets[0]!.region; + } catch { + // Fall through to env vars + } + return process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 
'us-east-1'; +} + +async function resolveABTestId( + testName: string, + region: string +): Promise<{ abTestId: string; region: string; error?: string }> { + let projectName: string | undefined; + try { + const configIO = new ConfigIO(); + const deployedState = await configIO.readDeployedState(); + const awsTargets = await configIO.readAWSDeploymentTargets(); + + try { + const projectSpec = await configIO.readProjectSpec(); + projectName = projectSpec.name; + } catch { + // Project spec unavailable + } + + for (const [targetName, target] of Object.entries(deployedState.targets ?? {})) { + const abTests = target.resources?.abTests; + if (abTests?.[testName]) { + const targetConfig = awsTargets.find(t => t.name === targetName); + const resolvedRegion = targetConfig?.region ?? region; + return { abTestId: abTests[testName].abTestId, region: resolvedRegion }; + } + } + } catch { + // No deployed state + } + + try { + const result = await listABTests({ region, maxResults: 100 }); + // Match against both prefixed name ({projectName}_{testName}) and bare testName (backwards compat) + const prefixedName = projectName ? `${projectName}_${testName}` : undefined; + const match = + result.abTests.find(t => prefixedName != null && t.name === prefixedName) ?? + result.abTests.find(t => t.name === testName); + if (match) return { abTestId: match.abTestId, region }; + } catch { + // API call failed + } + + return { abTestId: '', region, error: `AB test "${testName}" not found in deployed state or API.` }; +} + +function registerABTestSubcommand(parent: Command, action: 'pause' | 'resume') { + const executionStatus = action === 'pause' ? 'PAUSED' : 'RUNNING'; + const pastTense = action === 'pause' ? 'Paused' : 'Resumed'; + + parent + .command('ab-test') + .description(`[preview] ${action === 'pause' ? 
'Pause' : 'Resume'} a deployed A/B test`) + .argument('', 'AB test name') + .option('--region ', 'AWS region') + .option('--json', 'Output as JSON') + .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { + try { + const region = await getRegion(cliOptions.region); + const { abTestId, error } = await resolveABTestId(name, region); + if (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + console.error(error); + } + process.exit(1); + } + + const result = await updateABTest({ + region, + abTestId, + executionStatus, + }); + + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, ...result })); + } else { + console.log(`${pastTense} AB test "${name}" (execution: ${result.executionStatus})`); + } + process.exit(0); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + }); +} + export const registerPause = (program: Command) => { const pauseCmd = program.command('pause').description(COMMAND_DESCRIPTIONS.pause); registerOnlineEvalSubcommand(pauseCmd, 'pause'); + registerABTestSubcommand(pauseCmd, 'pause'); }; export const registerResume = (program: Command) => { const resumeCmd = program.command('resume').description(COMMAND_DESCRIPTIONS.resume); registerOnlineEvalSubcommand(resumeCmd, 'resume'); + registerABTestSubcommand(resumeCmd, 'resume'); +}; + +export const registerStop = (program: Command) => { + const stopCmd = program.command('stop').description('Stop resources'); + + stopCmd + .command('ab-test') + .description('[preview] Stop a deployed A/B test permanently') + .argument('', 'AB test name') + .option('--region ', 'AWS region') + .option('--json', 'Output as JSON') + .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { + try { + const region = await 
getRegion(cliOptions.region); + const { abTestId, error } = await resolveABTestId(name, region); + if (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + console.error(error); + } + process.exit(1); + } + + const result = await updateABTest({ + region, + abTestId, + executionStatus: 'STOPPED', + }); + + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, ...result })); + } else { + console.log(`Stopped AB test "${name}" (execution: ${result.executionStatus})`); + } + process.exit(0); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + }); + + stopCmd + .command('batch-evaluation') + .description('[preview] Stop a running batch evaluation') + .requiredOption('-i, --id ', 'Batch evaluation ID to stop') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action(async (cliOptions: { id: string; region?: string; json?: boolean }) => { + try { + const region = await getRegion(cliOptions.region); + + const result = await stopBatchEvaluation({ + region, + batchEvaluationId: cliOptions.id, + }); + + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, ...result })); + } else { + console.log(`\nBatch evaluation stopped successfully`); + console.log(`ID: ${result.batchEvaluationId}`); + console.log(`Status: ${result.status}\n`); + } + + process.exit(0); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + render(Error: {getErrorMessage(error)}); + } + process.exit(1); + } + }); +}; + +export const registerPromote = (program: Command) => { + const promoteCmd = program.command('promote').description('Promote resources'); + + promoteCmd + .command('ab-test') + .description('Promote the 
winning treatment of an A/B test') + .argument('', 'AB test name') + .option('--region ', 'AWS region') + .option('--json', 'Output as JSON') + .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { + try { + const region = await getRegion(cliOptions.region); + const { abTestId, error } = await resolveABTestId(name, region); + if (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + console.error(error); + } + process.exit(1); + } + + // Stop the AB test + const result = await updateABTest({ + region, + abTestId, + executionStatus: 'STOPPED', + }); + + // Apply promotion to agentcore.json + const { promoteABTestConfig } = await import('../../operations/ab-test/promote'); + let promoted = false; + let mode: string | undefined; + let promotionDetail = ''; + try { + const promoResult = await promoteABTestConfig(abTestId, name); + promoted = promoResult.promoted; + mode = promoResult.mode; + promotionDetail = promoResult.promotionDetail; + } catch { + // Config read/write failed + } + + if (cliOptions.json) { + console.log( + JSON.stringify({ + success: true, + ...result, + ...(mode && { mode }), + promoted, + ...(promotionDetail && { promotionDetail }), + }) + ); + } else { + console.log(`AB test "${name}" stopped.`); + if (promoted) { + console.log(`\n${promotionDetail}`); + console.log(`\nRun: agentcore deploy`); + } + } + process.exit(0); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + }); }; diff --git a/src/cli/commands/pause/index.ts b/src/cli/commands/pause/index.ts index 858054fd2..1bc38e3be 100644 --- a/src/cli/commands/pause/index.ts +++ b/src/cli/commands/pause/index.ts @@ -1 +1 @@ -export { registerPause } from './command'; +export { registerPause, registerPromote } from './command'; diff --git 
a/src/cli/commands/recommendations/command.tsx b/src/cli/commands/recommendations/command.tsx new file mode 100644 index 000000000..bcf3b2784 --- /dev/null +++ b/src/cli/commands/recommendations/command.tsx @@ -0,0 +1,63 @@ +import { getErrorMessage } from '../../errors'; +import { listAllRecommendations } from '../../operations/recommendation'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; +import { requireProject } from '../../tui/guards'; +import type { Command } from '@commander-js/extra-typings'; +import { Text, render } from 'ink'; +import React from 'react'; + +export const registerRecommendations = (program: Command) => { + const recCmd = program.command('recommendations').description(COMMAND_DESCRIPTIONS.recommendations); + + recCmd + .command('history') + .description('Show past recommendation runs saved locally') + .option('--json', 'Output as JSON') + .action((cliOptions: { json?: boolean }) => { + requireProject(); + + try { + const records = listAllRecommendations(); + + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, recommendations: records })); + process.exit(0); + return; + } + + if (records.length === 0) { + console.log('No recommendation runs found. Run `agentcore run recommendation` to create one.'); + return; + } + + console.log( + `\n${'Date'.padEnd(22)} ${'Type'.padEnd(20)} ${'Agent'.padEnd(20)} ${'Recommendation ID'.padEnd(40)}` + ); + console.log('─'.repeat(105)); + + for (const record of records) { + const date = record.startedAt + ? new Date(record.startedAt).toLocaleString([], { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + }) + : 'unknown'; + console.log( + `${date.padEnd(22)} ${(record.type ?? 'unknown').padEnd(20)} ${(record.agent ?? 
'unknown').padEnd(20)} ${record.recommendationId.padEnd(40)}` + ); + } + + console.log(''); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + render(Error: {getErrorMessage(error)}); + } + process.exit(1); + } + }); +}; diff --git a/src/cli/commands/recommendations/index.ts b/src/cli/commands/recommendations/index.ts new file mode 100644 index 000000000..8c0a96809 --- /dev/null +++ b/src/cli/commands/recommendations/index.ts @@ -0,0 +1 @@ +export { registerRecommendations } from './command'; diff --git a/src/cli/commands/remove/command.tsx b/src/cli/commands/remove/command.tsx index 604e87348..3705c523f 100644 --- a/src/cli/commands/remove/command.tsx +++ b/src/cli/commands/remove/command.tsx @@ -35,6 +35,9 @@ async function handleRemoveAll(_options: RemoveAllOptions): Promise = { + 'system-prompt': 'SYSTEM_PROMPT_RECOMMENDATION', + 'tool-description': 'TOOL_DESCRIPTION_RECOMMENDATION', +}; + function formatRunOutput(result: Awaited>): void { if (!result.run) return; @@ -59,7 +76,7 @@ export const registerRun = (program: Command) => { ) .option('-r, --runtime ', 'Runtime name from project config') .option('--runtime-arn ', 'Runtime ARN — run outside a project directory') - .option('-e, --evaluator ', 'Evaluator name(s) from project or Builtin.* IDs') + .option('-e, --evaluator ', 'Evaluator name(s) — project evaluators or Builtin.* IDs') .option('--evaluator-arn ', 'Evaluator ARN(s) — use with --runtime-arn for standalone mode') .option('--region ', 'AWS region (required with --runtime-arn, auto-detected otherwise)') .option('-s, --session-id ', 'Evaluate a specific session only') @@ -69,9 +86,9 @@ export const registerRun = (program: Command) => { 'Runtime endpoint name (e.g. PROMPT_V1). 
Defaults to AGENTCORE_RUNTIME_ENDPOINT env var, then DEFAULT' ) .option('--days ', 'Lookback window in days', '7') - .option('-A, --assertion ', 'Assertion the agent should satisfy (repeatable)') - .option('--expected-trajectory ', 'Expected tool calls in order (comma-separated)') - .option('--expected-response ', 'Expected agent response text') + .option('-A, --assertion ', 'Ground truth assertion the agent response must satisfy (repeatable)') + .option('--expected-trajectory ', 'Ground truth: expected tool call names in order (comma-separated)') + .option('--expected-response ', 'Ground truth: expected agent response text to compare against') .option('--output ', 'Custom output file path for results') .option('--json', 'Output as JSON') .action( @@ -148,4 +165,386 @@ export const registerRun = (program: Command) => { } } ); + + runCmd + .command('batch-evaluation') + .description('[preview] Run evaluators in batch across all agent sessions in CloudWatch') + .requiredOption('-r, --runtime ', 'Runtime name from project config') + .requiredOption('-e, --evaluator ', 'Evaluator name(s) — Builtin.* IDs') + .option('-n, --name ', 'Name for the batch evaluation (auto-generated if omitted)') + .option('-d, --lookback-days ', 'Lookback window in days (filters sessions by time range)') + .option('-s, --session-ids ', 'Specific session IDs to evaluate') + .option( + '-g, --ground-truth ', + 'JSON file with session metadata and ground truth (assertions, expected trajectory, turns)' + ) + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action( + async (cliOptions: { + runtime: string; + evaluator: string[]; + name?: string; + lookbackDays?: string; + sessionIds?: string[]; + groundTruth?: string; + region?: string; + json?: boolean; + }) => { + requireProject(); + + try { + // Parse ground truth file if provided + let sessionMetadata: import('../../aws/agentcore-batch-evaluation').SessionMetadataEntry[] | undefined; + if 
(cliOptions.groundTruth) { + const { readFileSync } = await import('node:fs'); + const gtContent = readFileSync(cliOptions.groundTruth, 'utf-8'); + const gtData = JSON.parse(gtContent) as Record; + // Accept either a raw array or an object with a sessionMetadata key + sessionMetadata = Array.isArray(gtData) + ? (gtData as import('../../aws/agentcore-batch-evaluation').SessionMetadataEntry[]) + : (gtData.sessionMetadata as import('../../aws/agentcore-batch-evaluation').SessionMetadataEntry[]); + if (!Array.isArray(sessionMetadata)) { + throw new Error( + 'Ground truth file must be a JSON array of session metadata entries, or an object with a "sessionMetadata" key' + ); + } + } + + const lookbackDays = cliOptions.lookbackDays ? parseInt(cliOptions.lookbackDays, 10) : undefined; + const result = await runBatchEvaluationCommand({ + agent: cliOptions.runtime, + evaluators: cliOptions.evaluator, + name: cliOptions.name, + region: cliOptions.region, + sessionIds: cliOptions.sessionIds, + lookbackDays: lookbackDays && !isNaN(lookbackDays) ? lookbackDays : undefined, + sessionMetadata, + onProgress: cliOptions.json + ? undefined + : (_status, message) => { + console.log(message); + }, + }); + + // Save results locally + if (result.success) { + try { + const filePath = saveBatchEvalRun(result); + if (!cliOptions.json) { + console.log(`\nResults saved to: ${filePath}`); + } + } catch { + // Non-fatal — skip saving + } + } + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else if (result.success) { + formatBatchEvalOutput(result); + } else { + render({result.error}); + if (result.logFilePath) { + console.error(`\nLog: ${result.logFilePath}`); + } + } + + process.exit(result.success ? 
0 : 1); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + render(Error: {getErrorMessage(error)}); + } + process.exit(1); + } + } + ); + + runCmd + .command('recommendation') + .description('[preview] Optimize a system prompt or tool descriptions using agent traces as signal') + .option('-t, --type ', 'What to optimize: system-prompt or tool-description (default: system-prompt)') + .option('-r, --runtime ', 'Runtime name from project config') + .option('-e, --evaluator ', 'Evaluator name — required for system-prompt (exactly one)') + .option('--prompt-file ', 'Load the current system prompt from a file') + .option('--inline ', 'Provide the current system prompt or tool descriptions inline') + .option('--bundle-name ', 'Read current content from a deployed config bundle') + .option('--bundle-version ', 'Config bundle version (used with --bundle-name)') + .option( + '--system-prompt-json-path ', + 'Field name under "configuration" in the bundle (e.g. "systemPrompt"). The CLI resolves it to the full path automatically. Do not use bracket notation — use dot notation only.' + ) + .option( + '--tool-desc-json-path ', + 'Tool name:field pairs for tool descriptions in a config bundle (e.g. --tool-desc-json-path "search:searchDesc"). The CLI resolves each to the full path automatically.' + ) + .option( + '--tools ', + 'Tool name:description pairs (repeatable, e.g. 
--tools "search:Searches the web" --tools "calc:Does math")' + ) + .option('--spans-file ', 'JSON file with OTEL session spans (use instead of CloudWatch traces)') + .option('--lookback ', 'How far back to search for traces in CloudWatch (days)', '7') + .option('-s, --session-id ', 'Limit trace collection to specific session IDs') + .option('-n, --run ', 'Run name prefix for the recommendation') + .option('--region ', 'AWS region') + .option('--json', 'Output as JSON') + .action( + async (cliOptions: { + type?: string; + runtime?: string; + evaluator?: string; + promptFile?: string; + inline?: string; + bundleName?: string; + bundleVersion?: string; + systemPromptJsonPath?: string; + toolDescJsonPath?: string[]; + tools?: string[]; + spansFile?: string; + lookback: string; + sessionId?: string[]; + run?: string; + region?: string; + json?: boolean; + }) => { + requireProject(); + + const typeKey = cliOptions.type ?? 'system-prompt'; + const recType = RECOMMENDATION_TYPE_MAP[typeKey]; + if (!recType) { + const error = `Invalid --type "${typeKey}". Must be one of: ${Object.keys(RECOMMENDATION_TYPE_MAP).join(', ')}`; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + + const agent = cliOptions.runtime; + const evaluator = cliOptions.evaluator; + + if (!agent) { + const error = '--runtime is required'; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + + // Evaluator is required for system-prompt recs, optional for tool-description + if (recType === 'SYSTEM_PROMPT_RECOMMENDATION' && !evaluator) { + const error = '--evaluator is required for system-prompt recommendations'; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + + try { + const inputSource = cliOptions.promptFile + ? 
('file' as const) + : cliOptions.inline + ? ('inline' as const) + : cliOptions.bundleName + ? ('config-bundle' as const) + : ('inline' as const); + + const traceSource = cliOptions.spansFile + ? ('spans-file' as const) + : cliOptions.sessionId + ? ('sessions' as const) + : ('cloudwatch' as const); + + // Parse --tool-desc-json-path pairs ("toolName:$.json.path") into structured format + const toolDescJsonPaths = cliOptions.toolDescJsonPath + ?.map(pair => { + const colonIdx = pair.indexOf(':'); + if (colonIdx <= 0) return undefined; + return { + toolName: pair.slice(0, colonIdx), + toolDescriptionJsonPath: pair.slice(colonIdx + 1), + }; + }) + .filter((p): p is { toolName: string; toolDescriptionJsonPath: string } => p !== undefined); + + const result = await runRecommendationCommand({ + type: recType, + agent, + evaluators: evaluator ? [evaluator] : [], + promptFile: cliOptions.promptFile, + inlineContent: cliOptions.inline, + bundleName: cliOptions.bundleName, + bundleVersion: cliOptions.bundleVersion, + systemPromptJsonPath: cliOptions.systemPromptJsonPath, + toolDescJsonPaths: toolDescJsonPaths?.length ? toolDescJsonPaths : undefined, + tools: cliOptions.tools, + lookbackDays: parseInt(cliOptions.lookback, 10), + sessionIds: cliOptions.sessionId, + spansFile: cliOptions.spansFile, + recommendationName: cliOptions.run, + region: cliOptions.region, + inputSource, + traceSource, + onProgress: cliOptions.json + ? undefined + : (_status, message) => { + console.log(message); + }, + }); + + if (!result.success) { + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else { + render({result.error}); + if (result.logFilePath) { + console.error(`\nLog: ${result.logFilePath}`); + } + } + process.exit(1); + } + + // Save results locally + let savedFilePath: string | undefined; + try { + if (result.recommendationId) { + savedFilePath = saveRecommendationRun( + result.recommendationId, + result, + recType, + agent, + evaluator ? 
[evaluator] : [] + ); + } + } catch { + // Non-fatal — skip saving + } + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else { + console.log(`\nRecommendation ID: ${result.recommendationId}`); + + if (result.result) { + const sysResult = result.result.systemPromptRecommendationResult; + const toolResult = result.result.toolDescriptionRecommendationResult; + + if (sysResult) { + if (sysResult.recommendedSystemPrompt) { + console.log('\n+++ Recommended System Prompt +++'); + console.log(sysResult.recommendedSystemPrompt); + } + } else if (toolResult?.tools) { + for (const tool of toolResult.tools) { + console.log(`\nTool: ${tool.toolName}`); + console.log(`Recommended: ${tool.recommendedToolDescription}`); + } + } + } + + if (savedFilePath) { + console.log(`\nResults saved to: ${savedFilePath}`); + } + + // Sync local config bundle after server-side recommendation apply + if (inputSource === 'config-bundle' && cliOptions.bundleName && result.result && result.region) { + try { + const applyResult = await applyRecommendationToBundle({ + bundleName: cliOptions.bundleName, + result: result.result, + region: result.region, + }); + if (applyResult.success) { + console.log( + `\nA new config bundle version (${applyResult.newVersionId}) was created with the recommended changes.` + ); + console.log(`Local config for "${cliOptions.bundleName}" has been updated to match.`); + } else { + console.log(`\nCould not sync config bundle: ${applyResult.error}`); + } + } catch { + // Non-fatal — user can manually sync + } + } + console.log(''); + } + + process.exit(0); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + render(Error: {getErrorMessage(error)}); + } + process.exit(1); + } + } + ); }; + +function formatBatchEvalOutput(result: RunBatchEvaluationCommandResult): void { + console.log(`\nBatch Evaluation: ${result.name ?? 
result.batchEvaluationId}`); + console.log(`ID: ${result.batchEvaluationId}`); + console.log(`Status: ${result.status}`); + + // Show session stats from API if available + const evalResults = result.evaluationResults; + if (evalResults) { + const parts: string[] = []; + if (evalResults.totalNumberOfSessions != null) parts.push(`${evalResults.totalNumberOfSessions} sessions`); + if (evalResults.numberOfSessionsCompleted != null) parts.push(`${evalResults.numberOfSessionsCompleted} completed`); + if (evalResults.numberOfSessionsFailed) parts.push(`${evalResults.numberOfSessionsFailed} failed`); + if (parts.length > 0) console.log(`Sessions: ${parts.join(', ')}`); + } + + console.log(''); + + // Prefer API evaluatorSummaries over local computation + const summaries = evalResults?.evaluatorSummaries; + if (summaries && summaries.length > 0) { + for (const s of summaries) { + const avg = s.statistics?.averageScore; + const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; + const failSuffix = s.totalFailed ? ` (${s.totalFailed} failed)` : ''; + const evalCount = s.totalEvaluated != null ? ` [${s.totalEvaluated} evaluated]` : ''; + console.log(` ${s.evaluatorId}: ${avgStr} avg${failSuffix}${evalCount}`); + } + } else if (result.results.length > 0) { + // Fall back to local computation from CloudWatch results + const byEvaluator = new Map(); + for (const r of result.results) { + const group = byEvaluator.get(r.evaluatorId) ?? []; + group.push(r); + byEvaluator.set(r.evaluatorId, group); + } + + for (const [evalId, evalGroup] of byEvaluator) { + const scores = evalGroup.filter(r => !r.error).map(r => r.score!); + const avg = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0; + const errors = evalGroup.filter(r => r.error).length; + const errorSuffix = errors > 0 ? 
` (${errors} errors)` : ''; + + console.log(` ${evalId}: ${avg.toFixed(2)} avg${errorSuffix}`); + + for (const r of evalGroup) { + if (r.error) { + console.log(` ERROR: ${r.error.slice(0, 80)}`); + } else { + const labelStr = r.label ? ` (${r.label})` : ''; + console.log(` ${r.score?.toFixed(2)}${labelStr}`); + } + } + } + } else { + console.log(' No evaluation results found.'); + } + + console.log(''); +} diff --git a/src/cli/commands/status/action.ts b/src/cli/commands/status/action.ts index 0d1d8ca3e..1ef72aaca 100644 --- a/src/cli/commands/status/action.ts +++ b/src/cli/commands/status/action.ts @@ -3,6 +3,7 @@ import type { AgentCoreProjectSpec, AwsDeploymentTargets, DeployedResourceState, import { getAgentRuntimeStatus } from '../../aws'; import { getEvaluator, getOnlineEvaluationConfig } from '../../aws/agentcore-control'; import { getHarness } from '../../aws/agentcore-harness'; +import { dnsSuffix } from '../../aws/partition'; import { getErrorMessage } from '../../errors'; import { ExecLogger } from '../../logging'; import type { ResourceDeploymentState } from './constants'; @@ -21,6 +22,8 @@ export interface ResourceStatusEntry { | 'online-eval' | 'policy-engine' | 'policy' + | 'config-bundle' + | 'ab-test' | 'runtime-endpoint'; name: string; deploymentState: ResourceDeploymentState; @@ -126,6 +129,30 @@ function diffResourceSet({ return entries; } +/** + * Build the full gateway invocation URL for an AB test. + * Appends the runtime target name and /invocations path to the gateway base URL. + */ +function buildGatewayInvocationUrl( + gwState: { gatewayId: string; gatewayArn: string; gatewayUrl?: string }, + gwName: string, + project: AgentCoreProjectSpec +): string | undefined { + // Use stored URL or derive from ARN: arn:aws:bedrock-agentcore:{region}:{account}:gateway/{id} + const baseUrl = + gwState.gatewayUrl ?? + (() => { + const region = gwState.gatewayArn.split(':')[3]; + return region + ? 
`https://${gwState.gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}` + : undefined; + })(); + if (!baseUrl) return undefined; + const gwSpec = (project.httpGateways ?? []).find(gw => gw.name === gwName); + if (!gwSpec) return baseUrl; + return `${baseUrl}/${gwSpec.runtimeRef}/invocations`; +} + export function computeResourceStatuses( project: AgentCoreProjectSpec, resources: DeployedResourceState | undefined @@ -219,6 +246,37 @@ export function computeResourceStatuses( getDeployedKey: item => `${item.engineName}/${item.name}`, }); + const configBundles = diffResourceSet({ + resourceType: 'config-bundle', + localItems: project.configBundles ?? [], + deployedRecord: resources?.configBundles ?? {}, + getIdentifier: deployed => deployed.bundleArn, + getLocalDetail: item => item.description, + }); + + const abTests = diffResourceSet({ + resourceType: 'ab-test', + localItems: project.abTests ?? [], + deployedRecord: resources?.abTests ?? {}, + getIdentifier: deployed => deployed.abTestArn, + getLocalDetail: item => item.description, + }); + + // Enrich deployed AB tests with gateway invocation URL + const httpGatewayState = resources?.httpGateways ?? {}; + for (const entry of abTests) { + if (entry.deploymentState !== 'deployed') continue; + const testSpec = (project.abTests ?? 
[]).find(t => t.name === entry.name); + if (!testSpec) continue; + const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(testSpec.gatewayRef); + const gwName = gwMatch?.[1]; + if (!gwName) continue; + const gwState = httpGatewayState[gwName]; + if (!gwState) continue; + const url = buildGatewayInvocationUrl(gwState, gwName, project); + if (url) entry.invocationUrl = url; + } + // Flatten runtime endpoints for diffing against deployed state const localEndpoints: { name: string; agentName: string; version: number; description?: string }[] = []; for (const runtime of project.runtimes) { @@ -255,6 +313,8 @@ export function computeResourceStatuses( ...onlineEvalConfigs, ...policyEngines, ...policies, + ...configBundles, + ...abTests, ]; } diff --git a/src/cli/commands/status/command.tsx b/src/cli/commands/status/command.tsx index 3db8edd05..bd997a999 100644 --- a/src/cli/commands/status/command.tsx +++ b/src/cli/commands/status/command.tsx @@ -18,6 +18,8 @@ const VALID_RESOURCE_TYPES = [ 'online-eval', 'policy-engine', 'policy', + 'config-bundle', + 'ab-test', ] as const; const VALID_STATES = ['deployed', 'local-only', 'pending-removal'] as const; @@ -60,7 +62,7 @@ export const registerStatus = (program: Command) => { .option('--target ', 'Select deployment target') .option( '--type ', - 'Filter by resource type (agent, harness, runtime-endpoint, memory, credential, gateway, evaluator, online-eval, policy-engine, policy)' + 'Filter by resource type (agent, harness, runtime-endpoint, memory, credential, gateway, evaluator, online-eval, policy-engine, policy, config-bundle, ab-test)' ) .option('--state ', 'Filter by deployment state (deployed, local-only, pending-removal)') .option('--runtime ', 'Filter to a specific runtime') @@ -146,11 +148,14 @@ export const registerStatus = (program: Command) => { const onlineEvals = filtered.filter(r => r.resourceType === 'online-eval'); const policyEngines = filtered.filter(r => r.resourceType === 'policy-engine'); const policies = 
filtered.filter(r => r.resourceType === 'policy'); + const configBundles = filtered.filter(r => r.resourceType === 'config-bundle'); + const abTests = filtered.filter(r => r.resourceType === 'ab-test'); + // TODO: Add http-gateway resource type when diffResourceSet for HTTP gateways is added to action.ts render( - AgentCore Status (target: {result.targetName} + AgentCore Status (target: {result.targetName || 'No target configured'} {result.targetRegion ? `, ${result.targetRegion}` : ''}) @@ -268,6 +273,33 @@ export const registerStatus = (program: Command) => { )} + {configBundles.length > 0 && ( + + Config Bundles + {configBundles.map(entry => ( + + ))} + + )} + + {abTests.length > 0 && ( + + AB Tests + {abTests.map(entry => ( + + + {entry.invocationUrl && ( + + {' '}Invocation URL: {entry.invocationUrl} + + )} + + ))} + + )} + + {/* TODO: Add HTTP Gateways render section when diffResourceSet is added to action.ts */} + {filtered.length === 0 && No resources match the given filters.} ); diff --git a/src/cli/commands/stop/command.tsx b/src/cli/commands/stop/command.tsx new file mode 100644 index 000000000..7a29dd8da --- /dev/null +++ b/src/cli/commands/stop/command.tsx @@ -0,0 +1,57 @@ +import { ConfigIO } from '../../../lib'; +import { stopBatchEvaluation } from '../../aws/agentcore-batch-evaluation'; +import { getErrorMessage } from '../../errors'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; +import type { Command } from '@commander-js/extra-typings'; +import { Text, render } from 'ink'; +import React from 'react'; + +async function getRegion(cliRegion?: string): Promise { + if (cliRegion) return cliRegion; + try { + const configIO = new ConfigIO(); + const targets = await configIO.resolveAWSDeploymentTargets(); + if (targets.length > 0) return targets[0]!.region; + } catch { + // Fall through to env vars + } + return process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 
'us-east-1'; +} + +export const registerStop = (program: Command) => { + const stopCmd = program.command('stop').description(COMMAND_DESCRIPTIONS.stop); + + stopCmd + .command('batch-evaluation') + .description('[preview] Stop a running batch evaluation') + .requiredOption('-i, --id ', 'Batch evaluation ID to stop') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action(async (cliOptions: { id: string; region?: string; json?: boolean }) => { + try { + const region = await getRegion(cliOptions.region); + + const result = await stopBatchEvaluation({ + region, + batchEvaluationId: cliOptions.id, + }); + + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, ...result })); + } else { + console.log(`\nBatch evaluation stopped successfully`); + console.log(`ID: ${result.batchEvaluationId}`); + console.log(`Status: ${result.status}\n`); + } + + process.exit(0); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + render(Error: {getErrorMessage(error)}); + } + process.exit(1); + } + }); +}; diff --git a/src/cli/commands/stop/index.ts b/src/cli/commands/stop/index.ts new file mode 100644 index 000000000..1f1a5e1e2 --- /dev/null +++ b/src/cli/commands/stop/index.ts @@ -0,0 +1 @@ +export { registerStop } from './command'; diff --git a/src/cli/commands/telemetry/__tests__/telemetry.test.ts b/src/cli/commands/telemetry/__tests__/telemetry.test.ts index b0e615fcd..efdfd2f23 100644 --- a/src/cli/commands/telemetry/__tests__/telemetry.test.ts +++ b/src/cli/commands/telemetry/__tests__/telemetry.test.ts @@ -1,5 +1,5 @@ +import { readGlobalConfig } from '../../../../lib/schemas/io/global-config'; import { createTempConfig } from '../../../__tests__/helpers/temp-config'; -import { readGlobalConfig } from '../../../global-config'; import { handleTelemetryDisable, handleTelemetryEnable, handleTelemetryStatus } from
'../actions'; import { chmod, mkdir, rm, writeFile } from 'fs/promises'; import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; diff --git a/src/cli/commands/telemetry/actions.ts b/src/cli/commands/telemetry/actions.ts index 90750a0f6..696608e01 100644 --- a/src/cli/commands/telemetry/actions.ts +++ b/src/cli/commands/telemetry/actions.ts @@ -1,4 +1,4 @@ -import { GLOBAL_CONFIG_DIR, GLOBAL_CONFIG_FILE, updateGlobalConfig } from '../../global-config.js'; +import { GLOBAL_CONFIG_DIR, GLOBAL_CONFIG_FILE, updateGlobalConfig } from '../../../lib/schemas/io/global-config.js'; import { resolveTelemetryPreference } from '../../telemetry/config.js'; export async function handleTelemetryDisable( diff --git a/src/cli/commands/update/command.tsx b/src/cli/commands/update/command.tsx index cd7d3b70a..06bb9ebad 100644 --- a/src/cli/commands/update/command.tsx +++ b/src/cli/commands/update/command.tsx @@ -3,11 +3,54 @@ import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; import { handleUpdate } from './action'; import type { Command } from '@commander-js/extra-typings'; import { Text, render } from 'ink'; +import React from 'react'; export const registerUpdate = (program: Command) => { - program - .command('update') - .description(COMMAND_DESCRIPTIONS.update) + const updateCmd = program.command('update').description(COMMAND_DESCRIPTIONS.update); + + // Default action for bare `agentcore update` - backwards compatibility with CLI self-update + updateCmd.option('-c, --check', 'Check for updates without installing').action(async options => { + try { + render(Checking for updates...); + const result = await handleUpdate(options.check ?? 
false); + + switch (result.status) { + case 'up-to-date': + render(You are already on the latest version ({result.currentVersion})); + break; + case 'newer-local': + render( + + Your version ({result.currentVersion}) is newer than the published version ({result.latestVersion}) + + ); + break; + case 'update-available': + render( + + Update available: {result.currentVersion} → {result.latestVersion} + + ); + render(Run `agentcore update` to install the update.); + break; + case 'updated': + render(Successfully updated to {result.latestVersion}); + break; + case 'update-failed': + render(Failed to install update. Try running: npm install -g @aws/agentcore@latest); + process.exit(1); + break; + } + } catch (error) { + render(Error: {getErrorMessage(error)}); + process.exit(1); + } + }); + + // CLI self-update subcommand + updateCmd + .command('cli') + .description('Update the AgentCore CLI to the latest version') .option('-c, --check', 'Check for updates without installing') .action(async options => { try { @@ -31,7 +74,7 @@ export const registerUpdate = (program: Command) => { Update available: {result.currentVersion} → {result.latestVersion} ); - render(Run `agentcore update` to install the update.); + render(Run `agentcore update cli` to install the update.); break; case 'updated': render(Successfully updated to {result.latestVersion}); diff --git a/src/cli/external-requirements/__tests__/checks-extended.test.ts b/src/cli/external-requirements/__tests__/checks-extended.test.ts index dec5ab209..ecd4fe526 100644 --- a/src/cli/external-requirements/__tests__/checks-extended.test.ts +++ b/src/cli/external-requirements/__tests__/checks-extended.test.ts @@ -54,6 +54,9 @@ describe('requiresUv', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresUv(project)).toBe(true); }); @@ -80,6 +83,9 @@ describe('requiresUv', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + 
configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresUv(project)).toBe(false); }); @@ -97,6 +103,9 @@ describe('requiresUv', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresUv(project)).toBe(false); }); @@ -125,6 +134,9 @@ describe('requiresContainerRuntime', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresContainerRuntime(project)).toBe(true); }); @@ -151,6 +163,9 @@ describe('requiresContainerRuntime', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresContainerRuntime(project)).toBe(false); }); @@ -168,6 +183,9 @@ describe('requiresContainerRuntime', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresContainerRuntime(project)).toBe(false); }); @@ -202,6 +220,9 @@ describe('requiresContainerRuntime', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(requiresContainerRuntime(project)).toBe(true); }); @@ -270,6 +291,9 @@ describe('checkDependencyVersions', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const result = await checkDependencyVersions(project); @@ -291,6 +315,9 @@ describe('checkDependencyVersions', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const result = await checkDependencyVersions(project); @@ -320,6 +347,9 @@ describe('checkDependencyVersions', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const result = await checkDependencyVersions(project); diff --git 
a/src/cli/logging/remove-logger.ts b/src/cli/logging/remove-logger.ts index 45f9b1de1..4f659c089 100644 --- a/src/cli/logging/remove-logger.ts +++ b/src/cli/logging/remove-logger.ts @@ -18,7 +18,9 @@ export interface RemoveLoggerOptions { | 'evaluator' | 'online-eval' | 'policy-engine' - | 'policy'; + | 'policy' + | 'config-bundle' + | 'ab-test'; /** Name of the resource being removed */ resourceName: string; } diff --git a/src/cli/operations/ab-test/__tests__/promote.test.ts b/src/cli/operations/ab-test/__tests__/promote.test.ts new file mode 100644 index 000000000..2abf8583c --- /dev/null +++ b/src/cli/operations/ab-test/__tests__/promote.test.ts @@ -0,0 +1,270 @@ +import { promoteABTestConfig } from '../promote'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// Mock ConfigIO — vi.hoisted ensures these are available before the hoisted vi.mock runs +const { mockReadProjectSpec, mockWriteProjectSpec, mockReadDeployedState } = vi.hoisted(() => ({ + mockReadProjectSpec: vi.fn(), + mockWriteProjectSpec: vi.fn(), + mockReadDeployedState: vi.fn(), +})); + +vi.mock('../../../../lib', () => { + class MockConfigIO { + readProjectSpec = mockReadProjectSpec; + writeProjectSpec = mockWriteProjectSpec; + readDeployedState = mockReadDeployedState; + } + return { ConfigIO: MockConfigIO }; +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeConfigBundleProject(testName = 'myTest') { + return { + name: 'TestProject', + runtimes: [], + httpGateways: [], + onlineEvalConfigs: [], + abTests: [ + { + name: testName, + mode: 'config-bundle' as const, + gatewayRef: '{{gateway:my-gw}}', + variants: [ + { + name: 'C' as const, + weight: 50, + variantConfiguration: { + configurationBundle: { bundleArn: 'arn:aws:bundle:control', bundleVersion: 'v1' }, + }, + }, + { + name: 'T1' as const, + weight: 50, + variantConfiguration: { + 
configurationBundle: { bundleArn: 'arn:aws:bundle:treatment', bundleVersion: 'v2' }, + }, + }, + ], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:aws:eval:config' }, + }, + ], + }; +} + +function makeTargetBasedProject(testName = 'targetTest') { + return { + name: 'TestProject', + runtimes: [ + { + name: 'my-runtime', + endpoints: { + control: { version: '1.0' }, + treatment: { version: '2.0' }, + }, + }, + ], + httpGateways: [ + { + name: 'my-gw', + targets: [ + { name: 'ctrl-target', runtimeRef: 'my-runtime', qualifier: 'control' }, + { name: 'treat-target', runtimeRef: 'my-runtime', qualifier: 'treatment' }, + ], + }, + ], + onlineEvalConfigs: [], + abTests: [ + { + name: testName, + mode: 'target-based' as const, + gatewayRef: '{{gateway:my-gw}}', + variants: [ + { + name: 'C' as const, + weight: 50, + variantConfiguration: { target: { targetName: 'ctrl-target' } }, + }, + { + name: 'T1' as const, + weight: 50, + variantConfiguration: { target: { targetName: 'treat-target' } }, + }, + ], + evaluationConfig: { + perVariantOnlineEvaluationConfig: [ + { treatmentName: 'C' as const, onlineEvaluationConfigArn: 'eval-c' }, + { treatmentName: 'T1' as const, onlineEvaluationConfigArn: 'eval-t1' }, + ], + }, + }, + ], + }; +} + +function makeDeployedState(specName: string, abTestId: string) { + return { + targets: { + default: { + resources: { + abTests: { + [specName]: { abTestId, abTestArn: `arn:aws:ab-test:${abTestId}` }, + }, + }, + }, + }, + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('promoteABTestConfig', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockWriteProjectSpec.mockResolvedValue(undefined); + }); + + describe('target-based promote', () => { + it('updates control endpoint version to treatment version', async () => { + const project = makeTargetBasedProject(); + 
mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockResolvedValue(makeDeployedState('targetTest', 'ab-123')); + + const result = await promoteABTestConfig('ab-123'); + + expect(result.promoted).toBe(true); + expect(result.mode).toBe('target-based'); + expect(result.promotionDetail).toContain('control'); + expect(result.promotionDetail).toContain('2.0'); + + // Verify the project was written with updated control version + expect(mockWriteProjectSpec).toHaveBeenCalledOnce(); + const writtenProject = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenProject.runtimes[0].endpoints.control.version).toBe('2.0'); + }); + }); + + describe('config-bundle promote', () => { + it('copies treatment bundle ref to control', async () => { + const project = makeConfigBundleProject(); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockResolvedValue(makeDeployedState('myTest', 'ab-456')); + + const result = await promoteABTestConfig('ab-456'); + + expect(result.promoted).toBe(true); + expect(result.mode).toBe('config-bundle'); + expect(result.promotionDetail).toContain('arn:aws:bundle:treatment'); + expect(result.promotionDetail).toContain('v2'); + + // Verify the control bundle was updated + expect(mockWriteProjectSpec).toHaveBeenCalledOnce(); + const writtenProject = mockWriteProjectSpec.mock.calls[0]![0]; + const controlVariant = writtenProject.abTests[0].variants.find((v: { name: string }) => v.name === 'C'); + expect(controlVariant.variantConfiguration.configurationBundle.bundleArn).toBe('arn:aws:bundle:treatment'); + expect(controlVariant.variantConfiguration.configurationBundle.bundleVersion).toBe('v2'); + }); + }); + + describe('not found', () => { + it('returns promoted=false with message when AB test not found', async () => { + const project = makeConfigBundleProject(); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockResolvedValue({ targets: { default: { resources: { abTests: {} } } } }); 
+ + const result = await promoteABTestConfig('nonexistent-id'); + + expect(result.promoted).toBe(false); + expect(result.promotionDetail).toContain('not found'); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + }); + + describe('ID-based lookup from deployed state', () => { + it('resolves spec name from deployed state using abTestId', async () => { + const project = makeConfigBundleProject('mySpecTest'); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockResolvedValue(makeDeployedState('mySpecTest', 'ab-789')); + + const result = await promoteABTestConfig('ab-789'); + + expect(result.promoted).toBe(true); + expect(result.mode).toBe('config-bundle'); + // Should have resolved without needing testNameFallback + expect(mockWriteProjectSpec).toHaveBeenCalledOnce(); + }); + + it('searches across multiple targets in deployed state', async () => { + const project = makeConfigBundleProject('crossTarget'); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockResolvedValue({ + targets: { + 'us-east-1': { resources: { abTests: {} } }, + 'us-west-2': { + resources: { + abTests: { + crossTarget: { abTestId: 'ab-cross', abTestArn: 'arn:aws:ab-test:ab-cross' }, + }, + }, + }, + }, + }); + + const result = await promoteABTestConfig('ab-cross'); + + expect(result.promoted).toBe(true); + }); + }); + + describe('name fallback when deployed state missing', () => { + it('falls back to name-based lookup when deployed state throws', async () => { + const project = makeConfigBundleProject('fallbackTest'); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); + + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(vi.fn()); + + const result = await promoteABTestConfig('unknown-id', 'fallbackTest'); + + expect(result.promoted).toBe(true); + expect(result.mode).toBe('config-bundle'); + 
expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('falling back to name')); + + warnSpy.mockRestore(); + }); + + it('falls back to prefixed name match', async () => { + const project = makeConfigBundleProject('myTest'); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); + + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(vi.fn()); + + // testNameFallback uses the prefixed format {projectName}_{testName} + const result = await promoteABTestConfig('unknown-id', 'TestProject_myTest'); + + expect(result.promoted).toBe(true); + + warnSpy.mockRestore(); + }); + + it('returns not found when neither deployed state nor name matches', async () => { + const project = makeConfigBundleProject('myTest'); + mockReadProjectSpec.mockResolvedValue(project); + mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); + + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(vi.fn()); + + const result = await promoteABTestConfig('unknown-id', 'nonexistent'); + + expect(result.promoted).toBe(false); + expect(result.promotionDetail).toContain('not found'); + + warnSpy.mockRestore(); + }); + }); +}); diff --git a/src/cli/operations/ab-test/promote.ts b/src/cli/operations/ab-test/promote.ts new file mode 100644 index 000000000..5f98e52f6 --- /dev/null +++ b/src/cli/operations/ab-test/promote.ts @@ -0,0 +1,124 @@ +import { ConfigIO } from '../../../lib'; + +export interface PromoteABTestResult { + promoted: boolean; + mode?: string; + promotionDetail: string; +} + +/** + * Resolve the spec-level AB test name from a deployed abTestId. + * Looks up which entry in deployed state has that abTestId and returns + * the spec name (the key in the abTests record). 
/**
 * Map a deployed AB test ID back to the name it is declared under in the project spec.
 *
 * Walks every deployment target in `deployedState` and returns the key of the first
 * `abTests` entry whose `abTestId` equals the requested ID.
 *
 * @param _configIO - Unused; kept so the helper's call shape stays the same.
 * @param deployedState - Deployed state, keyed by target name.
 * @param abTestId - The deployed AB test ID to look for.
 * @returns The spec-level AB test name, or `undefined` when no target knows the ID.
 */
function resolveSpecNameFromDeployedState(
  _configIO: ConfigIO,
  deployedState: { targets: Record<string, { resources?: { abTests?: Record<string, { abTestId: string }> } }> },
  abTestId: string
): string | undefined {
  for (const { resources } of Object.values(deployedState.targets)) {
    const hit = Object.entries(resources?.abTests ?? {}).find(([, deployed]) => deployed.abTestId === abTestId);
    if (hit) {
      return hit[0];
    }
  }
  return undefined;
}

/**
 * Promote the treatment variant of an AB test inside agentcore.json.
 *
 * - target-based tests: the control endpoint's `version` is overwritten with the
 *   treatment endpoint's `version`.
 * - config-bundle tests (also the default when `mode` is unset): the control variant's
 *   `configurationBundle` is replaced by a shallow copy of the treatment variant's.
 *
 * The project spec is written back only when a promotion actually happened.
 * This function does not stop the AB test; the caller is responsible for that.
 *
 * @param abTestId - The deployed AB test ID.
 * @param testNameFallback - Name used when the ID cannot be resolved from deployed state.
 *   Matched case-insensitively against both `name` and `{projectName}_{name}`.
 * @returns Whether the spec was updated, the effective mode, and a human-readable detail.
 */
export async function promoteABTestConfig(abTestId: string, testNameFallback?: string): Promise<PromoteABTestResult> {
  const configIO = new ConfigIO();
  const project = await configIO.readProjectSpec();
  const declaredTests = project.abTests ?? [];

  // Preferred route: deployed state knows which spec entry owns this ID.
  let specName: string | undefined;
  try {
    specName = resolveSpecNameFromDeployedState(configIO, await configIO.readDeployedState(), abTestId);
  } catch {
    // Deployed state unavailable
  }

  // Second route: match on the caller-supplied name (plain or project-prefixed).
  if (!specName && testNameFallback) {
    console.warn(
      `[promote] Could not resolve AB test ID "${abTestId}" from deployed state; falling back to name "${testNameFallback}".`
    );
    const wanted = testNameFallback.toLowerCase();
    specName = declaredTests.find(candidate => {
      const plain = candidate.name.toLowerCase();
      const prefixed = `${project.name}_${candidate.name}`.toLowerCase();
      return plain === wanted || prefixed === wanted;
    })?.name;
  }

  const abTest = specName ? declaredTests.find(candidate => candidate.name === specName) : undefined;
  if (!abTest) {
    return { promoted: false, promotionDetail: `AB test with ID "${abTestId}" not found in project config.` };
  }

  const mode = abTest.mode ?? 'config-bundle';
  const control = abTest.variants.find(variant => variant.name === 'C');
  const treatment = abTest.variants.find(variant => variant.name === 'T1');

  if (abTest.mode === 'target-based') {
    const unresolved: PromoteABTestResult = {
      promoted: false,
      mode,
      promotionDetail: 'Could not resolve target endpoints for promotion.',
    };

    const controlTargetName = control?.variantConfiguration.target?.targetName;
    const treatmentTargetName = treatment?.variantConfiguration.target?.targetName;

    // gatewayRef must be a "{{gateway:<name>}}" placeholder to be resolvable locally.
    const gatewayName = /^\{\{gateway:(.+)\}\}$/.exec(abTest.gatewayRef)?.[1];
    if (!gatewayName) return unresolved;

    const gateway = (project.httpGateways ?? []).find(candidate => candidate.name === gatewayName);
    if (!gateway?.targets) return unresolved;

    const controlTarget = gateway.targets.find(target => target.name === controlTargetName);
    const treatmentTarget = gateway.targets.find(target => target.name === treatmentTargetName);
    if (!controlTarget || !treatmentTarget) return unresolved;

    // Both endpoints are looked up on the runtime referenced by the control target.
    const runtime = project.runtimes.find(candidate => candidate.name === controlTarget.runtimeRef);
    const controlEndpoint = runtime?.endpoints?.[controlTarget.qualifier];
    const treatmentEndpoint = runtime?.endpoints?.[treatmentTarget.qualifier];
    if (!controlEndpoint || !treatmentEndpoint) return unresolved;

    controlEndpoint.version = treatmentEndpoint.version;
    await configIO.writeProjectSpec(project);
    return {
      promoted: true,
      mode,
      promotionDetail: `Control endpoint "${controlTarget.qualifier}" updated to version ${treatmentEndpoint.version} (from treatment "${treatmentTarget.qualifier}").`,
    };
  }

  // Config-bundle mode
  const treatmentBundle = treatment?.variantConfiguration.configurationBundle;
  if (!control?.variantConfiguration.configurationBundle || !treatmentBundle) {
    return { promoted: false, mode, promotionDetail: 'Could not resolve config bundles for promotion.' };
  }

  control.variantConfiguration.configurationBundle = { ...treatmentBundle };
  await configIO.writeProjectSpec(project);
  return {
    promoted: true,
    mode,
    promotionDetail: `Control bundle updated to "${treatmentBundle.bundleArn}" version "${treatmentBundle.bundleVersion}".`,
  };
}
Use tools when appropriate.', + toolDescriptions: { + add_numbers: 'Return the sum of two numbers', + }, + }, + }, + }, + branchName: 'mainline', + commitMessage: 'Initial configuration', + }); + + await configIO.writeProjectSpec(project); +} diff --git a/src/cli/operations/agent/generate/schema-mapper.ts b/src/cli/operations/agent/generate/schema-mapper.ts index 6d8ca15ab..3ed449236 100644 --- a/src/cli/operations/agent/generate/schema-mapper.ts +++ b/src/cli/operations/agent/generate/schema-mapper.ts @@ -284,5 +284,6 @@ export async function mapGenerateConfigToRenderConfig( dockerfile: config.dockerfile, sessionStorageMountPath: config.sessionStorageMountPath, enableOtel, + hasConfigBundle: config.withConfigBundle, }; } diff --git a/src/cli/operations/agent/generate/write-agent-to-project.ts b/src/cli/operations/agent/generate/write-agent-to-project.ts index 8c05a52ea..728236e54 100644 --- a/src/cli/operations/agent/generate/write-agent-to-project.ts +++ b/src/cli/operations/agent/generate/write-agent-to-project.ts @@ -73,6 +73,9 @@ export async function writeAgentToProject(config: GenerateConfig, options?: Writ agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; await configIO.writeProjectSpec(project); diff --git a/src/cli/operations/config-bundle/__tests__/bundle-name-variants.test.ts b/src/cli/operations/config-bundle/__tests__/bundle-name-variants.test.ts new file mode 100644 index 000000000..5c753cb52 --- /dev/null +++ b/src/cli/operations/config-bundle/__tests__/bundle-name-variants.test.ts @@ -0,0 +1,22 @@ +import { getBundleNameVariants } from '../bundle-name-variants'; +import { describe, expect, it } from 'vitest'; + +describe('getBundleNameVariants', () => { + it('returns only the bundle name when no project name', () => { + expect(getBundleNameVariants('MyBundle')).toEqual(['MyBundle']); + }); + + it('returns only the bundle name when project name is undefined', () => { + 
expect(getBundleNameVariants('MyBundle', undefined)).toEqual(['MyBundle']); + }); + + it('returns three variants when project name is provided', () => { + const variants = getBundleNameVariants('MyBundle', 'testevo'); + expect(variants).toEqual(['MyBundle', 'testevoMyBundle', 'testevo_MyBundle']); + }); + + it('filters out empty bundle name', () => { + const variants = getBundleNameVariants('', 'proj'); + expect(variants).toEqual(['proj', 'proj_']); + }); +}); diff --git a/src/cli/operations/config-bundle/__tests__/resolve-bundle.test.ts b/src/cli/operations/config-bundle/__tests__/resolve-bundle.test.ts new file mode 100644 index 000000000..6ecbeb71e --- /dev/null +++ b/src/cli/operations/config-bundle/__tests__/resolve-bundle.test.ts @@ -0,0 +1,103 @@ +import { resolveBundleByName } from '../resolve-bundle'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockListConfigurationBundles, mockListConfigurationBundleVersions } = vi.hoisted(() => ({ + mockListConfigurationBundles: vi.fn(), + mockListConfigurationBundleVersions: vi.fn(), +})); + +vi.mock('../../../aws/agentcore-config-bundles', () => ({ + listConfigurationBundles: mockListConfigurationBundles, + listConfigurationBundleVersions: mockListConfigurationBundleVersions, +})); + +const mockConfigIO = { + readDeployedState: vi.fn(), + readProjectSpec: vi.fn(), +} as any; + +const REGION = 'us-east-1'; + +describe('resolveBundleByName', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockConfigIO.readDeployedState.mockResolvedValue({ targets: {} }); + mockConfigIO.readProjectSpec.mockResolvedValue({ name: 'testproj' }); + }); + + it('resolves via deployed state fast path', async () => { + mockConfigIO.readDeployedState.mockResolvedValue({ + targets: { + 'us-east-1': { + resources: { + configBundles: { + MyBundle: { bundleId: 'bundle-123', bundleArn: 'arn:bundle', versionId: 'v1' }, + }, + }, + }, + }, + }); + mockListConfigurationBundleVersions.mockResolvedValue({ + versions: [{ 
versionId: 'v2', versionCreatedAt: '2026-01-01T00:00:00Z' }], + }); + + const result = await resolveBundleByName('MyBundle', REGION, mockConfigIO); + expect(result.bundleId).toBe('bundle-123'); + expect(result.versionId).toBe('v2'); + expect(mockListConfigurationBundles).not.toHaveBeenCalled(); + }); + + it('falls back to API when deployed state is empty', async () => { + mockListConfigurationBundles.mockResolvedValue({ + bundles: [{ bundleId: 'bundle-456', bundleArn: 'arn:bundle-456', bundleName: 'testprojMyBundle' }], + nextToken: undefined, + }); + mockListConfigurationBundleVersions.mockResolvedValue({ + versions: [{ versionId: 'v1', versionCreatedAt: '2026-01-01T00:00:00Z' }], + }); + + const result = await resolveBundleByName('MyBundle', REGION, mockConfigIO); + expect(result.bundleId).toBe('bundle-456'); + }); + + it('matches legacy underscore-prefixed name', async () => { + mockListConfigurationBundles.mockResolvedValue({ + bundles: [{ bundleId: 'bundle-789', bundleArn: 'arn:bundle-789', bundleName: 'testproj_MyBundle' }], + nextToken: undefined, + }); + mockListConfigurationBundleVersions.mockResolvedValue({ + versions: [{ versionId: 'v1', versionCreatedAt: '2026-01-01T00:00:00Z' }], + }); + + const result = await resolveBundleByName('MyBundle', REGION, mockConfigIO); + expect(result.bundleId).toBe('bundle-789'); + }); + + it('paginates through multiple pages to find bundle', async () => { + mockListConfigurationBundles + .mockResolvedValueOnce({ + bundles: [{ bundleId: 'other-1', bundleArn: 'arn:other', bundleName: 'OtherBundle' }], + nextToken: 'page2', + }) + .mockResolvedValueOnce({ + bundles: [{ bundleId: 'bundle-found', bundleArn: 'arn:found', bundleName: 'testprojMyBundle' }], + nextToken: undefined, + }); + mockListConfigurationBundleVersions.mockResolvedValue({ + versions: [{ versionId: 'v1', versionCreatedAt: '2026-01-01T00:00:00Z' }], + }); + + const result = await resolveBundleByName('MyBundle', REGION, mockConfigIO); + 
/**
 * List every name a config bundle may be stored under on the API side.
 *
 * The API stores bundles with a project-name prefix while users refer to them by
 * their local name, so the candidates are, in order:
 *   1. the local name itself,
 *   2. `{projectName}{bundleName}`,
 *   3. `{projectName}_{bundleName}`.
 * The prefixed forms are only produced when a non-empty `projectName` is given,
 * and empty candidates are dropped.
 *
 * @param bundleName - Local bundle name as written in the project spec.
 * @param projectName - Optional project name used as the prefix.
 * @returns The non-empty candidate names, in the order above.
 */
export function getBundleNameVariants(bundleName: string, projectName?: string): string[] {
  const candidates: string[] = [bundleName];
  if (projectName) {
    candidates.push(`${projectName}${bundleName}`, `${projectName}_${bundleName}`);
  }
  return candidates.filter(candidate => candidate.length > 0);
}
/**
 * One change found by {@link deepDiff}.
 *
 * `path` uses dot notation for object keys and `[i]` for array indices.
 * `oldValue` is set for `removed` and `changed`; `newValue` for `added` and `changed`.
 */
export interface DiffEntry {
  path: string;
  type: 'added' | 'removed' | 'changed';
  oldValue?: unknown;
  newValue?: unknown;
}

/** True when `key` is an own property of `obj`; inherited members are ignored. */
function hasOwnKey(obj: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Deep diff two JSON-like values, returning a flat list of changes.
 *
 * - Strictly equal values produce no entries.
 * - A value that is `undefined` on one side is reported as `added` / `removed`.
 * - Primitives, `null`, and values of different types are reported as `changed`.
 * - Arrays are compared element by element only when both sides are arrays of the
 *   same length; otherwise the whole array is reported as a single `changed` entry.
 * - Objects are compared key by key over the union of their own enumerable keys.
 *
 * @param from - The original value.
 * @param to - The new value.
 * @param prefix - Path of the value being compared (empty for the root).
 * @returns The changes, in traversal order.
 */
export function deepDiff(from: unknown, to: unknown, prefix = ''): DiffEntry[] {
  const entries: DiffEntry[] = [];

  if (from === to) return entries;

  if (from === null || to === null || typeof from !== typeof to) {
    if (from === undefined) {
      entries.push({ path: prefix, type: 'added', newValue: to });
    } else if (to === undefined) {
      entries.push({ path: prefix, type: 'removed', oldValue: from });
    } else {
      entries.push({ path: prefix, type: 'changed', oldValue: from, newValue: to });
    }
    return entries;
  }

  if (typeof from !== 'object') {
    entries.push({ path: prefix, type: 'changed', oldValue: from, newValue: to });
    return entries;
  }

  if (Array.isArray(from) || Array.isArray(to)) {
    if (!Array.isArray(from) || !Array.isArray(to) || from.length !== to.length) {
      entries.push({ path: prefix, type: 'changed', oldValue: from, newValue: to });
      return entries;
    }
    for (let i = 0; i < from.length; i++) {
      entries.push(...deepDiff(from[i], to[i], `${prefix}[${i}]`));
    }
    return entries;
  }

  const fromObj = from as Record<string, unknown>;
  const toObj = to as Record<string, unknown>;
  const allKeys = new Set([...Object.keys(fromObj), ...Object.keys(toObj)]);

  for (const key of allKeys) {
    const childPath = prefix ? `${prefix}.${key}` : key;
    // Own-property checks only: the `in` operator also sees Object.prototype members,
    // so a key such as "toString" or "constructor" present on one side would be
    // misreported as "changed" against the inherited function.
    if (!hasOwnKey(fromObj, key)) {
      entries.push({ path: childPath, type: 'added', newValue: toObj[key] });
    } else if (!hasOwnKey(toObj, key)) {
      entries.push({ path: childPath, type: 'removed', oldValue: fromObj[key] });
    } else {
      entries.push(...deepDiff(fromObj[key], toObj[key], childPath));
    }
  }

  return entries;
}
/**
 * Resolves a config bundle name to its bundle ID.
 *
 * Fast path: reads deployed-state.json for known bundle IDs.
 * Fallback: calls listConfigurationBundles API to find by name.
 */
import { ConfigIO } from '../../../lib';
import { listConfigurationBundleVersions, listConfigurationBundles } from '../../aws/agentcore-config-bundles';
import { getBundleNameVariants } from './bundle-name-variants';

/** API identifiers of a resolved config bundle. */
export interface ResolvedBundle {
  bundleId: string;
  bundleArn?: string;
  /** Latest version ID returned by the versions listing, when one exists. */
  versionId?: string;
  region: string;
}

/**
 * Resolve a bundle name to its API identifiers.
 *
 * Tries deployed-state.json first, then falls back to the list API. The fast path is
 * best-effort: an unreadable deployed state, or a deployed-state entry whose versions
 * can no longer be listed, falls through to the API lookup instead of failing.
 *
 * @param bundleName - Local bundle name as written in the project spec.
 * @param region - Region passed to the config bundle API calls.
 * @param configIO - Config reader; defaults to a fresh `ConfigIO`.
 * @returns The bundle's ID and ARN, plus its latest version ID when available.
 * @throws Error when no bundle matching any name variant is found through the list API.
 */
export async function resolveBundleByName(
  bundleName: string,
  region: string,
  configIO: ConfigIO = new ConfigIO()
): Promise<ResolvedBundle> {
  // Fast path: check deployed state. The read itself may reject (for example when
  // the project has never been deployed); treat that as "nothing known locally" so
  // the documented API fallback still runs.
  let deployedState: Awaited<ReturnType<ConfigIO['readDeployedState']>> | undefined;
  try {
    deployedState = await configIO.readDeployedState();
  } catch {
    deployedState = undefined;
  }

  for (const targetName of Object.keys(deployedState?.targets ?? {})) {
    const target = deployedState?.targets?.[targetName];
    const bundle = target?.resources?.configBundles?.[bundleName];
    if (!bundle) continue;

    // Verify the bundle still exists by listing versions (branch-agnostic)
    try {
      const versions = await listConfigurationBundleVersions({
        region,
        bundleId: bundle.bundleId,
        maxResults: 1,
      });
      const latestVersion = versions.versions[0];
      return {
        bundleId: bundle.bundleId,
        bundleArn: bundle.bundleArn,
        versionId: latestVersion?.versionId ?? bundle.versionId,
        region,
      };
    } catch {
      // Stale deployed-state entry — fall through to API lookup
    }
  }

  // Fallback: search via API
  // The API stores bundles with a prefixed name: {projectName}{bundleName}
  let projectName: string | undefined;
  try {
    const projectSpec = await configIO.readProjectSpec();
    projectName = projectSpec.name;
  } catch {
    // Project spec may not be available
  }

  const nameVariants = getBundleNameVariants(bundleName, projectName);
  let nextToken: string | undefined;
  let match: { bundleId: string; bundleArn: string; bundleName: string } | undefined;
  // Page through the listing until a name variant matches or the pages run out.
  do {
    const page = await listConfigurationBundles({ region, maxResults: 100, nextToken });
    match = page.bundles.find(b => nameVariants.includes(b.bundleName));
    nextToken = page.nextToken;
  } while (!match && nextToken);

  if (!match) {
    throw new Error(`Configuration bundle "${bundleName}" not found. Has it been deployed?`);
  }

  // Get the latest version ID (branch-agnostic)
  const versions = await listConfigurationBundleVersions({
    region,
    bundleId: match.bundleId,
    maxResults: 1,
  });
  const latestVersion = versions.versions[0];

  return {
    bundleId: match.bundleId,
    bundleArn: match.bundleArn,
    versionId: latestVersion?.versionId,
    region,
  };
}
mockUpdateABTest, + mockListABTests, + mockGetCredentialProvider, + mockIAMSend, +} = vi.hoisted(() => ({ + mockCreateABTest: vi.fn(), + mockDeleteABTest: vi.fn(), + mockGetABTest: vi.fn(), + mockUpdateABTest: vi.fn(), + mockListABTests: vi.fn(), + mockGetCredentialProvider: vi.fn().mockReturnValue(undefined), + mockIAMSend: vi.fn(), +})); + +vi.mock('../../../aws/agentcore-ab-tests', () => ({ + createABTest: mockCreateABTest, + deleteABTest: mockDeleteABTest, + getABTest: mockGetABTest, + updateABTest: mockUpdateABTest, + listABTests: mockListABTests, +})); + +vi.mock('../../../aws/account', () => ({ + getCredentialProvider: mockGetCredentialProvider, +})); + +vi.mock('@aws-sdk/client-iam', () => ({ + IAMClient: class { + send = mockIAMSend; + }, + CreateRoleCommand: class { + constructor(public input: unknown) {} + }, + PutRolePolicyCommand: class { + constructor(public input: unknown) {} + }, + DeleteRolePolicyCommand: class { + constructor(public input: unknown) {} + }, + DeleteRoleCommand: class { + constructor(public input: unknown) {} + }, +})); + +// ── Helpers ──────────────────────────────────────────────────────────────── + +function makeProjectSpec(abTests: AgentCoreProjectSpec['abTests'] = []): AgentCoreProjectSpec { + return { + name: 'TestProject', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [], + policyEngines: [], + harnesses: [], + configBundles: [], + httpGateways: [], + abTests, + }; +} + +const sampleABTest = { + name: 'TestOne', + mode: 'config-bundle' as const, + gatewayRef: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:gateway/gw-123', + variants: [ + { + name: 'C' as const, + weight: 80, + variantConfiguration: { configurationBundle: { bundleArn: 'arn:bundle:control', bundleVersion: 'v1' } }, + }, + { + name: 'T1' as const, + weight: 20, + variantConfiguration: { configurationBundle: { bundleArn: 'arn:bundle:treatment', 
bundleVersion: 'v1' } }, + }, + ], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval:config' }, + roleArn: 'arn:aws:iam::123456789012:role/ExistingRole', +}; + +// ── Tests ────────────────────────────────────────────────────────────────── + +describe('setupABTests', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockListABTests.mockResolvedValue({ abTests: [] }); + mockUpdateABTest.mockResolvedValue({}); + mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'STOPPED' }); + }); + + describe('creation', () => { + it('creates new AB test when not in deployed state', async () => { + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-001', abTestArn: 'arn:abt:001' }); + + const result = await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([sampleABTest]), + }); + + expect(result.hasErrors).toBe(false); + expect(result.results).toHaveLength(1); + expect(result.results[0]!.status).toBe('created'); + expect(result.results[0]!.abTestId).toBe('abt-001'); + expect(result.abTests.TestOne).toEqual( + expect.objectContaining({ abTestId: 'abt-001', abTestArn: 'arn:abt:001' }) + ); + }); + + it('updates already-deployed test', async () => { + mockUpdateABTest.mockResolvedValue({ abTestId: 'abt-existing', abTestArn: 'arn:abt:existing' }); + + const result = await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([sampleABTest]), + existingABTests: { + TestOne: { abTestId: 'abt-existing', abTestArn: 'arn:abt:existing' }, + }, + }); + + expect(result.results[0]!.status).toBe('updated'); + expect(mockCreateABTest).not.toHaveBeenCalled(); + expect(mockUpdateABTest).toHaveBeenCalled(); + }); + + it('updates test found via API list (state loss recovery)', async () => { + mockListABTests.mockResolvedValue({ + abTests: [{ name: 'TestOne', abTestId: 'abt-api', abTestArn: 'arn:abt:api' }], + }); + mockUpdateABTest.mockResolvedValue({ abTestId: 'abt-api', abTestArn: 'arn:abt:api' }); + + const result = await 
setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([sampleABTest]), + }); + + expect(result.results[0]!.status).toBe('updated'); + expect(result.abTests.TestOne!.abTestId).toBe('abt-api'); + expect(mockCreateABTest).not.toHaveBeenCalled(); + expect(mockUpdateABTest).toHaveBeenCalled(); + }); + + it('auto-creates IAM role when roleArn not provided', async () => { + const testWithoutRole = { ...sampleABTest, roleArn: undefined }; + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-002', abTestArn: 'arn:abt:002' }); + mockIAMSend.mockResolvedValue({ Role: { Arn: 'arn:aws:iam::123:role/AutoRole' } }); + + const result = await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([testWithoutRole]), + }); + + expect(result.results[0]!.status).toBe('created'); + expect(result.abTests.TestOne!.roleCreatedByCli).toBe(true); + expect(mockIAMSend).toHaveBeenCalled(); + }); + + it('uses provided roleArn without creating IAM role', async () => { + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-003', abTestArn: 'arn:abt:003' }); + + const result = await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([sampleABTest]), + }); + + expect(result.results[0]!.status).toBe('created'); + expect(result.abTests.TestOne!.roleCreatedByCli).toBe(false); + expect(mockIAMSend).not.toHaveBeenCalled(); + }); + + it('reports error when createABTest fails', async () => { + mockCreateABTest.mockRejectedValue(new Error('API failure')); + + const result = await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([sampleABTest]), + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toBe('API failure'); + }); + }); + + describe('ARN resolution', () => { + it('resolves bundle name to ARN from deployed state', async () => { + const testWithNames = { + ...sampleABTest, + variants: [ + { + name: 'C' as const, + weight: 80, + variantConfiguration: { 
configurationBundle: { bundleArn: 'my-bundle', bundleVersion: 'LATEST' } }, + }, + { + name: 'T1' as const, + weight: 20, + variantConfiguration: { configurationBundle: { bundleArn: 'my-bundle', bundleVersion: 'v2' } }, + }, + ], + }; + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-004', abTestArn: 'arn:abt:004' }); + + await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([testWithNames]), + deployedResources: { + configBundles: { + 'my-bundle': { bundleArn: 'arn:bundle:resolved', versionId: 'ver-latest' }, + }, + } as unknown as DeployedResourceState, + }); + + const callArgs = mockCreateABTest.mock.calls[0]![0]; + expect(callArgs.variants[0].variantConfiguration.configurationBundle.bundleArn).toBe('arn:bundle:resolved'); + expect(callArgs.variants[0].variantConfiguration.configurationBundle.bundleVersion).toBe('ver-latest'); + expect(callArgs.variants[1].variantConfiguration.configurationBundle.bundleVersion).toBe('v2'); + }); + + it('resolves gateway placeholder to ARN', async () => { + const testWithPlaceholder = { + ...sampleABTest, + gatewayRef: '{{gateway:my-gw}}', + }; + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-005', abTestArn: 'arn:abt:005' }); + + await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([testWithPlaceholder]), + deployedResources: { + mcp: { + gateways: { + 'my-gw': { gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/resolved-gw' }, + }, + }, + } as unknown as DeployedResourceState, + }); + + expect(mockCreateABTest.mock.calls[0]![0].gatewayArn).toBe( + 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/resolved-gw' + ); + }); + + it('resolves gateway placeholder to ARN from HTTP gateways', async () => { + const testWithPlaceholder = { + ...sampleABTest, + gatewayRef: '{{gateway:my-http-gw}}', + }; + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-007', abTestArn: 'arn:abt:007' }); + + await setupABTests({ + region: 'us-east-1', + projectSpec: 
makeProjectSpec([testWithPlaceholder]), + deployedResources: { + httpGateways: { + 'my-http-gw': { + gatewayId: 'httpgw-001', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/httpgw-001', + }, + }, + } as unknown as DeployedResourceState, + }); + + expect(mockCreateABTest.mock.calls[0]![0].gatewayArn).toBe( + 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/httpgw-001' + ); + }); + + it('resolves online eval config name to ARN', async () => { + const testWithEvalName = { + ...sampleABTest, + evaluationConfig: { onlineEvaluationConfigArn: 'my-eval-config' }, + }; + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-006', abTestArn: 'arn:abt:006' }); + + await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([testWithEvalName]), + deployedResources: { + onlineEvalConfigs: { + 'my-eval-config': { onlineEvaluationConfigArn: 'arn:eval:resolved' }, + }, + } as unknown as DeployedResourceState, + }); + + expect(mockCreateABTest.mock.calls[0]![0].evaluationConfig.onlineEvaluationConfigArn).toBe('arn:eval:resolved'); + }); + }); + + describe('deletion (reconciliation)', () => { + it('stops, polls until executionStatus is STOPPED, then deletes orphaned AB test', async () => { + const callOrder: string[] = []; + mockUpdateABTest.mockImplementation(() => { + callOrder.push('stop'); + return Promise.resolve({}); + }); + let getCallCount = 0; + mockGetABTest.mockImplementation(() => { + getCallCount++; + callOrder.push(`poll(${getCallCount})`); + // First poll: executionStatus not yet STOPPED (still transitioning) + if (getCallCount === 1) return Promise.resolve({ status: 'ACTIVE', executionStatus: 'RUNNING' }); + // Second poll: executionStatus is STOPPED — done + return Promise.resolve({ status: 'ACTIVE', executionStatus: 'STOPPED' }); + }); + mockDeleteABTest.mockImplementation(() => { + callOrder.push('delete'); + return Promise.resolve({ success: true }); + }); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', 
+ projectSpec: makeProjectSpec([]), + existingABTests: { + RemovedTest: { abTestId: 'abt-old', abTestArn: 'arn:abt:old' }, + }, + }); + + // Verify: stop → poll (RUNNING) → poll (STOPPED) → delete + expect(callOrder).toEqual(['stop', 'poll(1)', 'poll(2)', 'delete']); + expect(mockUpdateABTest).toHaveBeenCalledWith({ + region: 'us-east-1', + abTestId: 'abt-old', + executionStatus: 'STOPPED', + }); + expect(result.results[0]!.status).toBe('deleted'); + }); + + it('proceeds with delete when stop fails (already stopped)', async () => { + mockUpdateABTest.mockRejectedValue(new Error('Cannot update in current state')); + mockDeleteABTest.mockResolvedValue({ success: true }); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + RemovedTest: { abTestId: 'abt-stopped', abTestArn: 'arn:abt:stopped' }, + }, + }); + + expect(mockUpdateABTest).toHaveBeenCalled(); + expect(mockDeleteABTest).toHaveBeenCalled(); + expect(result.results[0]!.status).toBe('deleted'); + }); + + it('cleans up auto-created IAM role on deletion', async () => { + mockDeleteABTest.mockResolvedValue({ success: true }); + mockIAMSend.mockResolvedValue({}); + + await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + RemovedTest: { + abTestId: 'abt-old', + abTestArn: 'arn:abt:old', + roleArn: 'arn:aws:iam::123:role/AutoCreatedRole', + roleCreatedByCli: true, + }, + }, + }); + + // Should have called delete policy + delete role + expect(mockIAMSend).toHaveBeenCalledTimes(2); + + // Verify first call is DeleteRolePolicyCommand + const firstCall = mockIAMSend.mock.calls[0]![0]; + expect(firstCall.input).toEqual( + expect.objectContaining({ RoleName: 'AutoCreatedRole', PolicyName: expect.any(String) }) + ); + + // Verify second call is DeleteRoleCommand + const secondCall = mockIAMSend.mock.calls[1]![0]; + expect(secondCall.input).toEqual(expect.objectContaining({ RoleName: 
'AutoCreatedRole' })); + }); + + it('does not delete role when roleCreatedByCli is false', async () => { + mockDeleteABTest.mockResolvedValue({ success: true }); + + await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + RemovedTest: { + abTestId: 'abt-old', + abTestArn: 'arn:abt:old', + roleArn: 'arn:aws:iam::123:role/UserRole', + roleCreatedByCli: false, + }, + }, + }); + + expect(mockIAMSend).not.toHaveBeenCalled(); + }); + + it('reports error when deletion fails', async () => { + mockDeleteABTest.mockRejectedValue(new Error('delete failed')); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + FailTest: { abTestId: 'abt-fail', abTestArn: 'arn:abt:fail' }, + }, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toBe('delete failed'); + }); + + it('sets warning when AB test was stopped before deletion', async () => { + mockUpdateABTest.mockResolvedValue({}); + mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'STOPPED' }); + mockDeleteABTest.mockResolvedValue({ success: true }); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + StoppedTest: { abTestId: 'abt-warn', abTestArn: 'arn:abt:warn' }, + }, + }); + + expect(result.results[0]!.status).toBe('deleted'); + expect(result.results[0]!.warning).toBe('AB test "StoppedTest" was stopped before deletion'); + }); + + it('does not set warning when stop fails (already stopped)', async () => { + mockUpdateABTest.mockRejectedValue(new Error('Cannot update')); + mockDeleteABTest.mockResolvedValue({ success: true }); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + AlreadyStopped: { abTestId: 'abt-no-warn', abTestArn: 
'arn:abt:no-warn' }, + }, + }); + + expect(result.results[0]!.status).toBe('deleted'); + expect(result.results[0]!.warning).toBeUndefined(); + }); + + it('proceeds with delete even when poll never reaches STOPPED (timeout)', async () => { + mockUpdateABTest.mockResolvedValue({}); + // executionStatus never becomes STOPPED — always RUNNING + mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'RUNNING' }); + mockDeleteABTest.mockResolvedValue({ success: true }); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + StuckTest: { abTestId: 'abt-stuck', abTestArn: 'arn:abt:stuck' }, + }, + }); + + // Should still attempt delete after exhausting poll loop + expect(mockDeleteABTest).toHaveBeenCalledWith({ region: 'us-east-1', abTestId: 'abt-stuck' }); + expect(result.results[0]!.status).toBe('deleted'); + // Poll was called 20 times (the loop limit) + expect(mockGetABTest).toHaveBeenCalledTimes(20); + // Should warn that polling timed out + expect(result.results[0]!.warning).toBe( + 'AB test "StuckTest" did not reach STOPPED status within the polling window — proceeding with delete' + ); + }, 120_000); + + it('sets warning even when deleteABTest returns success: false', async () => { + mockUpdateABTest.mockResolvedValue({}); + mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'STOPPED' }); + mockDeleteABTest.mockResolvedValue({ success: false, error: 'still running' }); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + FailAfterStop: { abTestId: 'abt-fail-stop', abTestArn: 'arn:abt:fail-stop' }, + }, + }); + + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toBe('still running'); + // Warning should still be set because stop succeeded + expect(result.results[0]!.warning).toBe('AB test "FailAfterStop" was stopped before deletion'); + }); + }); + 
+ describe('IAM role creation', () => { + it('creates role with correct trust policy and inline policy', async () => { + const testWithoutRole = { ...sampleABTest, roleArn: undefined }; + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-iam', abTestArn: 'arn:abt:iam' }); + mockIAMSend.mockResolvedValue({ Role: { Arn: 'arn:aws:iam::123:role/AutoRole' } }); + + await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([testWithoutRole]), + }); + + // First call: CreateRoleCommand with trust policy + const createRoleCall = mockIAMSend.mock.calls[0]![0]; + const trustPolicy = JSON.parse(createRoleCall.input.AssumeRolePolicyDocument); + expect(trustPolicy.Statement).toHaveLength(1); + expect(trustPolicy.Statement[0].Principal.Service).toBe('bedrock-agentcore.amazonaws.com'); + + // Second call: PutRolePolicyCommand with inline policy + const putPolicyCall = mockIAMSend.mock.calls[1]![0]; + const policy = JSON.parse(putPolicyCall.input.PolicyDocument); + const sids = policy.Statement.map((s: { Sid: string }) => s.Sid); + expect(sids).toContain('GatewayRuleStatement'); + expect(sids).toContain('GatewayReadStatement'); + expect(sids).toContain('GatewayListStatement'); + expect(sids).toContain('OnlineEvaluationConfigStatement'); + expect(sids).toContain('ConfigurationBundleReadStatement'); + expect(sids).toContain('CloudWatchLogReadStatement'); + expect(sids).toContain('CloudWatchIndexPolicyStatement'); + + // ListGateways must use wildcard resource (can't be scoped) + const listGatewayStmt = policy.Statement.find((s: { Sid: string }) => s.Sid === 'GatewayListStatement'); + expect(listGatewayStmt.Resource).toEqual(['*']); + }); + }); + + describe('edge cases', () => { + it('proceeds with creation when listABTests fails', async () => { + mockListABTests.mockRejectedValue(new Error('API unavailable')); + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-new', abTestArn: 'arn:abt:new' }); + + const result = await setupABTests({ + region: 'us-east-1', + 
projectSpec: makeProjectSpec([sampleABTest]), + }); + + expect(result.results[0]!.status).toBe('created'); + expect(mockCreateABTest).toHaveBeenCalled(); + }); + + it('swallows errors during IAM role deletion', async () => { + mockDeleteABTest.mockResolvedValue({ success: true }); + mockIAMSend.mockRejectedValue(new Error('IAM permission denied')); + + const result = await deleteOrphanedABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingABTests: { + OldTest: { + abTestId: 'abt-old', + abTestArn: 'arn:abt:old', + roleArn: 'arn:aws:iam::123:role/SomeRole', + roleCreatedByCli: true, + }, + }, + }); + + // Deletion should still succeed even though IAM cleanup failed + expect(result.results[0]!.status).toBe('deleted'); + }); + }); + + describe('mixed operations', () => { + it('creates new and updates existing', async () => { + const newTest = { ...sampleABTest, name: 'NewTest' }; + const keptTest = { ...sampleABTest, name: 'KeptTest' }; + + mockCreateABTest.mockResolvedValue({ abTestId: 'abt-new', abTestArn: 'arn:abt:new' }); + mockUpdateABTest.mockResolvedValue({ abTestId: 'abt-kept', abTestArn: 'arn:abt:kept' }); + + const result = await setupABTests({ + region: 'us-east-1', + projectSpec: makeProjectSpec([newTest, keptTest]), + existingABTests: { + KeptTest: { abTestId: 'abt-kept', abTestArn: 'arn:abt:kept' }, + }, + }); + + expect(result.results).toHaveLength(2); + const statuses = result.results.map(r => `${r.testName}:${r.status}`); + expect(statuses).toContain('NewTest:created'); + expect(statuses).toContain('KeptTest:updated'); + }); + }); +}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts new file mode 100644 index 000000000..34be18b88 --- /dev/null +++ b/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts @@ -0,0 +1,652 @@ +import type { AgentCoreProjectSpec, DeployedState } from '../../../../schema'; 
+import { resolveConfigBundleComponentKeys, setupConfigBundles } from '../post-deploy-config-bundles.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { + mockCreateConfigurationBundle, + mockDeleteConfigurationBundle, + mockGetConfigurationBundleVersion, + mockListConfigurationBundleVersions, + mockListConfigurationBundles, + mockUpdateConfigurationBundle, +} = vi.hoisted(() => ({ + mockCreateConfigurationBundle: vi.fn(), + mockDeleteConfigurationBundle: vi.fn(), + mockGetConfigurationBundleVersion: vi.fn(), + mockListConfigurationBundleVersions: vi.fn(), + mockListConfigurationBundles: vi.fn(), + mockUpdateConfigurationBundle: vi.fn(), +})); + +vi.mock('../../../aws/agentcore-config-bundles', () => ({ + createConfigurationBundle: mockCreateConfigurationBundle, + deleteConfigurationBundle: mockDeleteConfigurationBundle, + getConfigurationBundleVersion: mockGetConfigurationBundleVersion, + listConfigurationBundleVersions: mockListConfigurationBundleVersions, + listConfigurationBundles: mockListConfigurationBundles, + updateConfigurationBundle: mockUpdateConfigurationBundle, +})); + +const REGION = 'us-west-2'; + +function makeProjectSpec(configBundles: Record<string, unknown>[]) { /* Test stub: only name and configBundles are set, then cast to any so partial bundle literals are accepted. */ + return { name: 'TestProject', configBundles } as any; +} + +describe('setupConfigBundles', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('create new bundle', () => { + it('should create a new bundle when not in existingBundles and not found by name', async () => { + mockListConfigurationBundles.mockResolvedValue({ bundles: [] }); + mockCreateConfigurationBundle.mockResolvedValue({ + bundleId: 'b-new', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-new', + versionId: 'v-1', + }); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { name: 'MyBundle', type: 'ConfigurationBundle', components: { foo: { type: 'inline', value: 'bar' } } }, + ]), + }); + +
expect(mockCreateConfigurationBundle).toHaveBeenCalledWith( + expect.objectContaining({ + region: REGION, + bundleName: 'TestProjectMyBundle', + components: { foo: { type: 'inline', value: 'bar' } }, + commitMessage: 'Create MyBundle', + }) + ); + expect(result.hasErrors).toBe(false); + expect(result.results).toHaveLength(1); + expect(result.results[0]).toMatchObject({ bundleName: 'MyBundle', status: 'created', bundleId: 'b-new' }); + expect(result.configBundles.MyBundle).toEqual({ + bundleId: 'b-new', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-new', + versionId: 'v-1', + }); + }); + }); + + describe('update existing bundle', () => { + it('should update an existing bundle when components have changed', async () => { + const existingBundles = { + MyBundle: { + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + }, + }; + + mockGetConfigurationBundleVersion.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + components: { foo: { type: 'inline', value: 'old' } }, + description: undefined, + lineageMetadata: { branchName: 'main' }, + }); + + mockUpdateConfigurationBundle.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-2', + }); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { name: 'MyBundle', type: 'ConfigurationBundle', components: { foo: { type: 'inline', value: 'new' } } }, + ]), + existingBundles, + }); + + expect(mockUpdateConfigurationBundle).toHaveBeenCalledWith( + expect.objectContaining({ + region: REGION, + bundleId: 'b-123', + components: { foo: { type: 'inline', value: 'new' } }, + parentVersionIds: ['v-1'], + branchName: 'main', + commitMessage: 'Update MyBundle', + }) + ); + expect(result.results[0]).toMatchObject({ status: 'updated', versionId: 'v-2' }); + expect(result.hasErrors).toBe(false); + }); + 
}); + + describe('skip unchanged bundle', () => { + it('should skip update when components and description are unchanged', async () => { + const components = { foo: { type: 'inline', value: 'same' } }; + const existingBundles = { + MyBundle: { + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + }, + }; + + mockGetConfigurationBundleVersion.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + components, + description: 'My desc', + lineageMetadata: { branchName: 'main' }, + }); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { name: 'MyBundle', type: 'ConfigurationBundle', components, description: 'My desc' }, + ]), + existingBundles, + }); + + expect(mockUpdateConfigurationBundle).not.toHaveBeenCalled(); + expect(mockCreateConfigurationBundle).not.toHaveBeenCalled(); + expect(result.results[0]).toMatchObject({ bundleName: 'MyBundle', status: 'skipped', versionId: 'v-1' }); + expect(result.configBundles.MyBundle).toEqual(existingBundles.MyBundle); + }); + }); + + describe('deep equal is key-order-independent', () => { + it('should skip update when components differ only in key order', async () => { + const existingBundles = { + MyBundle: { + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + }, + }; + + // API returns keys in one order + mockGetConfigurationBundleVersion.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + components: { a: { type: 'inline', value: '1' }, b: { type: 'inline', value: '2' } }, + description: undefined, + lineageMetadata: { branchName: 'main' }, + }); + + // Spec has same keys in different order + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { + name: 'MyBundle', + components: { b: { type: 
'inline', value: '2' }, a: { type: 'inline', value: '1' } }, + }, + ]), + existingBundles, + }); + + expect(mockUpdateConfigurationBundle).not.toHaveBeenCalled(); + expect(result.results[0]).toMatchObject({ status: 'skipped' }); + }); + }); + + describe('delete orphaned bundles', () => { + it('should delete bundles in existingBundles but not in projectSpec', async () => { + const existingBundles = { + OrphanBundle: { + bundleId: 'b-orphan', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-orphan', + versionId: 'v-1', + }, + }; + + mockDeleteConfigurationBundle.mockResolvedValue(undefined); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([]), + existingBundles, + }); + + expect(mockDeleteConfigurationBundle).toHaveBeenCalledWith({ + region: REGION, + bundleId: 'b-orphan', + }); + expect(result.results[0]).toMatchObject({ bundleName: 'OrphanBundle', status: 'deleted' }); + expect(result.hasErrors).toBe(false); + }); + + it('should report error status when delete throws', async () => { + const existingBundles = { + OrphanBundle: { + bundleId: 'b-orphan', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-orphan', + versionId: 'v-1', + }, + }; + + mockDeleteConfigurationBundle.mockRejectedValue(new Error('Access denied')); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([]), + existingBundles, + }); + + expect(result.results[0]).toMatchObject({ bundleName: 'OrphanBundle', status: 'error', error: 'Access denied' }); + expect(result.hasErrors).toBe(true); + }); + }); + + describe('uses branch from API when bundleSpec has no branchName', () => { + it('should use branchName from getConfigurationBundleVersion lineageMetadata', async () => { + const existingBundles = { + MyBundle: { + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + }, + }; + + mockGetConfigurationBundleVersion.mockResolvedValue({ + bundleId: 'b-123', + 
bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + components: { old: { type: 'inline', value: 'data' } }, + description: undefined, + lineageMetadata: { branchName: 'feature-branch' }, + }); + + mockUpdateConfigurationBundle.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-2', + }); + + await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { + name: 'MyBundle', + components: { new: { type: 'inline', value: 'data' } }, + // no branchName specified + }, + ]), + existingBundles, + }); + + expect(mockUpdateConfigurationBundle).toHaveBeenCalledWith( + expect.objectContaining({ + branchName: 'feature-branch', + }) + ); + }); + + it('should prefer bundleSpec branchName over API branchName', async () => { + const existingBundles = { + MyBundle: { + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + }, + }; + + mockGetConfigurationBundleVersion.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + components: { old: { type: 'inline', value: 'data' } }, + description: undefined, + lineageMetadata: { branchName: 'api-branch' }, + }); + + mockUpdateConfigurationBundle.mockResolvedValue({ + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-2', + }); + + await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { + name: 'MyBundle', + components: { new: { type: 'inline', value: 'data' } }, + branchName: 'spec-branch', + }, + ]), + existingBundles, + }); + + expect(mockUpdateConfigurationBundle).toHaveBeenCalledWith( + expect.objectContaining({ + branchName: 'spec-branch', + }) + ); + }); + }); + + describe('fallback path via findBundleByName', () => { + it('should fall through to findBundleByName when getConfigurationBundleVersion throws 404', async () => { + const 
existingBundles = { + MyBundle: { + bundleId: 'b-old', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-old', + versionId: 'v-old', + }, + }; + + // First call (existing bundle path) throws 404 + mockGetConfigurationBundleVersion.mockRejectedValueOnce(new Error('404 not found')).mockResolvedValueOnce({ + bundleId: 'b-found', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-found', + versionId: 'v-latest', + components: { old: { type: 'inline', value: 'data' } }, + description: undefined, + lineageMetadata: { branchName: 'main' }, + }); + + mockListConfigurationBundles.mockResolvedValue({ + bundles: [{ bundleId: 'b-found', bundleName: 'TestProjectMyBundle' }], + }); + + mockListConfigurationBundleVersions.mockResolvedValue({ + versions: [{ versionId: 'v-latest', versionCreatedAt: 1234567890 }], + }); + + mockUpdateConfigurationBundle.mockResolvedValue({ + bundleId: 'b-found', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-found', + versionId: 'v-new', + }); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { + name: 'MyBundle', + components: { new: { type: 'inline', value: 'data' } }, + }, + ]), + existingBundles, + }); + + expect(mockListConfigurationBundles).toHaveBeenCalledWith({ region: REGION, maxResults: 100 }); + expect(mockListConfigurationBundleVersions).toHaveBeenCalledWith({ + region: REGION, + bundleId: 'b-found', + }); + expect(result.results[0]).toMatchObject({ status: 'updated', bundleId: 'b-found', versionId: 'v-new' }); + expect(result.hasErrors).toBe(false); + }); + + it('should create a new bundle when findBundleByName returns nothing after 404', async () => { + const existingBundles = { + MyBundle: { + bundleId: 'b-old', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-old', + versionId: 'v-old', + }, + }; + + mockGetConfigurationBundleVersion.mockRejectedValueOnce(new Error('404 not found')); + mockListConfigurationBundles.mockResolvedValue({ bundles: [] }); + 
mockCreateConfigurationBundle.mockResolvedValue({ + bundleId: 'b-new', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-new', + versionId: 'v-1', + }); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { name: 'MyBundle', type: 'ConfigurationBundle', components: { x: { type: 'inline', value: '1' } } }, + ]), + existingBundles, + }); + + expect(mockCreateConfigurationBundle).toHaveBeenCalled(); + expect(result.results[0]).toMatchObject({ status: 'created', bundleId: 'b-new' }); + }); + }); + + describe('error handling', () => { + it('should report error status when create fails', async () => { + mockListConfigurationBundles.mockResolvedValue({ bundles: [] }); + mockCreateConfigurationBundle.mockRejectedValue(new Error('Service unavailable')); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { name: 'MyBundle', type: 'ConfigurationBundle', components: { x: { type: 'inline', value: '1' } } }, + ]), + }); + + expect(result.results[0]).toMatchObject({ + bundleName: 'MyBundle', + status: 'error', + error: 'Service unavailable', + }); + expect(result.hasErrors).toBe(true); + }); + + it('should report error status when update fails with non-404 error', async () => { + const existingBundles = { + MyBundle: { + bundleId: 'b-123', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', + versionId: 'v-1', + }, + }; + + mockGetConfigurationBundleVersion.mockRejectedValue(new Error('Throttling exception')); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([ + { name: 'MyBundle', type: 'ConfigurationBundle', components: { x: { type: 'inline', value: '1' } } }, + ]), + existingBundles, + }); + + expect(result.results[0]).toMatchObject({ + bundleName: 'MyBundle', + status: 'error', + error: 'Throttling exception', + }); + expect(result.hasErrors).toBe(true); + // Should NOT fall through to findBundleByName + 
expect(mockListConfigurationBundles).not.toHaveBeenCalled(); + }); + + it('should report error when delete throws an exception', async () => { + const existingBundles = { + OrphanBundle: { + bundleId: 'b-orphan', + bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-orphan', + versionId: 'v-1', + }, + }; + + mockDeleteConfigurationBundle.mockRejectedValue(new Error('Network error')); + + const result = await setupConfigBundles({ + region: REGION, + projectSpec: makeProjectSpec([]), + existingBundles, + }); + + expect(result.results[0]).toMatchObject({ + bundleName: 'OrphanBundle', + status: 'error', + error: 'Network error', + }); + expect(result.hasErrors).toBe(true); + }); + }); +}); + +// ── resolveConfigBundleComponentKeys ─────────────────────────────────────── + +describe('resolveConfigBundleComponentKeys', () => { + function makeFullProjectSpec(configBundles: AgentCoreProjectSpec['configBundles'] = []): AgentCoreProjectSpec { + return { + name: 'TestProject', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [], + policyEngines: [], + harnesses: [], + configBundles, + httpGateways: [], + abTests: [], + }; + } + + function makeDeployedState(targetName: string, resources: Record<string, unknown>): DeployedState { /* Nests resources under targets[targetName]; double-cast because no other DeployedState fields are populated here. */ + return { + targets: { + [targetName]: { resources }, + }, + } as unknown as DeployedState; + } + + it('returns projectSpec unchanged when target has no resources', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { '{{runtime:my-rt}}': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = { targets: {} } as unknown as DeployedState; + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'missing-target'); + expect(result).toBe(spec); // same reference — no transformation + }); + + it('resolves {{runtime:name}} placeholder to runtime ARN', () => { + const spec = makeFullProjectSpec([ + { name: 'b1',
components: { '{{runtime:my-agent}}': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = makeDeployedState('target1', { + runtimes: { 'my-agent': { runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1' } }, + }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + const keys = Object.keys(result.configBundles[0]!.components); + expect(keys).toEqual(['arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1']); + }); + + it('resolves {{gateway:name}} placeholder to HTTP gateway ARN', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { '{{gateway:my-gw}}': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = makeDeployedState('target1', { + httpGateways: { 'my-gw': { gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-1' } }, + }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + const keys = Object.keys(result.configBundles[0]!.components); + expect(keys).toEqual(['arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-1']); + }); + + it('resolves {{gateway:name}} placeholder to MCP gateway ARN', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { '{{gateway:my-mcp-gw}}': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = makeDeployedState('target1', { + mcp: { gateways: { 'my-mcp-gw': { gatewayArn: 'arn:mcp:gw:resolved' } } }, + }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + const keys = Object.keys(result.configBundles[0]!.components); + expect(keys).toEqual(['arn:mcp:gw:resolved']); + }); + + it('passes through keys that are already ARNs', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { 'arn:existing:key': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = makeDeployedState('target1', { runtimes: {} }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 
'target1'); + const keys = Object.keys(result.configBundles[0]!.components); + expect(keys).toEqual(['arn:existing:key']); + }); + + it('passes through plain string keys that are not placeholders or ARNs', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { 'some-plain-key': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = makeDeployedState('target1', { runtimes: {} }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + const keys = Object.keys(result.configBundles[0]!.components); + expect(keys).toEqual(['some-plain-key']); + }); + + it('throws when gateway placeholder references non-existent gateway', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { '{{gateway:missing}}': { configuration: {} } } } as any, + ]); + const deployedState = makeDeployedState('target1', { httpGateways: {}, mcp: { gateways: {} } }); + + expect(() => resolveConfigBundleComponentKeys(spec, deployedState, 'target1')).toThrow( + 'Config bundle references gateway "missing" but it was not found in deployed resources' + ); + }); + + it('throws when runtime placeholder references non-existent runtime', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { '{{runtime:missing}}': { configuration: {} } } } as any, + ]); + const deployedState = makeDeployedState('target1', { runtimes: {} }); + + expect(() => resolveConfigBundleComponentKeys(spec, deployedState, 'target1')).toThrow( + 'Config bundle references runtime "missing" but it was not found in deployed resources' + ); + }); + + it('handles projectSpec with no configBundles', () => { + const spec = makeFullProjectSpec([]); + const deployedState = makeDeployedState('target1', { runtimes: {} }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + expect(result.configBundles).toEqual([]); + }); + + it('does not mutate the original projectSpec', () => { + const spec = 
makeFullProjectSpec([ + { name: 'b1', components: { '{{runtime:my-rt}}': { configuration: { k: 'v' } } } } as any, + ]); + const deployedState = makeDeployedState('target1', { + runtimes: { 'my-rt': { runtimeArn: 'arn:resolved' } }, + }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + // Original should still have the placeholder + expect(Object.keys(spec.configBundles[0]!.components)).toEqual(['{{runtime:my-rt}}']); + // Result should have the resolved key + expect(Object.keys(result.configBundles[0]!.components)).toEqual(['arn:resolved']); + }); + + it('prefers HTTP gateway over MCP gateway when both exist with same name', () => { + const spec = makeFullProjectSpec([ + { name: 'b1', components: { '{{gateway:dupe-gw}}': { configuration: {} } } } as any, + ]); + const deployedState = makeDeployedState('target1', { + httpGateways: { 'dupe-gw': { gatewayArn: 'arn:http:gw' } }, + mcp: { gateways: { 'dupe-gw': { gatewayArn: 'arn:mcp:gw' } } }, + }); + + const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); + const keys = Object.keys(result.configBundles[0]!.components); + // HTTP gateway should take precedence (checked first in code) + expect(keys).toEqual(['arn:http:gw']); + }); +}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts new file mode 100644 index 000000000..f395c52f3 --- /dev/null +++ b/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts @@ -0,0 +1,436 @@ +import type { AgentCoreProjectSpec, DeployedResourceState, HttpGatewayDeployedState } from '../../../../schema'; +import { deleteOrphanedHttpGateways, setupHttpGateways } from '../post-deploy-http-gateways.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// ── Hoisted mocks ────────────────────────────────────────────────────────── + +const { + mockCreateHttpGateway, + 
mockCreateHttpGatewayTarget, + mockDeleteHttpGateway, + mockDeleteHttpGatewayTarget, + mockListAllHttpGateways, + mockListHttpGatewayTargets, + mockWaitForGatewayReady, + mockWaitForTargetReady, + mockGetCredentialProvider, + mockIAMSend, +} = vi.hoisted(() => ({ + mockCreateHttpGateway: vi.fn(), + mockCreateHttpGatewayTarget: vi.fn(), + mockDeleteHttpGateway: vi.fn(), + mockDeleteHttpGatewayTarget: vi.fn(), + mockListAllHttpGateways: vi.fn(), + mockListHttpGatewayTargets: vi.fn(), + mockWaitForGatewayReady: vi.fn(), + mockWaitForTargetReady: vi.fn(), + mockGetCredentialProvider: vi.fn().mockReturnValue(undefined), + mockIAMSend: vi.fn(), +})); + +vi.mock('../../../aws/agentcore-http-gateways', () => ({ + createHttpGateway: mockCreateHttpGateway, + createHttpGatewayTarget: mockCreateHttpGatewayTarget, + deleteHttpGateway: mockDeleteHttpGateway, + deleteHttpGatewayTarget: mockDeleteHttpGatewayTarget, + listAllHttpGateways: mockListAllHttpGateways, + listHttpGatewayTargets: mockListHttpGatewayTargets, + waitForGatewayReady: mockWaitForGatewayReady, + waitForTargetReady: mockWaitForTargetReady, +})); + +vi.mock('../../../aws/account', () => ({ + getCredentialProvider: mockGetCredentialProvider, +})); + +vi.mock('@aws-sdk/client-iam', () => ({ + IAMClient: class { + send = mockIAMSend; + }, + CreateRoleCommand: class { + constructor(public input: unknown) {} + }, + GetRoleCommand: class { + constructor(public input: unknown) {} + }, + PutRolePolicyCommand: class { + constructor(public input: unknown) {} + }, + DeleteRolePolicyCommand: class { + constructor(public input: unknown) {} + }, + DeleteRoleCommand: class { + constructor(public input: unknown) {} + }, +})); + +// ── Helpers ──────────────────────────────────────────────────────────────── + +function makeProjectSpec(httpGateways: AgentCoreProjectSpec['httpGateways'] = []): AgentCoreProjectSpec { + return { + name: 'TestProject', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: [], + 
credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [], + policyEngines: [], + harnesses: [], + configBundles: [], + abTests: [], + httpGateways, + }; +} + +const sampleHttpGateway = { + name: 'MyHttpGw', + runtimeRef: 'my-agent', + roleArn: 'arn:aws:iam::123456789012:role/ExistingRole', +}; + +const sampleDeployedResources = { + runtimes: { + 'my-agent': { + runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-123', + runtimeId: 'rt-123', + }, + }, +} as unknown as DeployedResourceState; + +// ── Tests ────────────────────────────────────────────────────────────────── + +describe('setupHttpGateways', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockListAllHttpGateways.mockResolvedValue([]); + mockListHttpGatewayTargets.mockResolvedValue({ targets: [] }); + mockWaitForGatewayReady.mockResolvedValue({ gatewayId: 'gw-001', status: 'READY' }); + mockWaitForTargetReady.mockResolvedValue({}); + }); + + describe('creation', () => { + it('creates gateway + target for new spec entry', async () => { + mockCreateHttpGateway.mockResolvedValue({ + gatewayId: 'gw-001', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/gw-001', + }); + mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-001' }); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + deployedResources: sampleDeployedResources, + }); + + expect(result.hasErrors).toBe(false); + expect(result.results).toHaveLength(1); + expect(result.results[0]!.status).toBe('created'); + expect(result.results[0]!.gatewayId).toBe('gw-001'); + expect(result.httpGateways.MyHttpGw).toEqual( + expect.objectContaining({ + gatewayId: 'gw-001', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/gw-001', + targetId: 'tgt-001', + }) + ); + + expect(mockCreateHttpGateway).toHaveBeenCalledWith({ + region: 'us-east-1', + name: 'MyHttpGw', + roleArn: 
'arn:aws:iam::123456789012:role/ExistingRole', + }); + expect(mockCreateHttpGatewayTarget).toHaveBeenCalledWith({ + region: 'us-east-1', + gatewayId: 'gw-001', + targetName: 'my-agent', + runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-123', + }); + }); + + it('skips existing gateway', async () => { + const existingHttpGateways: Record<string, HttpGatewayDeployedState> = { /* previously recorded state, keyed by the same name as sampleHttpGateway */ + MyHttpGw: { + gatewayId: 'gw-existing', + gatewayArn: 'arn:httpgw:existing', + targetId: 'tgt-existing', + }, + }; + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + existingHttpGateways, + deployedResources: sampleDeployedResources, + }); + + expect(result.results[0]!.status).toBe('skipped'); + expect(result.results[0]!.gatewayId).toBe('gw-existing'); + expect(mockCreateHttpGateway).not.toHaveBeenCalled(); + expect(mockCreateHttpGatewayTarget).not.toHaveBeenCalled(); + }); + + it('finds gateway by name via list (state loss recovery)', async () => { + mockListAllHttpGateways.mockResolvedValue([ + { name: 'MyHttpGw', gatewayId: 'gw-api', gatewayArn: 'arn:httpgw:api' }, + ]); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + deployedResources: sampleDeployedResources, + }); + + expect(result.results[0]!.status).toBe('skipped'); + expect(result.httpGateways.MyHttpGw!.gatewayId).toBe('gw-api'); + expect(mockCreateHttpGateway).not.toHaveBeenCalled(); + }); + + it('reports error on missing runtime ref', async () => { + const emptyDeployedResources = {} as unknown as DeployedResourceState; + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + deployedResources: emptyDeployedResources, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); +
expect(result.results[0]!.error).toContain('Runtime "my-agent" not found'); + expect(mockCreateHttpGateway).not.toHaveBeenCalled(); + }); + + it('auto-creates IAM role when roleArn not provided', async () => { + const gwWithoutRole = { ...sampleHttpGateway, roleArn: undefined }; + mockCreateHttpGateway.mockResolvedValue({ + gatewayId: 'gw-002', + gatewayArn: 'arn:httpgw:002', + }); + mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-002' }); + mockIAMSend.mockResolvedValue({ Role: { Arn: 'arn:aws:iam::123:role/AutoRole' } }); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([gwWithoutRole]), + deployedResources: sampleDeployedResources, + }); + + expect(result.results[0]!.status).toBe('created'); + expect(result.httpGateways.MyHttpGw!.roleCreatedByCli).toBe(true); + expect(mockIAMSend).toHaveBeenCalled(); + + // Verify CreateRoleCommand was sent with correct trust policy + const createRoleCall = mockIAMSend.mock.calls[0]![0]; + const trustPolicy = JSON.parse(createRoleCall.input.AssumeRolePolicyDocument); + expect(trustPolicy.Statement[0].Principal.Service).toBe('bedrock-agentcore.amazonaws.com'); + + // Verify PutRolePolicyCommand was sent with correct inline policy actions + const putPolicyCall = mockIAMSend.mock.calls[1]![0]; + const inlinePolicy = JSON.parse(putPolicyCall.input.PolicyDocument); + const actions = inlinePolicy.Statement[0].Action; + expect(actions).toContain('bedrock-agentcore:InvokeRuntime'); + expect(actions).toContain('bedrock-agentcore:InvokeAgent'); + expect(actions).toContain('bedrock-agentcore:InvokeAgentRuntime'); + expect(inlinePolicy.Statement[0].Resource).toBe('*'); + }); + + it('rollback on target creation failure', async () => { + mockCreateHttpGateway.mockResolvedValue({ + gatewayId: 'gw-rollback', + gatewayArn: 'arn:httpgw:rollback', + }); + mockCreateHttpGatewayTarget.mockRejectedValue(new Error('Target creation failed')); + 
mockDeleteHttpGateway.mockResolvedValue({ success: true }); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + deployedResources: sampleDeployedResources, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toContain('Target creation failed'); + expect(result.results[0]!.error).toContain('gateway rolled back'); + + // Verify rollback: deleteHttpGateway was called + expect(mockDeleteHttpGateway).toHaveBeenCalledWith({ + region: 'us-east-1', + gatewayId: 'gw-rollback', + }); + }); + }); + + describe('deletion (reconciliation)', () => { + it('deletes orphaned gateway not in project spec', async () => { + mockDeleteHttpGateway.mockResolvedValue({ success: true }); + mockDeleteHttpGatewayTarget.mockResolvedValue({ success: true }); + + const result = await deleteOrphanedHttpGateways({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingHttpGateways: { + RemovedGw: { + gatewayId: 'gw-old', + gatewayArn: 'arn:httpgw:old', + targetId: 'tgt-old', + }, + }, + }); + + expect(mockDeleteHttpGatewayTarget).toHaveBeenCalledWith({ + region: 'us-east-1', + gatewayId: 'gw-old', + targetId: 'tgt-old', + }); + expect(mockDeleteHttpGateway).toHaveBeenCalledWith({ + region: 'us-east-1', + gatewayId: 'gw-old', + }); + expect(result.results[0]!.status).toBe('deleted'); + }); + + it('cleans up auto-created IAM role on deletion', async () => { + mockDeleteHttpGateway.mockResolvedValue({ success: true }); + mockIAMSend.mockResolvedValue({}); + + await deleteOrphanedHttpGateways({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingHttpGateways: { + RemovedGw: { + gatewayId: 'gw-old', + gatewayArn: 'arn:httpgw:old', + roleArn: 'arn:aws:iam::123:role/AutoCreatedRole', + roleCreatedByCli: true, + }, + }, + }); + + // Should have called delete policy + delete role + 
expect(mockIAMSend).toHaveBeenCalledTimes(2); + + // Verify first call is DeleteRolePolicyCommand + const firstCall = mockIAMSend.mock.calls[0]![0]; + expect(firstCall.input).toEqual( + expect.objectContaining({ RoleName: 'AutoCreatedRole', PolicyName: expect.any(String) }) + ); + + // Verify second call is DeleteRoleCommand + const secondCall = mockIAMSend.mock.calls[1]![0]; + expect(secondCall.input).toEqual(expect.objectContaining({ RoleName: 'AutoCreatedRole' })); + }); + + it('reports error when deletion fails', async () => { + mockDeleteHttpGateway.mockRejectedValue(new Error('delete failed')); + + const result = await deleteOrphanedHttpGateways({ + region: 'us-east-1', + projectSpec: makeProjectSpec([]), + existingHttpGateways: { + FailGw: { gatewayId: 'gw-fail', gatewayArn: 'arn:httpgw:fail' }, + }, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toBe('delete failed'); + }); + }); + + describe('edge cases', () => { + it('proceeds with creation when listHttpGateways fails', async () => { + mockListAllHttpGateways.mockRejectedValue(new Error('API unavailable')); + mockCreateHttpGateway.mockResolvedValue({ + gatewayId: 'gw-new', + gatewayArn: 'arn:httpgw:new', + }); + mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-new' }); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + deployedResources: sampleDeployedResources, + }); + + expect(result.results[0]!.status).toBe('created'); + expect(mockCreateHttpGateway).toHaveBeenCalled(); + }); + + it('uses provided roleArn without creating IAM role', async () => { + mockCreateHttpGateway.mockResolvedValue({ + gatewayId: 'gw-003', + gatewayArn: 'arn:httpgw:003', + }); + mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-003' }); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 
'TestProject', + projectSpec: makeProjectSpec([sampleHttpGateway]), + deployedResources: sampleDeployedResources, + }); + + expect(result.results[0]!.status).toBe('created'); + expect(result.httpGateways.MyHttpGw!.roleCreatedByCli).toBe(false); + expect(mockIAMSend).not.toHaveBeenCalled(); + }); + }); + + describe('mixed operations', () => { + it('creates new and skips existing (orphan deletion is a separate pass)', async () => { + const newGw = { ...sampleHttpGateway, name: 'NewGw' }; + const keptGw = { ...sampleHttpGateway, name: 'KeptGw' }; + + mockCreateHttpGateway.mockResolvedValue({ + gatewayId: 'gw-new', + gatewayArn: 'arn:httpgw:new', + }); + mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-new' }); + mockDeleteHttpGateway.mockResolvedValue({ success: true }); + + const result = await setupHttpGateways({ + region: 'us-east-1', + projectName: 'TestProject', + projectSpec: makeProjectSpec([newGw, keptGw]), + existingHttpGateways: { + KeptGw: { gatewayId: 'gw-kept', gatewayArn: 'arn:httpgw:kept' }, + OrphanGw: { gatewayId: 'gw-orphan', gatewayArn: 'arn:httpgw:orphan' }, + }, + deployedResources: sampleDeployedResources, + }); + + expect(result.results).toHaveLength(2); + const statuses = result.results.map(r => `${r.gatewayName}:${r.status}`); + expect(statuses).toContain('NewGw:created'); + expect(statuses).toContain('KeptGw:skipped'); + }); + + it('deleteOrphanedHttpGateways removes orphans separately', async () => { + mockDeleteHttpGateway.mockResolvedValue({ success: true }); + + const result = await deleteOrphanedHttpGateways({ + region: 'us-east-1', + projectSpec: makeProjectSpec([{ ...sampleHttpGateway, name: 'KeptGw' }]), + existingHttpGateways: { + KeptGw: { gatewayId: 'gw-kept', gatewayArn: 'arn:httpgw:kept' }, + OrphanGw: { gatewayId: 'gw-orphan', gatewayArn: 'arn:httpgw:orphan' }, + }, + }); + + expect(result.results).toHaveLength(1); + expect(result.results[0]!.gatewayName).toBe('OrphanGw'); + 
expect(result.results[0]!.status).toBe('deleted'); + }); + }); +}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-observability.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-observability.test.ts index 9155a699d..ba069f29e 100644 --- a/src/cli/operations/deploy/__tests__/post-deploy-observability.test.ts +++ b/src/cli/operations/deploy/__tests__/post-deploy-observability.test.ts @@ -1,23 +1,23 @@ import { setupTransactionSearch } from '../post-deploy-observability.js'; import { beforeEach, describe, expect, it, vi } from 'vitest'; -const { mockEnableTransactionSearch, mockReadCliConfig } = vi.hoisted(() => ({ +const { mockEnableTransactionSearch, mockReadGlobalConfigSync } = vi.hoisted(() => ({ mockEnableTransactionSearch: vi.fn(), - mockReadCliConfig: vi.fn(), + mockReadGlobalConfigSync: vi.fn(), })); vi.mock('../../../aws/transaction-search', () => ({ enableTransactionSearch: mockEnableTransactionSearch, })); -vi.mock('../../../../lib/schemas/io/cli-config', () => ({ - readCliConfig: mockReadCliConfig, +vi.mock('../../../../lib/schemas/io/global-config', () => ({ + readGlobalConfigSync: mockReadGlobalConfigSync, })); describe('setupTransactionSearch', () => { beforeEach(() => { vi.clearAllMocks(); - mockReadCliConfig.mockReturnValue({}); + mockReadGlobalConfigSync.mockReturnValue({}); mockEnableTransactionSearch.mockResolvedValue({ success: true }); }); @@ -33,7 +33,7 @@ describe('setupTransactionSearch', () => { }); it('passes custom transactionSearchIndexPercentage from config', async () => { - mockReadCliConfig.mockReturnValue({ transactionSearchIndexPercentage: 25 }); + mockReadGlobalConfigSync.mockReturnValue({ transactionSearchIndexPercentage: 25 }); const result = await setupTransactionSearch({ region: 'us-east-1', @@ -57,7 +57,7 @@ describe('setupTransactionSearch', () => { }); it('skips when disableTransactionSearch is true in config', async () => { - mockReadCliConfig.mockReturnValue({ disableTransactionSearch: true }); + 
mockReadGlobalConfigSync.mockReturnValue({ disableTransactionSearch: true }); const result = await setupTransactionSearch({ region: 'us-east-1', diff --git a/src/cli/operations/deploy/__tests__/post-deploy-online-evals.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-online-evals.test.ts new file mode 100644 index 000000000..8120167ae --- /dev/null +++ b/src/cli/operations/deploy/__tests__/post-deploy-online-evals.test.ts @@ -0,0 +1,179 @@ +import { enableOnlineEvalConfigs } from '../post-deploy-online-evals'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockUpdateOnlineEvalExecutionStatus } = vi.hoisted(() => ({ + mockUpdateOnlineEvalExecutionStatus: vi.fn(), +})); + +vi.mock('../../../aws/agentcore-control', () => ({ + updateOnlineEvalExecutionStatus: mockUpdateOnlineEvalExecutionStatus, +})); + +function makeOnlineEvalConfig(overrides: Record = {}) { + return { + name: 'MyEval', + agent: 'my-agent', + evaluators: ['Builtin.Faithfulness'], + samplingRate: 10, + enableOnCreate: true, + ...overrides, + }; +} + +const deployedConfigs = { + MyEval: { + onlineEvaluationConfigId: 'oec-123', + onlineEvaluationConfigArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:online-evaluation-config/oec-123', + }, +}; + +describe('enableOnlineEvalConfigs', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockUpdateOnlineEvalExecutionStatus.mockResolvedValue({ + configId: 'oec-123', + executionStatus: 'ENABLED', + status: 'ACTIVE', + }); + }); + + describe('enablement', () => { + it('enables config with enableOnCreate true', async () => { + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig()], + deployedOnlineEvalConfigs: deployedConfigs, + }); + + expect(result.hasErrors).toBe(false); + expect(result.results).toHaveLength(1); + expect(result.results[0]!.status).toBe('enabled'); + expect(mockUpdateOnlineEvalExecutionStatus).toHaveBeenCalledWith({ + region: 'us-east-1', + 
onlineEvaluationConfigId: 'oec-123', + executionStatus: 'ENABLED', + }); + }); + + it('enables config when enableOnCreate is undefined (defaults to enable)', async () => { + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig({ enableOnCreate: undefined })], + deployedOnlineEvalConfigs: deployedConfigs, + }); + + expect(result.hasErrors).toBe(false); + expect(result.results[0]!.status).toBe('enabled'); + expect(mockUpdateOnlineEvalExecutionStatus).toHaveBeenCalled(); + }); + + it('skips config with enableOnCreate false', async () => { + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig({ enableOnCreate: false })], + deployedOnlineEvalConfigs: deployedConfigs, + }); + + expect(result.hasErrors).toBe(false); + expect(result.results[0]!.status).toBe('skipped'); + expect(mockUpdateOnlineEvalExecutionStatus).not.toHaveBeenCalled(); + }); + }); + + describe('error handling', () => { + it('reports error when config not in deployed state', async () => { + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig({ name: 'Missing' })], + deployedOnlineEvalConfigs: deployedConfigs, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toContain('not found in deployed state'); + }); + + it('reports error when API call fails', async () => { + mockUpdateOnlineEvalExecutionStatus.mockRejectedValue(new Error('AccessDenied')); + + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig()], + deployedOnlineEvalConfigs: deployedConfigs, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('error'); + expect(result.results[0]!.error).toBe('AccessDenied'); + }); + + it('hasErrors is true when any config fails', async () => { + 
mockUpdateOnlineEvalExecutionStatus + .mockResolvedValueOnce({ configId: 'oec-123', executionStatus: 'ENABLED', status: 'ACTIVE' }) + .mockRejectedValueOnce(new Error('Throttled')); + + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig({ name: 'MyEval' }), makeOnlineEvalConfig({ name: 'OtherEval' })], + deployedOnlineEvalConfigs: { + ...deployedConfigs, + OtherEval: { + onlineEvaluationConfigId: 'oec-456', + onlineEvaluationConfigArn: + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:online-evaluation-config/oec-456', + }, + }, + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]!.status).toBe('enabled'); + expect(result.results[1]!.status).toBe('error'); + }); + }); + + describe('multiple configs', () => { + it('processes multiple configs independently', async () => { + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [makeOnlineEvalConfig({ name: 'MyEval' }), makeOnlineEvalConfig({ name: 'OtherEval' })], + deployedOnlineEvalConfigs: { + ...deployedConfigs, + OtherEval: { + onlineEvaluationConfigId: 'oec-456', + onlineEvaluationConfigArn: + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:online-evaluation-config/oec-456', + }, + }, + }); + + expect(result.hasErrors).toBe(false); + expect(result.results).toHaveLength(2); + expect(result.results[0]!.status).toBe('enabled'); + expect(result.results[1]!.status).toBe('enabled'); + expect(mockUpdateOnlineEvalExecutionStatus).toHaveBeenCalledTimes(2); + }); + + it('mixed enableOnCreate values', async () => { + const result = await enableOnlineEvalConfigs({ + region: 'us-east-1', + onlineEvalConfigs: [ + makeOnlineEvalConfig({ name: 'MyEval', enableOnCreate: true }), + makeOnlineEvalConfig({ name: 'OtherEval', enableOnCreate: false }), + ], + deployedOnlineEvalConfigs: { + ...deployedConfigs, + OtherEval: { + onlineEvaluationConfigId: 'oec-456', + onlineEvaluationConfigArn: + 
'arn:aws:bedrock-agentcore:us-east-1:123456789012:online-evaluation-config/oec-456', + }, + }, + }); + + expect(result.hasErrors).toBe(false); + expect(result.results[0]!.status).toBe('enabled'); + expect(result.results[1]!.status).toBe('skipped'); + expect(mockUpdateOnlineEvalExecutionStatus).toHaveBeenCalledTimes(1); + }); + }); +}); diff --git a/src/cli/operations/deploy/__tests__/preflight.test.ts b/src/cli/operations/deploy/__tests__/preflight.test.ts index ff4af8c5f..bca98bfd6 100644 --- a/src/cli/operations/deploy/__tests__/preflight.test.ts +++ b/src/cli/operations/deploy/__tests__/preflight.test.ts @@ -32,10 +32,20 @@ vi.mock('../../../../lib/index.js', () => ({ resolveAWSDeploymentTargets = mockReadAWSDeploymentTargets; readDeployedState = mockReadDeployedState; configExists = mockConfigExists; + getPathResolver = () => ({ getAgentConfigPath: () => '/tmp/mock-agentcore.json' }); }, requireConfigRoot: mockRequireConfigRoot, })); +vi.mock('node:fs', async importOriginal => { + const actual = await importOriginal(); + return { + ...actual, + readFileSync: () => JSON.stringify({}), + writeFileSync: vi.fn(), + }; +}); + vi.mock('../../../cdk/local-cdk-project.js', () => ({ LocalCdkProject: class { validate = mockValidate; diff --git a/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts b/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts index c01574e16..4d5123852 100644 --- a/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts +++ b/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts @@ -49,6 +49,9 @@ function createContext(overrides?: { onlineEvalConfigs: [], agentCoreGateways: [], policyEngines: [], + configBundles: [], + abTests: [], + httpGateways: [], harnesses: overrides?.harnesses, } as AgentCoreProjectSpec; diff --git a/src/cli/operations/deploy/index.ts b/src/cli/operations/deploy/index.ts index a5b9a2f9d..332f0ca2d 100644 --- 
a/src/cli/operations/deploy/index.ts +++ b/src/cli/operations/deploy/index.ts @@ -45,6 +45,30 @@ export { // Post-deploy observability setup export { setupTransactionSearch, type TransactionSearchSetupResult } from './post-deploy-observability'; +// Post-deploy HTTP gateways +export { + setupHttpGateways, + type SetupHttpGatewaysOptions, + type SetupHttpGatewaysResult, + type HttpGatewaySetupResult, +} from './post-deploy-http-gateways'; + +// Post-deploy online eval enablement +export { + enableOnlineEvalConfigs, + type EnableOnlineEvalsOptions, + type EnableOnlineEvalsResult, + type OnlineEvalEnableResult, +} from './post-deploy-online-evals'; + +// Post-deploy config bundles +export { + setupConfigBundles, + type SetupConfigBundlesOptions, + type SetupConfigBundlesResult, + type ConfigBundleSetupResult, +} from './post-deploy-config-bundles'; + // Re-export external requirements for convenience export { checkDependencyVersions, diff --git a/src/cli/operations/deploy/post-deploy-ab-tests.ts b/src/cli/operations/deploy/post-deploy-ab-tests.ts new file mode 100644 index 000000000..4678d16f8 --- /dev/null +++ b/src/cli/operations/deploy/post-deploy-ab-tests.ts @@ -0,0 +1,733 @@ +import type { ABTestDeployedState, AgentCoreProjectSpec, DeployedResourceState } from '../../../schema'; +import { getCredentialProvider } from '../../aws/account'; +import { createABTest, deleteABTest, getABTest, listABTests, updateABTest } from '../../aws/agentcore-ab-tests'; +import type { ABTestEvaluationConfig, ABTestVariant, TrafficAllocationConfig } from '../../aws/agentcore-ab-tests'; +import { arnPrefix } from '../../aws/partition'; +import { + CreateRoleCommand, + DeleteRoleCommand, + DeleteRolePolicyCommand, + GetRoleCommand, + IAMClient, + PutRolePolicyCommand, +} from '@aws-sdk/client-iam'; +import { createHash } from 'node:crypto'; + +// ============================================================================ +// Types +// 
============================================================================ + +export interface SetupABTestsOptions { + region: string; + projectSpec: AgentCoreProjectSpec; + existingABTests?: Record; + /** Full deployed resource state for resolving ARN references. */ + deployedResources?: DeployedResourceState; +} + +export interface ABTestSetupResult { + testName: string; + status: 'created' | 'updated' | 'deleted' | 'skipped' | 'error'; + abTestId?: string; + abTestArn?: string; + error?: string; + warning?: string; +} + +export interface SetupABTestsResult { + results: ABTestSetupResult[]; + abTests: Record; + hasErrors: boolean; +} + +// ============================================================================ +// Constants +// ============================================================================ + +const AB_TEST_ROLE_POLICY_NAME = 'ABTestExecutionPolicy'; + +// ============================================================================ +// Config Hash +// ============================================================================ + +/** + * Compute a deterministic SHA-256 hash of the key AB test configuration fields. + * Used to detect whether a redeployment actually changed the test config. 
+ */ +function computeConfigHash(testSpec: { + variants: unknown; + evaluationConfig: unknown; + gatewayRef: string; + gatewayFilter?: unknown; + trafficAllocationConfig?: unknown; +}): string { + const payload = JSON.stringify({ + variants: testSpec.variants, + evaluationConfig: testSpec.evaluationConfig, + gatewayRef: testSpec.gatewayRef, + gatewayFilter: testSpec.gatewayFilter, + trafficAllocationConfig: testSpec.trafficAllocationConfig, + }); + return createHash('sha256').update(payload).digest('hex'); +} + +// ============================================================================ +// Shared Update Helper +// ============================================================================ + +interface ApplyABTestUpdateOptions { + region: string; + abTestId: string; + resolvedVariants: ABTestVariant[]; + resolvedEvalConfig: ABTestEvaluationConfig; + trafficAllocationConfig?: TrafficAllocationConfig; + resolvedRoleArn?: string; + testName: string; + roleCreatedByCli: boolean; + currentHash: string; +} + +async function applyABTestUpdate( + options: ApplyABTestUpdateOptions +): Promise<{ state: ABTestDeployedState; result: ABTestSetupResult }> { + const updateResult = await updateABTest({ + region: options.region, + abTestId: options.abTestId, + variants: options.resolvedVariants, + evaluationConfig: options.resolvedEvalConfig, + trafficAllocationConfig: options.trafficAllocationConfig, + roleArn: options.resolvedRoleArn, + }); + + return { + state: { + abTestId: updateResult.abTestId, + abTestArn: updateResult.abTestArn, + roleArn: options.resolvedRoleArn, + roleCreatedByCli: options.roleCreatedByCli, + configHash: options.currentHash, + }, + result: { + testName: options.testName, + status: 'updated', + abTestId: updateResult.abTestId, + abTestArn: updateResult.abTestArn, + }, + }; +} + +// ============================================================================ +// Implementation +// 
============================================================================ + +/** + * Create, update, or delete AB tests post-deploy. + * + * Pattern: + * 1. For each AB test in project spec → resolve ARN references, create or skip + * 2. For each AB test in deployed-state but NOT in project spec → delete (reconciliation) + * 3. Return updated deployed state entries + */ +export async function setupABTests(options: SetupABTestsOptions): Promise { + const { region, projectSpec, existingABTests, deployedResources } = options; + const results: ABTestSetupResult[] = []; + const abTests: Record = {}; + + // Create or skip tests from the spec + for (const testSpec of projectSpec.abTests ?? []) { + let resolvedRoleArn: string | undefined; + let roleCreatedByCli = false; + try { + const currentHash = computeConfigHash(testSpec); + const existingTest = existingABTests?.[testSpec.name]; + + // Resolve ARN references from deployed state + const resolvedVariants = resolveVariants(testSpec.variants, deployedResources); + const resolvedGatewayArn = resolveGatewayArn(testSpec.gatewayRef, deployedResources); + if (!resolvedGatewayArn.startsWith('arn:') || resolvedGatewayArn.split(':').length < 6) { + results.push({ + testName: testSpec.name, + status: 'error', + error: `Gateway ARN could not be resolved for AB test "${testSpec.name}". Reference "${testSpec.gatewayRef}" did not match any deployed gateway. Ensure the HTTP gateway was deployed successfully.`, + }); + continue; + } + const resolvedEvalConfig = resolveEvalConfig(testSpec.evaluationConfig, deployedResources); + const evalConfigArns: string[] = + 'onlineEvaluationConfigArn' in resolvedEvalConfig + ? 
[resolvedEvalConfig.onlineEvaluationConfigArn] + : resolvedEvalConfig.perVariantOnlineEvaluationConfig.map(pv => pv.onlineEvaluationConfigArn); + if (testSpec.roleArn) { + resolvedRoleArn = testSpec.roleArn; + } else { + resolvedRoleArn = await getOrCreateABTestRole({ + region, + projectName: projectSpec.name, + testName: testSpec.name, + gatewayArn: resolvedGatewayArn, + onlineEvalConfigArns: evalConfigArns, + }); + roleCreatedByCli = true; + } + + if (existingTest) { + // Config unchanged — skip to preserve running state + if (existingTest.configHash === currentHash) { + abTests[testSpec.name] = existingTest; + results.push({ + testName: testSpec.name, + status: 'skipped', + abTestId: existingTest.abTestId, + abTestArn: existingTest.abTestArn, + }); + continue; + } + + // Config changed — update in-place instead of delete+recreate + const applied = await applyABTestUpdate({ + region, + abTestId: existingTest.abTestId, + resolvedVariants, + resolvedEvalConfig, + trafficAllocationConfig: testSpec.trafficAllocationConfig as TrafficAllocationConfig | undefined, + resolvedRoleArn, + testName: testSpec.name, + roleCreatedByCli: existingTest.roleCreatedByCli ?? 
roleCreatedByCli, + currentHash, + }); + abTests[testSpec.name] = applied.state; + results.push(applied.result); + continue; + } + + // Try to find by name via list (handles re-creation after state loss) + const existingByName = await findABTestByName(region, projectSpec.name, testSpec.name); + if (existingByName) { + // Found by name — update in-place with fresh config + const applied = await applyABTestUpdate({ + region, + abTestId: existingByName.abTestId, + resolvedVariants, + resolvedEvalConfig, + trafficAllocationConfig: testSpec.trafficAllocationConfig as TrafficAllocationConfig | undefined, + resolvedRoleArn, + testName: testSpec.name, + roleCreatedByCli, + currentHash, + }); + abTests[testSpec.name] = applied.state; + results.push(applied.result); + continue; + } + + const createOptions = { + region, + name: `${projectSpec.name}_${testSpec.name}`, + description: testSpec.description, + gatewayArn: resolvedGatewayArn, + roleArn: resolvedRoleArn, + variants: resolvedVariants, + evaluationConfig: resolvedEvalConfig, + gatewayFilter: testSpec.gatewayFilter, + trafficAllocationConfig: testSpec.trafficAllocationConfig as TrafficAllocationConfig | undefined, + maxDurationDays: testSpec.maxDurationDays, + enableOnCreate: testSpec.enableOnCreate, + }; + + // Retry on gateway/eval access denied — IAM policy propagation can take time + let result; + const MAX_RETRIES = 5; + const BASE_DELAY_MS = 5_000; + for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { + try { + result = await createABTest(createOptions); + break; + } catch (err: unknown) { + const errCode = (err as { name?: string }).name; + const errStatus = (err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode; + const msg = err instanceof Error ? 
err.message : String(err); + + const isRetryable = + errCode === 'AccessDeniedException' || + errStatus === 403 || + msg.includes('Access denied') || + msg.includes('Gateway validation error'); + + if (isRetryable && attempt < MAX_RETRIES - 1) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 1000; + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } + throw err; + } + } + if (!result) throw new Error('AB test creation failed after retries'); + + abTests[testSpec.name] = { + abTestId: result.abTestId, + abTestArn: result.abTestArn, + roleArn: resolvedRoleArn, + roleCreatedByCli, + configHash: currentHash, + }; + + results.push({ + testName: testSpec.name, + status: 'created', + abTestId: result.abTestId, + abTestArn: result.abTestArn, + }); + } catch (err) { + // Clean up auto-created role on AB test creation failure to avoid orphaned roles + if (roleCreatedByCli && resolvedRoleArn) { + try { + await deleteABTestRole(region, resolvedRoleArn); + } catch { + // Best-effort role cleanup + } + } + results.push({ + testName: testSpec.name, + status: 'error', + error: err instanceof Error ? err.message : String(err), + }); + } + } + + // Orphaned AB tests are deleted by deleteOrphanedABTests() which runs + // as a separate pre-pass before HTTP gateway setup. No deletion loop here. + + return { + results, + abTests, + hasErrors: results.some(r => r.status === 'error'), + }; +} + +/** + * Delete orphaned AB tests (in deployed-state but removed from spec). + * + * AB tests create rules on HTTP gateways, so they must be deleted before + * the gateway can be deleted. Call this before setupHttpGateways. + * + * The main setupABTests deletion loop becomes a no-op for any tests + * already cleaned up here. 
+ */ +export async function deleteOrphanedABTests(options: { + region: string; + projectSpec: AgentCoreProjectSpec; + existingABTests?: Record; +}): Promise<{ results: ABTestSetupResult[]; hasErrors: boolean }> { + const { region, projectSpec, existingABTests } = options; + if (!existingABTests) return { results: [], hasErrors: false }; + + const specTestNames = new Set((projectSpec.abTests ?? []).map(t => t.name)); + const results: ABTestSetupResult[] = []; + + for (const [testName, testState] of Object.entries(existingABTests)) { + if (!specTestNames.has(testName)) { + try { + // Stop the AB test first — running tests cannot be deleted + let wasStopped = false; + let stopTimedOut = false; + try { + await updateABTest({ region, abTestId: testState.abTestId, executionStatus: 'STOPPED' }); + wasStopped = true; + + // Poll until executionStatus is STOPPED (stop is async) + let stopped = false; + for (let i = 0; i < 20; i++) { + const test = await getABTest({ region, abTestId: testState.abTestId }); + if (test.executionStatus === 'STOPPED') { + stopped = true; + break; + } + await new Promise(resolve => setTimeout(resolve, 3_000)); + } + if (!stopped) { + stopTimedOut = true; + } + } catch { + // May already be stopped or in a state that doesn't need stopping — proceed with delete + } + + const deleteResult = await deleteABTest({ + region, + abTestId: testState.abTestId, + }); + + if (deleteResult.success && testState.roleCreatedByCli && testState.roleArn) { + await deleteABTestRole(region, testState.roleArn); + } + + results.push({ + testName, + status: deleteResult.success ? 'deleted' : 'error', + error: deleteResult.error, + warning: stopTimedOut + ? `AB test "${testName}" did not reach STOPPED status within the polling window — proceeding with delete` + : wasStopped + ? `AB test "${testName}" was stopped before deletion` + : undefined, + }); + } catch (err) { + results.push({ + testName, + status: 'error', + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + } + + return { + results, + hasErrors: results.some(r => r.status === 'error'), + }; +} + +// ============================================================================ +// ARN Resolution Helpers +// ============================================================================ + +async function findABTestByName( + region: string, + projectName: string, + testName: string +): Promise<{ abTestId: string; abTestArn: string } | undefined> { + try { + const prefixedName = `${projectName}_${testName}`; + const result = await listABTests({ region, maxResults: 100 }); + return result.abTests.find( + t => t.name.toLowerCase() === prefixedName.toLowerCase() || t.name.toLowerCase() === testName.toLowerCase() + ); + } catch { + return undefined; + } +} + +/** + * Resolve variant config bundle references. + * If bundleArn is a name (not an ARN), look it up in deployed config bundles. + * Target-based variants are passed through as-is. + */ +function resolveVariants( + variants: { + name: 'C' | 'T1'; + weight: number; + variantConfiguration: { + configurationBundle?: { bundleArn: string; bundleVersion: string }; + target?: { targetName: string }; + }; + }[], + deployedResources?: DeployedResourceState +): ABTestVariant[] { + return variants.map(v => { + const bundle = v.variantConfiguration.configurationBundle; + if (bundle) { + return { + name: v.name, + weight: v.weight, + variantConfiguration: { + configurationBundle: { + bundleArn: resolveConfigBundleArn(bundle.bundleArn, deployedResources), + bundleVersion: resolveConfigBundleVersion(bundle.bundleArn, bundle.bundleVersion, deployedResources), + }, + }, + }; + } + // Target-based variant — pass through + return { + name: v.name, + weight: v.weight, + variantConfiguration: { + ...(v.variantConfiguration.target && { target: { name: v.variantConfiguration.target.targetName } }), + }, + }; + }); +} + +function resolveConfigBundleArn(ref: string, deployedResources?: 
DeployedResourceState): string { + if (ref.startsWith('arn:')) return ref; + + const bundles = deployedResources?.configBundles; + if (bundles?.[ref]) { + return bundles[ref].bundleArn; + } + + return ref; +} + +function resolveConfigBundleVersion( + bundleRef: string, + versionRef: string, + deployedResources?: DeployedResourceState +): string { + if (versionRef !== 'LATEST') return versionRef; + + // Resolve LATEST to the deployed versionId + const bundles = deployedResources?.configBundles; + const name = bundleRef.startsWith('arn:') ? undefined : bundleRef; + if (name && bundles?.[name]) { + return bundles[name].versionId; + } + + return versionRef; +} + +function resolveGatewayArn(ref: string, deployedResources?: DeployedResourceState): string { + if (ref.startsWith('arn:')) return ref; + + // Check for placeholder pattern {{gateway:}} + const placeholderMatch = /^\{\{gateway:(.+)\}\}$/.exec(ref); + const gwName = placeholderMatch ? placeholderMatch[1] : ref; + + const gateways = deployedResources?.mcp?.gateways; + if (gateways && gwName && gateways[gwName]) { + return gateways[gwName].gatewayArn; + } + + // Check HTTP gateways (imperatively created for A/B testing) + const httpGateways = deployedResources?.httpGateways; + if (httpGateways && gwName && httpGateways[gwName]) { + return httpGateways[gwName].gatewayArn; + } + + return ref; +} + +function resolveEvalConfig( + config: + | { onlineEvaluationConfigArn: string } + | { perVariantOnlineEvaluationConfig: { treatmentName: 'C' | 'T1'; onlineEvaluationConfigArn: string }[] }, + deployedResources?: DeployedResourceState +): ABTestEvaluationConfig { + if ('perVariantOnlineEvaluationConfig' in config) { + // Per-variant eval config — resolve each ARN + return { + perVariantOnlineEvaluationConfig: config.perVariantOnlineEvaluationConfig.map(pv => ({ + name: pv.treatmentName, + onlineEvaluationConfigArn: resolveOnlineEvalArn(pv.onlineEvaluationConfigArn, deployedResources), + })), + }; + } + + const ref = 
config.onlineEvaluationConfigArn; + return { onlineEvaluationConfigArn: resolveOnlineEvalArn(ref, deployedResources) }; +} + +function resolveOnlineEvalArn(ref: string, deployedResources?: DeployedResourceState): string { + if (ref.startsWith('arn:')) return ref; + + const configs = deployedResources?.onlineEvalConfigs; + if (configs?.[ref]) { + return configs[ref].onlineEvaluationConfigArn; + } + + return ref; +} + +// ============================================================================ +// IAM Role Management +// ============================================================================ + +/** + * Generate a project-scoped role name following the CDK pattern: + * AgentCore-{ProjectName}-ABTest{TestName}-{Hash} + */ +function generateRoleName(projectName: string, testName: string): string { + // Deterministic hash so retries produce the same role name (avoids orphaned roles) + const hash = createHash('sha256').update(`${projectName}:${testName}`).digest('hex').slice(0, 8); + const base = `AgentCore-${projectName}-ABTest${testName}`; + // IAM role names max 64 chars + return `${base.slice(0, 55)}-${hash}`; +} + +/** + * Extract role name from ARN: arn:aws:iam::123456789012:role/RoleName → RoleName + */ +function roleNameFromArn(roleArn: string): string { + const parts = roleArn.split('/'); + return parts[parts.length - 1] ?? roleArn; +} + +interface CreateABTestRoleOptions { + region: string; + projectName: string; + testName: string; + gatewayArn: string; + onlineEvalConfigArns: string[]; +} + +async function getOrCreateABTestRole(options: CreateABTestRoleOptions): Promise { + const { region, projectName, testName, gatewayArn, onlineEvalConfigArns } = options; + const credentials = getCredentialProvider(); + const iamClient = new IAMClient({ region, credentials }); + + // Extract account ID from gateway ARN (arn:aws:bedrock-agentcore:REGION:ACCOUNT:gateway/ID) + const accountId = gatewayArn.split(':')[4] ?? 
'*'; + // Extract gateway ID for resource scoping + const gatewayId = gatewayArn.split('/').pop() ?? '*'; + + const roleName = generateRoleName(projectName, testName); + + const trustPolicy = JSON.stringify({ + Version: '2012-10-17', + Statement: [ + { + Effect: 'Allow', + Principal: { Service: 'bedrock-agentcore.amazonaws.com' }, + Action: 'sts:AssumeRole', + }, + ], + }); + + let roleArn: string; + let _needsPropagationWait = false; + + try { + const createResult = await iamClient.send( + new CreateRoleCommand({ + RoleName: roleName, + AssumeRolePolicyDocument: trustPolicy, + Description: `Auto-created execution role for AgentCore AB test: ${testName}`, + Tags: [ + { Key: 'agentcore:created-by', Value: 'agentcore-cli' }, + { Key: 'agentcore:project-name', Value: projectName }, + { Key: 'agentcore:ab-test-name', Value: testName }, + ], + }) + ); + + roleArn = createResult.Role?.Arn ?? ''; + if (!roleArn) { + throw new Error(`IAM CreateRole succeeded but returned no role ARN for "${roleName}"`); + } + _needsPropagationWait = true; + } catch (err: unknown) { + // Handle retry after a previous failed deploy left the role behind + const errName = (err as { name?: string }).name; + if (errName === 'EntityAlreadyExistsException') { + // IAM role already exists — reuse it + const existing = await iamClient.send(new GetRoleCommand({ RoleName: roleName })); + roleArn = existing.Role?.Arn ?? 
''; + if (!roleArn) { + throw new Error(`Role "${roleName}" already exists but ARN could not be retrieved`); + } + } else { + throw err; + } + } + + const policy = JSON.stringify({ + Version: '2012-10-17', + Statement: [ + { + Sid: 'GatewayRuleStatement', + Effect: 'Allow', + Action: [ + 'bedrock-agentcore:CreateGatewayRule', + 'bedrock-agentcore:UpdateGatewayRule', + 'bedrock-agentcore:GetGatewayRule', + 'bedrock-agentcore:DeleteGatewayRule', + 'bedrock-agentcore:ListGatewayRules', + ], + Resource: [`${arnPrefix(region)}:bedrock-agentcore:${region}:${accountId}:gateway/${gatewayId}`], + }, + { + Sid: 'GatewayReadStatement', + Effect: 'Allow', + Action: ['bedrock-agentcore:GetGateway'], + Resource: [`${arnPrefix(region)}:bedrock-agentcore:${region}:${accountId}:gateway/${gatewayId}`], + }, + { + Sid: 'GatewayListStatement', + Effect: 'Allow', + Action: ['bedrock-agentcore:ListGateways'], + Resource: ['*'], + }, + { + Sid: 'OnlineEvaluationConfigStatement', + Effect: 'Allow', + Action: ['bedrock-agentcore:GetOnlineEvaluationConfig', 'bedrock-agentcore:UpdateOnlineEvaluationConfig'], + Resource: onlineEvalConfigArns, + }, + { + Sid: 'ConfigurationBundleReadStatement', + Effect: 'Allow', + Action: ['bedrock-agentcore:GetConfigurationBundle', 'bedrock-agentcore:GetConfigurationBundleVersion'], + Resource: [`${arnPrefix(region)}:bedrock-agentcore:${region}:${accountId}:configuration-bundle/*`], + }, + { + Sid: 'CloudWatchDescribeLogGroups', + Effect: 'Allow', + Action: ['logs:DescribeLogGroups'], + Resource: ['*'], + }, + { + Sid: 'CloudWatchLogReadStatement', + Effect: 'Allow', + Action: [ + 'logs:StartQuery', + 'logs:GetQueryResults', + 'logs:StopQuery', + 'logs:FilterLogEvents', + 'logs:GetLogEvents', + ], + Resource: [ + `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*`, + `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*:*`, + 
`${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans`, + `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans:*`, + ], + }, + { + Sid: 'CloudWatchIndexPolicyStatement', + Effect: 'Allow', + Action: ['logs:DescribeIndexPolicies', 'logs:PutIndexPolicy'], + Resource: [ + `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans`, + `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans:*`, + ], + }, + ], + }); + + // Re-apply the inline policy (idempotent — covers both new and recovered roles) + await iamClient.send( + new PutRolePolicyCommand({ + RoleName: roleName, + PolicyName: AB_TEST_ROLE_POLICY_NAME, + PolicyDocument: policy, + }) + ); + + // Always wait for IAM policy propagation — both new roles and policy updates on existing roles + await new Promise(resolve => setTimeout(resolve, 15_000)); + + return roleArn; +} + +async function deleteABTestRole(region: string, roleArn: string): Promise { + const credentials = getCredentialProvider(); + const iamClient = new IAMClient({ region, credentials }); + const roleName = roleNameFromArn(roleArn); + + try { + // Must delete inline policies before deleting the role + await iamClient.send( + new DeleteRolePolicyCommand({ + RoleName: roleName, + PolicyName: AB_TEST_ROLE_POLICY_NAME, + }) + ); + } catch { + // Policy may not exist + } + + try { + await iamClient.send(new DeleteRoleCommand({ RoleName: roleName })); + } catch { + // Role may already be deleted or in use — best effort + } +} diff --git a/src/cli/operations/deploy/post-deploy-config-bundles.ts b/src/cli/operations/deploy/post-deploy-config-bundles.ts new file mode 100644 index 000000000..5318c54b1 --- /dev/null +++ b/src/cli/operations/deploy/post-deploy-config-bundles.ts @@ -0,0 +1,348 @@ +import type { AgentCoreProjectSpec, ConfigBundleDeployedState, DeployedState } from '../../../schema'; +import { + createConfigurationBundle, + deleteConfigurationBundle, + getConfigurationBundleVersion, + 
listConfigurationBundleVersions, + listConfigurationBundles, + updateConfigurationBundle, +} from '../../aws/agentcore-config-bundles'; +import type { ComponentConfigurationMap } from '../../aws/agentcore-config-bundles'; + +// ============================================================================ +// Types +// ============================================================================ + +export interface SetupConfigBundlesOptions { + region: string; + projectSpec: AgentCoreProjectSpec; + /** Existing config bundle deployed state (from deployed-state.json) */ + existingBundles?: Record; +} + +export interface ConfigBundleSetupResult { + bundleName: string; + status: 'created' | 'updated' | 'deleted' | 'skipped' | 'error'; + bundleId?: string; + bundleArn?: string; + versionId?: string; + error?: string; +} + +export interface SetupConfigBundlesResult { + results: ConfigBundleSetupResult[]; + /** Deployed state entries for config bundles (to merge into deployed-state.json) */ + configBundles: Record; + hasErrors: boolean; +} + +// ============================================================================ +// Implementation +// ============================================================================ + +/** + * Create, update, or delete configuration bundles post-deploy. + * + * Pattern: + * 1. For each configBundle in project spec → create or update + * 2. For each bundle in deployed-state but NOT in project spec → delete (reconciliation) + * 3. Return updated deployed state entries + */ +export async function setupConfigBundles(options: SetupConfigBundlesOptions): Promise { + const { region, projectSpec, existingBundles } = options; + const results: ConfigBundleSetupResult[] = []; + const configBundles: Record = {}; + + const specBundleNames = new Set((projectSpec.configBundles ?? []).map(b => b.name)); + const projectName = projectSpec.name; + + // Create or update bundles from the spec + for (const bundleSpec of projectSpec.configBundles ?? 
[]) { + // Prepend project name to the API-side bundle name (no separator for config bundles) + const apiBundleName = `${projectName}${bundleSpec.name}`; + + try { + // Try to update if we have an existing bundle ID + const existingBundle = existingBundles?.[bundleSpec.name]; + let updated = false; + + if (existingBundle) { + try { + // Fetch the exact version we know about — avoids branch-not-found errors + const current = await getConfigurationBundleVersion({ + region, + bundleId: existingBundle.bundleId, + versionId: existingBundle.versionId, + }); + const componentsChanged = !deepEqual(current.components, bundleSpec.components); + const descriptionChanged = (bundleSpec.description ?? undefined) !== (current.description ?? undefined); + + if (!componentsChanged && !descriptionChanged) { + // Nothing changed — skip the update, preserve existing state + configBundles[bundleSpec.name] = { + bundleId: existingBundle.bundleId, + bundleArn: existingBundle.bundleArn, + versionId: existingBundle.versionId, + }; + results.push({ + bundleName: bundleSpec.name, + status: 'skipped', + bundleId: existingBundle.bundleId, + bundleArn: existingBundle.bundleArn, + versionId: existingBundle.versionId, + }); + updated = true; + } else { + // Use the branch from the spec, or fall back to whatever branch the API has + const effectiveBranch = bundleSpec.branchName ?? current.lineageMetadata?.branchName ?? 'mainline'; + const result = await updateConfigurationBundle({ + region, + bundleId: existingBundle.bundleId, + description: bundleSpec.description, + components: bundleSpec.components as ComponentConfigurationMap, + parentVersionIds: [current.versionId], + branchName: effectiveBranch, + commitMessage: bundleSpec.commitMessage ?? 
`Update ${bundleSpec.name}`, + }); + + configBundles[bundleSpec.name] = { + bundleId: result.bundleId, + bundleArn: result.bundleArn, + versionId: result.versionId, + }; + + results.push({ + bundleName: bundleSpec.name, + status: 'updated', + bundleId: result.bundleId, + bundleArn: result.bundleArn, + versionId: result.versionId, + }); + updated = true; + } + } catch (updateErr) { + // If bundle or branch not found, fall through to find-by-name or create + const msg = updateErr instanceof Error ? updateErr.message : String(updateErr); + if (!msg.includes('404') && !msg.includes('not found')) throw updateErr; + } + } + + if (!updated) { + // Try to find by name via list (handles re-creation after state loss) + const existingByName = await findBundleByName(region, apiBundleName); + + if (existingByName) { + // Fetch versions and pick the newest — avoids branch-not-found errors from getConfigurationBundle + const versions = await listConfigurationBundleVersions({ + region, + bundleId: existingByName.bundleId, + }); + const sorted = [...versions.versions].sort((a, b) => Number(b.versionCreatedAt) - Number(a.versionCreatedAt)); + const latestVersionId = sorted[0]?.versionId; + if (!latestVersionId) throw new Error(`No versions found for bundle ${bundleSpec.name}`); + const current = await getConfigurationBundleVersion({ + region, + bundleId: existingByName.bundleId, + versionId: latestVersionId, + }); + const componentsChanged = !deepEqual(current.components, bundleSpec.components); + const descriptionChanged = (bundleSpec.description ?? undefined) !== (current.description ?? 
undefined); + + if (!componentsChanged && !descriptionChanged) { + configBundles[bundleSpec.name] = { + bundleId: existingByName.bundleId, + bundleArn: current.bundleArn, + versionId: current.versionId, + }; + results.push({ + bundleName: bundleSpec.name, + status: 'skipped', + bundleId: existingByName.bundleId, + bundleArn: current.bundleArn, + versionId: current.versionId, + }); + } else { + const effectiveBranch = bundleSpec.branchName ?? current.lineageMetadata?.branchName ?? 'mainline'; + const result = await updateConfigurationBundle({ + region, + bundleId: existingByName.bundleId, + description: bundleSpec.description, + components: bundleSpec.components as ComponentConfigurationMap, + parentVersionIds: [current.versionId], + branchName: effectiveBranch, + commitMessage: bundleSpec.commitMessage ?? `Update ${bundleSpec.name}`, + }); + + configBundles[bundleSpec.name] = { + bundleId: result.bundleId, + bundleArn: result.bundleArn, + versionId: result.versionId, + }; + + results.push({ + bundleName: bundleSpec.name, + status: 'updated', + bundleId: result.bundleId, + bundleArn: result.bundleArn, + versionId: result.versionId, + }); + } + } else { + // Create new — omit branchName if not in spec so the API uses its default + const result = await createConfigurationBundle({ + region, + bundleName: apiBundleName, + description: bundleSpec.description, + components: bundleSpec.components as ComponentConfigurationMap, + branchName: bundleSpec.branchName, + commitMessage: bundleSpec.commitMessage ?? `Create ${bundleSpec.name}`, + }); + + configBundles[bundleSpec.name] = { + bundleId: result.bundleId, + bundleArn: result.bundleArn, + versionId: result.versionId, + }; + + results.push({ + bundleName: bundleSpec.name, + status: 'created', + bundleId: result.bundleId, + bundleArn: result.bundleArn, + versionId: result.versionId, + }); + } + } + } catch (err) { + results.push({ + bundleName: bundleSpec.name, + status: 'error', + error: err instanceof Error ? 
err.message : String(err), + }); + } + } + + // Delete orphaned bundles (in deployed-state but removed from spec) + if (existingBundles) { + for (const [bundleName, bundleState] of Object.entries(existingBundles)) { + if (!specBundleNames.has(bundleName)) { + try { + await deleteConfigurationBundle({ + region, + bundleId: bundleState.bundleId, + }); + + results.push({ + bundleName, + status: 'deleted', + }); + } catch (err) { + results.push({ + bundleName, + status: 'error', + error: err instanceof Error ? err.message : String(err), + }); + } + } + } + } + + return { + results, + configBundles, + hasErrors: results.some(r => r.status === 'error'), + }; +} + +// ============================================================================ +// Helpers +// ============================================================================ + +async function findBundleByName(region: string, bundleName: string): Promise<{ bundleId: string } | undefined> { + try { + const result = await listConfigurationBundles({ region, maxResults: 100 }); + return result.bundles.find(b => b.bundleName === bundleName); + } catch { + return undefined; + } +} + +/** Key-order-independent deep-equal for JSON-serializable objects. 
*/ +function deepEqual(a: unknown, b: unknown): boolean { + if (a === b) return true; + if (a === null || b === null || typeof a !== typeof b) return false; + if (typeof a !== 'object') return false; + + if (Array.isArray(a)) { + if (!Array.isArray(b) || a.length !== b.length) return false; + return a.every((item, i) => deepEqual(item, b[i])); + } + + const aObj = a as Record; + const bObj = b as Record; + const aKeys = Object.keys(aObj); + const bKeys = Object.keys(bObj); + if (aKeys.length !== bKeys.length) return false; + return aKeys.every(key => key in bObj && deepEqual(aObj[key], bObj[key])); +} + +// ============================================================================ +// Component Key Resolution +// ============================================================================ + +/** + * Resolve placeholder component keys (e.g., {{runtime:name}}, {{gateway:name}}) + * to actual ARNs from deployed state. + */ +export function resolveConfigBundleComponentKeys( + projectSpec: AgentCoreProjectSpec, + deployedState: DeployedState, + targetName: string +): AgentCoreProjectSpec { + const resources = deployedState.targets?.[targetName]?.resources; + if (!resources) return projectSpec; + + const resolvedBundles = (projectSpec.configBundles ?? []).map(bundle => { + const resolvedComponents: Record }> = {}; + + for (const [key, value] of Object.entries(bundle.components ?? 
{})) { + const resolvedKey = resolveComponentKey(key, resources); + resolvedComponents[resolvedKey] = value; + } + + return { ...bundle, components: resolvedComponents }; + }); + + return { ...projectSpec, configBundles: resolvedBundles }; +} + +function resolveComponentKey( + key: string, + resources: NonNullable +): string { + if (key.startsWith('arn:')) return key; + + const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(key); + if (gwMatch) { + const gwName = gwMatch[1]!; + const httpGw = resources.httpGateways?.[gwName]; + if (httpGw) return httpGw.gatewayArn; + const mcpGw = resources.mcp?.gateways?.[gwName]; + if (mcpGw) return mcpGw.gatewayArn; + throw new Error( + `Config bundle references gateway "${gwName}" but it was not found in deployed resources. Ensure the gateway is defined in agentcore.json and deploys successfully.` + ); + } + + const rtMatch = /^\{\{runtime:(.+)\}\}$/.exec(key); + if (rtMatch) { + const rtName = rtMatch[1]!; + const rt = resources.runtimes?.[rtName]; + if (rt) return rt.runtimeArn; + throw new Error( + `Config bundle references runtime "${rtName}" but it was not found in deployed resources. 
Ensure the runtime is defined in agentcore.json and deploys successfully.` + ); + } + + return key; +} diff --git a/src/cli/operations/deploy/post-deploy-http-gateways.ts b/src/cli/operations/deploy/post-deploy-http-gateways.ts new file mode 100644 index 000000000..db18d28a2 --- /dev/null +++ b/src/cli/operations/deploy/post-deploy-http-gateways.ts @@ -0,0 +1,628 @@ +import type { AgentCoreProjectSpec, DeployedResourceState, HttpGatewayDeployedState } from '../../../schema'; +import { getCredentialProvider } from '../../aws/account'; +import { + createHttpGateway, + createHttpGatewayTarget, + deleteHttpGateway, + deleteHttpGatewayTarget, + getHttpGatewayTarget, + listAllHttpGateways, + listHttpGatewayTargets, + waitForGatewayReady, + waitForTargetReady, +} from '../../aws/agentcore-http-gateways'; +import { + CreateRoleCommand, + DeleteRoleCommand, + DeleteRolePolicyCommand, + GetRoleCommand, + IAMClient, + PutRolePolicyCommand, +} from '@aws-sdk/client-iam'; +import { createHash } from 'node:crypto'; + +// ============================================================================ +// Types +// ============================================================================ + +export interface SetupHttpGatewaysOptions { + region: string; + projectName: string; + projectSpec: AgentCoreProjectSpec; + existingHttpGateways?: Record; + deployedResources?: DeployedResourceState; +} + +export interface HttpGatewaySetupResult { + gatewayName: string; + status: 'created' | 'skipped' | 'deleted' | 'error'; + gatewayId?: string; + gatewayArn?: string; + error?: string; +} + +export interface SetupHttpGatewaysResult { + results: HttpGatewaySetupResult[]; + httpGateways: Record; + hasErrors: boolean; +} + +// ============================================================================ +// Constants +// ============================================================================ + +const HTTP_GATEWAY_ROLE_POLICY_NAME = 'HttpGatewayExecutionPolicy'; + +// 
============================================================================ +// Implementation +// ============================================================================ + +/** + * Create or delete HTTP gateways post-deploy. + * + * Pattern: + * 1. For each httpGateway in project spec -> resolve runtime ARN, create or skip + * 2. For each httpGateway in deployed-state but NOT in project spec -> delete (reconciliation) + * 3. Return updated deployed state entries + */ +export async function setupHttpGateways(options: SetupHttpGatewaysOptions): Promise { + const { region, projectName, projectSpec, existingHttpGateways, deployedResources } = options; + const results: HttpGatewaySetupResult[] = []; + const httpGateways: Record = {}; + + // Defensive: Zod .default([]) only fires on undefined, not null. + // If someone has "httpGateways": null in their JSON, it passes through as null. + const httpGatewaySpecs = projectSpec.httpGateways ?? []; + + // Create or skip gateways from the spec + for (const gwSpec of httpGatewaySpecs) { + let resolvedRoleArn: string | undefined; + let roleCreatedByCli = false; + try { + const existingGateway = existingHttpGateways?.[gwSpec.name]; + + if (existingGateway) { + // Already deployed + + // Create or update targets from httpGateways[].targets (for target-based AB testing) + if (gwSpec.targets && gwSpec.targets.length > 0) { + // List existing targets to avoid unnecessary create calls + const existingTargetsByName = new Map(); + try { + const existingTargets = await listHttpGatewayTargets({ + region, + gatewayId: existingGateway.gatewayId, + }); + for (const t of existingTargets.targets) { + existingTargetsByName.set(t.name, { targetId: t.targetId }); + } + } catch { + // If list fails, fall through and let create handle 409s + } + + for (const tgt of gwSpec.targets) { + const existingTarget = existingTargetsByName.get(tgt.name); + if (existingTarget) { + // Target exists by name — check if qualifier matches + try { + const 
targetDetails = await getHttpGatewayTarget({ + region, + gatewayId: existingGateway.gatewayId, + targetId: existingTarget.targetId, + }); + const httpConfig = ( + targetDetails.targetConfiguration as + | { + http?: { + agentcoreRuntime?: { qualifier?: string }; + runtimeTargetConfiguration?: { qualifier?: string }; + }; + } + | undefined + )?.http; + const existingQualifier = + httpConfig?.agentcoreRuntime?.qualifier ?? httpConfig?.runtimeTargetConfiguration?.qualifier; + const specQualifier = tgt.qualifier ?? 'DEFAULT'; + if (existingQualifier === specQualifier) { + // Qualifier matches — skip + continue; + } + // Qualifier differs — delete old target and recreate + await deleteHttpGatewayTarget({ + region, + gatewayId: existingGateway.gatewayId, + targetId: existingTarget.targetId, + }); + } catch { + // If get/delete fails, fall through to create which will handle conflicts + } + } + try { + const tgtRuntime = deployedResources?.runtimes?.[tgt.runtimeRef]; + if (!tgtRuntime) continue; + const tgtResult = await createHttpGatewayTarget({ + region, + gatewayId: existingGateway.gatewayId, + targetName: tgt.name, + runtimeArn: tgtRuntime.runtimeArn, + qualifier: tgt.qualifier, + }); + await waitForTargetReady({ + region, + gatewayId: existingGateway.gatewayId, + targetId: tgtResult.targetId, + }); + } catch (tgtErr) { + if (tgtErr instanceof Error && tgtErr.message.includes('409')) continue; + // Non-fatal + } + } + } + + httpGateways[gwSpec.name] = existingGateway; + results.push({ + gatewayName: gwSpec.name, + status: 'skipped', + gatewayId: existingGateway.gatewayId, + gatewayArn: existingGateway.gatewayArn, + }); + continue; + } + + // Try to find by name via list (handles re-creation after state loss) + const existingByName = await findHttpGatewayByName(region, gwSpec.name); + if (existingByName) { + console.warn( + `Warning: HTTP gateway "${gwSpec.name}" found by name but local state was lost. 
Target and role state may be incomplete — consider re-deploying.` + ); + httpGateways[gwSpec.name] = { + gatewayId: existingByName.gatewayId, + gatewayArn: existingByName.gatewayArn, + // targetId, roleArn, roleCreatedByCli unknown after state-loss recovery + }; + results.push({ + gatewayName: gwSpec.name, + status: 'skipped', + gatewayId: existingByName.gatewayId, + gatewayArn: existingByName.gatewayArn, + }); + continue; + } + + // Resolve runtime ARN from deployed state + const runtimeState = deployedResources?.runtimes?.[gwSpec.runtimeRef]; + if (!runtimeState) { + results.push({ + gatewayName: gwSpec.name, + status: 'error', + error: `Runtime "${gwSpec.runtimeRef}" not found in deployed resources. Deploy the runtime before creating an HTTP gateway.`, + }); + continue; + } + const runtimeArn = runtimeState.runtimeArn; + if (gwSpec.roleArn) { + resolvedRoleArn = gwSpec.roleArn; + } else { + resolvedRoleArn = await getOrCreateHttpGatewayRole({ + region, + projectName, + gatewayName: gwSpec.name, + runtimeArn, + }); + roleCreatedByCli = true; + } + + // Create gateway and wait for it to become READY before adding targets + // Creating HTTP gateway for runtime + const createResult = await createHttpGateway({ + region, + name: gwSpec.name, + roleArn: resolvedRoleArn, + }); + + const readyGateway = await waitForGatewayReady({ + region, + gatewayId: createResult.gatewayId, + }); + + // Create target pointing to the runtime + let targetId: string | undefined; + try { + const targetResult = await createHttpGatewayTarget({ + region, + gatewayId: createResult.gatewayId, + targetName: gwSpec.runtimeRef, + runtimeArn, + }); + + targetId = targetResult.targetId; + + // Wait for target to become ready + // Waiting for gateway target to become ready + await waitForTargetReady({ + region, + gatewayId: createResult.gatewayId, + targetId: targetResult.targetId, + }); + } catch (targetErr) { + // Rollback: delete target (if created), wait for deletion, then delete gateway + try { 
+ if (targetId) { + await deleteHttpGatewayTarget({ region, gatewayId: createResult.gatewayId, targetId }); + } + } catch { + // Best-effort target cleanup + } + try { + await deleteHttpGateway({ region, gatewayId: createResult.gatewayId }); + } catch { + // Best-effort gateway rollback + } + + // Always clean up auto-created role on target failure, regardless of gateway rollback result + if (roleCreatedByCli && resolvedRoleArn) { + try { + await deleteHttpGatewayRole(region, resolvedRoleArn); + } catch { + // Best-effort role cleanup + } + } + + results.push({ + gatewayName: gwSpec.name, + status: 'error', + error: `Target creation failed, gateway rolled back: ${targetErr instanceof Error ? targetErr.message : String(targetErr)}`, + }); + continue; + } + + // Create additional targets from httpGateways[].targets (for target-based AB testing) + if (gwSpec.targets && gwSpec.targets.length > 0) { + for (const tgt of gwSpec.targets) { + try { + const tgtRuntime = deployedResources?.runtimes?.[tgt.runtimeRef]; + if (!tgtRuntime) { + // Runtime not deployed, skip this target + continue; + } + const tgtResult = await createHttpGatewayTarget({ + region, + gatewayId: createResult.gatewayId, + targetName: tgt.name, + runtimeArn: tgtRuntime.runtimeArn, + qualifier: tgt.qualifier, + }); + await waitForTargetReady({ + region, + gatewayId: createResult.gatewayId, + targetId: tgtResult.targetId, + }); + } catch (tgtErr) { + // 409 = already exists, skip + if (tgtErr instanceof Error && tgtErr.message.includes('409')) continue; + // Non-fatal: log but continue + } + } + } + + httpGateways[gwSpec.name] = { + gatewayId: createResult.gatewayId, + gatewayArn: createResult.gatewayArn, + gatewayUrl: readyGateway.gatewayUrl, + targetId, + roleArn: resolvedRoleArn, + roleCreatedByCli, + }; + + results.push({ + gatewayName: gwSpec.name, + status: 'created', + gatewayId: createResult.gatewayId, + gatewayArn: createResult.gatewayArn, + }); + } catch (err) { + // If we auto-created a role, 
clean it up on failure + if (roleCreatedByCli && resolvedRoleArn) { + try { + await deleteHttpGatewayRole(region, resolvedRoleArn); + } catch { + // Best-effort role cleanup + } + } + results.push({ + gatewayName: gwSpec.name, + status: 'error', + error: err instanceof Error ? err.message : String(err), + }); + } + } + + // Orphaned gateways are deleted by deleteOrphanedHttpGateways() which runs + // as a separate pre-pass. No deletion loop here. + + return { + results, + httpGateways, + hasErrors: results.some(r => r.status === 'error'), + }; +}
+
+// ============================================================================
+// Shared Gateway Deletion
+// ============================================================================
+
+/**
+ * Delete an HTTP gateway and all its targets. Best-effort — target failures
+ * are warned but don't prevent gateway deletion attempt.
+ *
+ * Order: targets → gateway → role
+ *
+ * Target IDs come from `knownTargetId` plus a single `listHttpGatewayTargets`
+ * call (maxResults: 100). A listing failure is ignored and deletion proceeds with
+ * whatever IDs are already known. The role is removed only after the gateway
+ * deletion reported success, and only when `roleCreatedByCli` is set.
+ *
+ * @param options.knownTargetId - Target ID to delete even if listing fails.
+ * @param options.roleArn - Execution role to remove once the gateway is gone.
+ * @param options.roleCreatedByCli - Roles not flagged as CLI-created are left alone.
+ * @returns `{ success: true }` when the gateway delete succeeded, otherwise
+ *   `{ success: false, error }` with the error reported by `deleteHttpGateway`.
+ */
+export async function deleteHttpGatewayWithTargets(options: {
+  region: string;
+  gatewayId: string;
+  gatewayName: string;
+  knownTargetId?: string;
+  roleArn?: string;
+  roleCreatedByCli?: boolean;
+}): Promise<{ success: boolean; error?: string }> {
+  const { region, gatewayId, gatewayName, knownTargetId, roleArn, roleCreatedByCli } = options;
+
+  // A Set keeps insertion order and drops duplicates between the known ID and the listing
+  const targetIds = new Set<string>();
+  if (knownTargetId) {
+    targetIds.add(knownTargetId);
+  }
+  try {
+    const targets = await listHttpGatewayTargets({ region, gatewayId, maxResults: 100 });
+    for (const t of targets.targets) {
+      targetIds.add(t.targetId);
+    }
+  } catch {
+    // Best-effort — proceed with whatever IDs we have
+  }
+
+  for (const targetId of targetIds) {
+    try {
+      await deleteHttpGatewayTarget({ region, gatewayId, targetId });
+    } catch (err) {
+      console.warn(
+        `Warning: Failed to delete target ${targetId} on gateway "${gatewayName}": ${err instanceof Error ? err.message : String(err)}`
+      );
+    }
+  }
+
+  const deleteResult = await deleteHttpGateway({ region, gatewayId });
+  if (!deleteResult.success) {
+    // Keep the role: the gateway still exists and may still reference it
+    return { success: false, error: deleteResult.error };
+  }
+
+  if (roleCreatedByCli && roleArn) {
+    try {
+      await deleteHttpGatewayRole(region, roleArn);
+    } catch {
+      // Best-effort role cleanup
+    }
+  }
+
+  return { success: true };
+}
+
+/**
+ * Delete orphaned HTTP gateways (in deployed-state but removed from spec).
+ * Call before setupHttpGateways.
+ *
+ * A gateway is orphaned when its name is a key of `existingHttpGateways` but no
+ * entry in `projectSpec.httpGateways` has that name. Each orphan is removed via
+ * {@link deleteHttpGatewayWithTargets}; failures are recorded per gateway and
+ * never thrown.
+ *
+ * @returns One result per orphaned gateway ('deleted' or 'error') and an
+ *   aggregate `hasErrors` flag. Empty when `existingHttpGateways` is undefined.
+ */
+export async function deleteOrphanedHttpGateways(options: {
+  region: string;
+  projectSpec: AgentCoreProjectSpec;
+  // NOTE(review): the Record's type arguments were missing in this copy of the patch.
+  // The value shape below lists only the fields this function reads — confirm it
+  // against the deployed-state schema type.
+  existingHttpGateways?: Record<
+    string,
+    { gatewayId: string; targetId?: string; roleArn?: string; roleCreatedByCli?: boolean }
+  >;
+}): Promise<{ results: HttpGatewaySetupResult[]; hasErrors: boolean }> {
+  const { region, projectSpec, existingHttpGateways } = options;
+  if (!existingHttpGateways) return { results: [], hasErrors: false };
+
+  const specGatewayNames = new Set((projectSpec.httpGateways ?? []).map(g => g.name));
+  const results: HttpGatewaySetupResult[] = [];
+
+  for (const [gwName, gwState] of Object.entries(existingHttpGateways)) {
+    // Still declared in the spec — not an orphan
+    if (specGatewayNames.has(gwName)) continue;
+
+    try {
+      const result = await deleteHttpGatewayWithTargets({
+        region,
+        gatewayId: gwState.gatewayId,
+        gatewayName: gwName,
+        knownTargetId: gwState.targetId,
+        roleArn: gwState.roleArn,
+        roleCreatedByCli: gwState.roleCreatedByCli,
+      });
+
+      results.push({
+        gatewayName: gwName,
+        status: result.success ? 'deleted' : 'error',
+        error: result.error,
+      });
+    } catch (err) {
+      results.push({
+        gatewayName: gwName,
+        status: 'error',
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+
+  return {
+    results,
+    hasErrors: results.some(r => r.status === 'error'),
+  };
+}
+
+// ============================================================================
+// Gateway Trace Delivery
+// ============================================================================
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Look up an existing HTTP gateway by exact name.
+ *
+ * @returns The first gateway from `listAllHttpGateways` whose `name` matches, or
+ *   `undefined` when none matches or when the listing fails (a warning is printed
+ *   in the failure case, so "not found" and "could not check" look the same to callers).
+ */
+async function findHttpGatewayByName(
+  region: string,
+  name: string
+): Promise<{ gatewayId: string; gatewayArn: string } | undefined> {
+  try {
+    const gateways = await listAllHttpGateways({ region });
+    return gateways.find(gw => gw.name === name);
+  } catch (err) {
+    console.warn(
+      `Warning: Could not list HTTP gateways to check for existing "${name}": ${err instanceof Error ? err.message : String(err)}`
+    );
+    return undefined;
+  }
+}
+
+// ============================================================================
+// IAM Role Management
+// ============================================================================
+
+/**
+ * Generate a project-scoped role name following the CDK pattern:
+ * AgentCore-{ProjectName}-HttpGw{GatewayName}-{Hash}
+ *
+ * The readable prefix is cut to 55 characters so that prefix + '-' + 8 hex
+ * characters never exceeds 64 characters.
+ */
+function generateRoleName(projectName: string, gatewayName: string): string {
+  const base = `AgentCore-${projectName}-HttpGw${gatewayName}`;
+  // Use deterministic hash so retries produce the same role name
+  const hash = createHash('sha256').update(`${projectName}:${gatewayName}`).digest('hex').slice(0, 8);
+  // IAM role names max 64 chars
+  return `${base.slice(0, 55)}-${hash}`;
+}
+
+/**
+ * Extract role name from ARN: arn:aws:iam::123456789012:role/RoleName -> RoleName
+ *
+ * Returns the text after the last '/'; an input without '/' is returned unchanged.
+ */
+function roleNameFromArn(roleArn: string): string {
+  const parts = roleArn.split('/');
+  return parts[parts.length - 1] ?? roleArn;
+}
+
+interface CreateHttpGatewayRoleOptions {
+  region: string;
+  projectName: string;
+  gatewayName: string;
+  // Not read by getOrCreateHttpGatewayRole: its inline policy uses Resource '*'
+  runtimeArn: string;
+}
+
+/**
+ * Create the execution role for an HTTP gateway, or reuse it when a role with
+ * the generated name already exists.
+ *
+ * Steps: CreateRole with a trust policy for `bedrock-agentcore.amazonaws.com`;
+ * on `EntityAlreadyExistsException` fall back to GetRole; then (re)apply the
+ * inline invoke policy with PutRolePolicy. A fixed 15 second wait for IAM
+ * propagation is added only when the role was newly created.
+ *
+ * @returns The ARN of the created or reused role.
+ * @throws Error when role creation fails for any reason other than the role
+ *   already existing, or when no ARN can be obtained. Errors from GetRole and
+ *   PutRolePolicy propagate unchanged.
+ */
+async function getOrCreateHttpGatewayRole(options: CreateHttpGatewayRoleOptions): Promise<string> {
+  const { region, projectName, gatewayName } = options;
+  const credentials = getCredentialProvider();
+  const iamClient = new IAMClient({ region, credentials });
+
+  const roleName = generateRoleName(projectName, gatewayName);
+
+  const trustPolicy = JSON.stringify({
+    Version: '2012-10-17',
+    Statement: [
+      {
+        Effect: 'Allow',
+        Principal: { Service: 'bedrock-agentcore.amazonaws.com' },
+        Action: 'sts:AssumeRole',
+      },
+    ],
+  });
+
+  const policy = JSON.stringify({
+    Version: '2012-10-17',
+    Statement: [
+      {
+        Sid: 'InvokeRuntimeStatement',
+        Effect: 'Allow',
+        Action: [
+          'bedrock-agentcore:InvokeRuntime',
+          'bedrock-agentcore:InvokeAgent',
+          'bedrock-agentcore:InvokeAgentRuntime',
+        ],
+        // Resource must be '*' because the gateway service invokes runtimes using
+        // a resource identifier that doesn't match the deployed runtime ARN format.
+        // This matches the A/B testing guide's gateway role policy.
+        Resource: '*',
+      },
+    ],
+  });
+
+  let roleArn: string;
+  let needsPropagationWait = false;
+
+  try {
+    const createResult = await iamClient.send(
+      new CreateRoleCommand({
+        RoleName: roleName,
+        AssumeRolePolicyDocument: trustPolicy,
+        Description: `Auto-created execution role for AgentCore HTTP gateway: ${gatewayName}`,
+        Tags: [
+          { Key: 'agentcore:created-by', Value: 'agentcore-cli' },
+          { Key: 'agentcore:project-name', Value: projectName },
+          { Key: 'agentcore:http-gateway-name', Value: gatewayName },
+        ],
+      })
+    );
+
+    roleArn = createResult.Role?.Arn ?? '';
+    if (!roleArn) {
+      throw new Error(`IAM CreateRole succeeded but returned no role ARN for "${roleName}"`);
+    }
+    needsPropagationWait = true;
+  } catch (err: unknown) {
+    // Handle retry after a previous failed deploy left the role behind
+    const errName = (err as { name?: string }).name;
+    if (errName === 'EntityAlreadyExistsException') {
+      // IAM role already exists — reusing
+      const existing = await iamClient.send(new GetRoleCommand({ RoleName: roleName }));
+      roleArn = existing.Role?.Arn ?? '';
+      if (!roleArn) {
+        throw new Error(`Role "${roleName}" already exists but ARN could not be retrieved`);
+      }
+    } else {
+      throw new Error(
+        `Failed to create IAM role "${roleName}" for HTTP gateway "${gatewayName}": ${err instanceof Error ? err.message : String(err)}`
+      );
+    }
+  }
+
+  // Re-apply the inline policy (idempotent — covers both new and recovered roles)
+  await iamClient.send(
+    new PutRolePolicyCommand({
+      RoleName: roleName,
+      PolicyName: HTTP_GATEWAY_ROLE_POLICY_NAME,
+      PolicyDocument: policy,
+    })
+  );
+
+  if (needsPropagationWait) {
+    // Waiting for IAM role propagation (~15s)
+    await new Promise(resolve => setTimeout(resolve, 15_000));
+  }
+
+  return roleArn;
+}
+
+/**
+ * Delete an auto-created gateway role: first its inline policy, then the role.
+ *
+ * Both steps are best-effort — failures are swallowed, so this function does
+ * not reject for IAM errors and gives no signal whether the role was removed.
+ */
+export async function deleteHttpGatewayRole(region: string, roleArn: string): Promise<void> {
+  const credentials = getCredentialProvider();
+  const iamClient = new IAMClient({ region, credentials });
+  const roleName = roleNameFromArn(roleArn);
+
+  try {
+    // Must delete inline policies before deleting the role
+    await iamClient.send(
+      new DeleteRolePolicyCommand({
+        RoleName: roleName,
+        PolicyName: HTTP_GATEWAY_ROLE_POLICY_NAME,
+      })
+    );
+  } catch {
+    // Policy may not exist
+  }
+
+  try {
+    await iamClient.send(new DeleteRoleCommand({ RoleName: roleName }));
+  } catch {
+    // Role may already be deleted or in use -- best effort
+  }
+}
diff --git a/src/cli/operations/deploy/post-deploy-observability.ts b/src/cli/operations/deploy/post-deploy-observability.ts index 295392629..0616a65dc
100644 --- a/src/cli/operations/deploy/post-deploy-observability.ts +++ b/src/cli/operations/deploy/post-deploy-observability.ts @@ -1,4 +1,4 @@ -import { readCliConfig } from '../../../lib/schemas/io/cli-config'; +import { readGlobalConfigSync } from '../../../lib/schemas/io/global-config'; import { enableTransactionSearch } from '../../aws/transaction-search'; export interface TransactionSearchSetupOptions { @@ -31,7 +31,7 @@ export async function setupTransactionSearch( return { success: true }; } - const config = readCliConfig(); + const config = readGlobalConfigSync(); if (config.disableTransactionSearch) { return { success: true }; } diff --git a/src/cli/operations/deploy/post-deploy-online-evals.ts b/src/cli/operations/deploy/post-deploy-online-evals.ts new file mode 100644 index 000000000..d1012898d --- /dev/null +++ b/src/cli/operations/deploy/post-deploy-online-evals.ts @@ -0,0 +1,80 @@
+import type { OnlineEvalDeployedState } from '../../../schema/schemas/deployed-state';
+import type { OnlineEvalConfig } from '../../../schema/schemas/primitives/online-eval-config';
+import { updateOnlineEvalExecutionStatus } from '../../aws/agentcore-control';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+/** Inputs for enableOnlineEvalConfigs. */
+export interface EnableOnlineEvalsOptions {
+  region: string;
+  /** Configs from the project spec to consider for enabling. */
+  onlineEvalConfigs: OnlineEvalConfig[];
+  /** Deployed state keyed by config name; supplies `onlineEvaluationConfigId`. */
+  deployedOnlineEvalConfigs: Record<string, OnlineEvalDeployedState>;
+}
+
+/** Outcome for a single online eval config. */
+export interface OnlineEvalEnableResult {
+  configName: string;
+  status: 'enabled' | 'skipped' | 'error';
+  error?: string;
+}
+
+/** Aggregate outcome; `hasErrors` is true when any result has status 'error'. */
+export interface EnableOnlineEvalsResult {
+  results: OnlineEvalEnableResult[];
+  hasErrors: boolean;
+}
+
+// ============================================================================
+// Implementation
+// ============================================================================
+
+/**
+ * Enable every online eval config passed in, unless it sets
+ * `enableOnCreate: false` (an unset flag is treated as true).
+ *
+ * CFN does not support EnableOnCreate on `AWS::BedrockAgentCore::OnlineEvaluationConfig`,
+ * so configs always deploy as DISABLED. This post-deploy step enables them via API.
+ *
+ * Callers should only pass newly deployed configs (not previously existing ones) to
+ * avoid re-enabling configs a customer intentionally disabled.
+ *
+ * Failures are collected per config rather than thrown: a config missing from
+ * `deployedOnlineEvalConfigs`, or a rejected API call, yields an 'error' result
+ * and the loop continues with the next config.
+ *
+ * @returns One result per input config plus an aggregate `hasErrors` flag.
+ */
+export async function enableOnlineEvalConfigs(options: EnableOnlineEvalsOptions): Promise<EnableOnlineEvalsResult> {
+  const { region, onlineEvalConfigs, deployedOnlineEvalConfigs } = options;
+  const results: OnlineEvalEnableResult[] = [];
+
+  for (const config of onlineEvalConfigs) {
+    // Default enableOnCreate to true when not explicitly set
+    if (config.enableOnCreate === false) {
+      results.push({ configName: config.name, status: 'skipped' });
+      continue;
+    }
+
+    // The API call needs the deployed ID, which only exists in deployed state
+    const deployed = deployedOnlineEvalConfigs[config.name];
+    if (!deployed) {
+      results.push({
+        configName: config.name,
+        status: 'error',
+        error: `Online eval config "${config.name}" not found in deployed state`,
+      });
+      continue;
+    }
+
+    try {
+      await updateOnlineEvalExecutionStatus({
+        region,
+        onlineEvaluationConfigId: deployed.onlineEvaluationConfigId,
+        executionStatus: 'ENABLED',
+      });
+      results.push({ configName: config.name, status: 'enabled' });
+    } catch (err) {
+      results.push({
+        configName: config.name,
+        status: 'error',
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+  }
+
+  return {
+    results,
+    hasErrors: results.some(r => r.status === 'error'),
+  };
+}
diff --git a/src/cli/operations/deploy/preflight.ts b/src/cli/operations/deploy/preflight.ts index a3b499369..34a60fdc2 100644 --- a/src/cli/operations/deploy/preflight.ts +++ b/src/cli/operations/deploy/preflight.ts @@ -70,6 +70,7 @@ export async function validateProject(): Promise { cdkProject.validate(); const configIO = new ConfigIO({ baseDir: configRoot }); + const projectSpec = await configIO.readProjectSpec(); const awsTargets = await configIO.resolveAWSDeploymentTargets(); diff --git a/src/cli/operations/deploy/teardown.ts b/src/cli/operations/deploy/teardown.ts index 28a8f326a..2e38f2576 100644 --- a/src/cli/operations/deploy/teardown.ts +++ b/src/cli/operations/deploy/teardown.ts @@ -1,8 +1,11 @@ import { CONFIG_DIR, ConfigIO } from '../../../lib'; import type { AwsDeploymentTarget } from '../../../schema'; import { withTargetRegion } from '../../aws'; +import { deleteConfigurationBundle } from '../../aws/agentcore-config-bundles'; import { CdkToolkitWrapper, silentIoHost } from '../../cdk/toolkit-lib'; import { type DiscoveredStack, findStack } from '../../cloudformation/stack-discovery'; +import { deleteOrphanedABTests } from './post-deploy-ab-tests'; +import { deleteOrphanedHttpGateways } from './post-deploy-http-gateways'; import { StackSelectionStrategy } from '@aws-cdk/toolkit-lib'; import { existsSync } from 'fs'; import { join } from 'path'; @@ -111,6 +114,83 @@ export async function performStackTeardown(targetName: string): Promise dt.target.name === targetName); + + // Clean up imperatively-created resources before stack destruction. + // Ordering: AB tests first (they create rules on gateways), then gateways, then bundles. + // Delegates to the existing orphan-cleanup functions with an empty spec so everything + // is treated as orphaned — reuses stop/poll/delete/role-cleanup logic without duplication.
+ try { + const deployedState = await configIO.readDeployedState(); + const resources = deployedState.targets?.[targetName]?.resources; + + if (resources?.httpGateways || resources?.configBundles || resources?.abTests) { + let region = deployedTarget?.target.region; + if (!region) { + try { + const targets = await configIO.resolveAWSDeploymentTargets(); + const matchingTarget = targets.find(t => t.name === targetName); + region = matchingTarget?.region; + } catch { + // Can't resolve region + } + } + if (!region) { + console.warn('Warning: Could not determine region for resource cleanup — resources may need manual deletion'); + } + if (region) { + const projectSpec = await configIO.readProjectSpec(); + const emptySpec = { ...projectSpec, abTests: [], httpGateways: [] }; + + if (resources.abTests) { + const abResult = await deleteOrphanedABTests({ + region, + projectSpec: emptySpec, + existingABTests: resources.abTests, + }); + for (const r of abResult.results) { + if (r.status === 'deleted') { + console.log(`Deleted AB test "${r.testName}"`); + } else if (r.error) { + console.warn(`Warning: Failed to delete AB test "${r.testName}": ${r.error}`); + } + } + } + + if (resources.httpGateways) { + const gwResult = await deleteOrphanedHttpGateways({ + region, + projectSpec: emptySpec, + existingHttpGateways: resources.httpGateways, + }); + for (const r of gwResult.results) { + if (r.status === 'deleted') { + console.log(`Deleted HTTP gateway "${r.gatewayName}"`); + } else if (r.error) { + console.warn(`Warning: Failed to delete HTTP gateway "${r.gatewayName}": ${r.error}`); + } + } + } + + for (const [bundleName, bundleState] of Object.entries(resources.configBundles ?? {})) { + try { + await deleteConfigurationBundle({ region, bundleId: bundleState.bundleId }); + console.log(`Deleted config bundle "${bundleName}"`); + } catch (err) { + console.warn( + `Warning: Error during config bundle "${bundleName}" cleanup: ${err instanceof Error ? 
err.message : String(err)}` + ); + } + } + } + } + } catch (err) { + // Only suppress "file not found" — other errors (corrupt state, permissions) should warn + const msg = err instanceof Error ? err.message : String(err); + if (!msg.includes('ENOENT') && !msg.includes('not found') && !msg.includes('does not exist')) { + console.warn(`Warning: Could not read deployed state for resource cleanup: ${msg}`); + } + } + if (deployedTarget) { await destroyTarget({ target: deployedTarget, cdkProjectDir }); } diff --git a/src/cli/operations/dev/__tests__/config.test.ts b/src/cli/operations/dev/__tests__/config.test.ts index c7a681553..75047d37d 100644 --- a/src/cli/operations/dev/__tests__/config.test.ts +++ b/src/cli/operations/dev/__tests__/config.test.ts @@ -22,6 +22,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project); @@ -50,6 +53,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project); @@ -78,6 +84,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -112,6 +121,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(() => getDevConfig(workingDir, project, undefined, 'NonExistentAgent')).toThrow( @@ -141,6 +153,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(() => getDevConfig(workingDir, project, undefined, 'NodeAgent')).toThrow('Dev mode only supports Python'); @@ -168,6 +183,9 @@ 
describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -198,6 +216,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; // No configRoot provided @@ -228,6 +249,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -258,6 +282,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -287,6 +314,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -316,6 +346,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -345,6 +378,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -374,6 +410,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -404,6 +443,9 @@ describe('getDevConfig', () => { agentCoreGateways: [], policyEngines: [], 
harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const config = getDevConfig(workingDir, project, '/test/project/agentcore'); @@ -447,6 +489,9 @@ describe('getAgentPort', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(getAgentPort(project, 'Agent1', 8080)).toBe(8080); @@ -466,6 +511,9 @@ describe('getAgentPort', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(getAgentPort(project, 'NonExistent', 9000)).toBe(9000); @@ -490,6 +538,9 @@ describe('getDevSupportedAgents', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(getDevSupportedAgents(project)).toEqual([]); @@ -517,6 +568,9 @@ describe('getDevSupportedAgents', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; expect(getDevSupportedAgents(project)).toEqual([]); @@ -552,6 +606,9 @@ describe('getDevSupportedAgents', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const supported = getDevSupportedAgents(project); @@ -581,6 +638,9 @@ describe('getDevSupportedAgents', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const supported = getDevSupportedAgents(project); @@ -618,6 +678,9 @@ describe('getDevSupportedAgents', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const supported = getDevSupportedAgents(project); diff --git a/src/cli/operations/eval/batch-eval-storage.ts b/src/cli/operations/eval/batch-eval-storage.ts new file mode 100644 index 000000000..3145120ba --- /dev/null +++ b/src/cli/operations/eval/batch-eval-storage.ts @@ -0,0 +1,75 @@ +import 
{ findConfigRoot } from '../../../lib';
+import type { EvaluationResults } from '../../aws/agentcore-batch-evaluation';
+import type { BatchEvaluationResult, RunBatchEvaluationCommandResult } from './run-batch-evaluation';
+import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs';
+import { join } from 'path';
+
+const BATCH_EVAL_RESULTS_DIR = 'batch-eval-results';
+
+/** On-disk shape of one saved batch evaluation run (stored as `<batchEvaluationId>.json`). */
+export interface BatchEvalRunRecord {
+  name: string;
+  batchEvaluationId: string;
+  status: string;
+  startedAt?: string;
+  completedAt?: string;
+  /** Evaluator IDs taken from `results`, in the same order. */
+  evaluators: string[];
+  results: BatchEvaluationResult[];
+  evaluationResults?: EvaluationResults;
+}
+
+/**
+ * Resolve `<configRoot>/.cli/batch-eval-results`. Does not create the directory.
+ *
+ * @throws Error when no agentcore project config root can be found.
+ */
+function getResultsDir(): string {
+  const configRoot = findConfigRoot();
+  if (!configRoot) {
+    throw new Error('No agentcore project found. Run `agentcore create` first.');
+  }
+  return join(configRoot, '.cli', BATCH_EVAL_RESULTS_DIR);
+}
+
+/**
+ * Persist a batch evaluation run as pretty-printed JSON, creating the results
+ * directory when needed.
+ *
+ * Missing `batchEvaluationId`, `name` or `status` are stored as 'unknown', so runs
+ * without an ID all write to `unknown.json` and overwrite each other.
+ *
+ * @returns The path of the file that was written.
+ */
+export function saveBatchEvalRun(result: RunBatchEvaluationCommandResult): string {
+  const dir = getResultsDir();
+  mkdirSync(dir, { recursive: true });
+
+  const id = result.batchEvaluationId ?? 'unknown';
+  const filePath = join(dir, `${id}.json`);
+
+  const record: BatchEvalRunRecord = {
+    name: result.name ?? 'unknown',
+    batchEvaluationId: id,
+    status: result.status ?? 'unknown',
+    startedAt: result.startedAt,
+    completedAt: result.completedAt,
+    evaluators: result.results.map(r => r.evaluatorId),
+    results: result.results,
+    evaluationResults: result.evaluationResults,
+  };
+
+  writeFileSync(filePath, JSON.stringify(record, null, 2));
+  return filePath;
+}
+
+/**
+ * Load one saved run by ID. The ID may be given with or without a `.json` suffix.
+ *
+ * The parsed JSON is cast to BatchEvalRunRecord without validation.
+ *
+ * @throws Error when the file does not exist; read and parse errors propagate.
+ */
+export function loadBatchEvalRun(batchEvaluationId: string): BatchEvalRunRecord {
+  const dir = getResultsDir();
+  const jsonName = batchEvaluationId.endsWith('.json') ? batchEvaluationId : `${batchEvaluationId}.json`;
+  const filePath = join(dir, jsonName);
+
+  if (!existsSync(filePath)) {
+    throw new Error(`Batch evaluation run "${batchEvaluationId}" not found at ${filePath}`);
+  }
+
+  return JSON.parse(readFileSync(filePath, 'utf-8')) as BatchEvalRunRecord;
+}
+
+/**
+ * List all saved runs, ordered by file name descending.
+ *
+ * Files that cannot be read or parsed are skipped with a warning so that one
+ * corrupt or truncated record does not hide every other run.
+ *
+ * @returns The parsed records, or an empty array when the results directory
+ *   does not exist yet.
+ */
+export function listBatchEvalRuns(): BatchEvalRunRecord[] {
+  const dir = getResultsDir();
+
+  if (!existsSync(dir)) {
+    return [];
+  }
+
+  const files = readdirSync(dir)
+    .filter(f => f.endsWith('.json'))
+    .sort()
+    .reverse();
+
+  const records: BatchEvalRunRecord[] = [];
+  for (const file of files) {
+    try {
+      records.push(JSON.parse(readFileSync(join(dir, file), 'utf-8')) as BatchEvalRunRecord);
+    } catch (err) {
+      console.warn(
+        `Warning: Skipping unreadable batch evaluation record "${file}": ${err instanceof Error ? err.message : String(err)}`
+      );
+    }
+  }
+  return records;
+}
diff --git a/src/cli/operations/eval/run-batch-evaluation.ts b/src/cli/operations/eval/run-batch-evaluation.ts new file mode 100644 index 000000000..0962f4e0a --- /dev/null +++ b/src/cli/operations/eval/run-batch-evaluation.ts @@ -0,0 +1,347 @@ +/** + * Orchestrates running a BatchEvaluation: + * 1. Resolve agent from deployed state (for serviceNames / logGroupNames) + * 2. Build evaluators + dataSourceConfig + * 3. Call StartBatchEvaluation + * 4. Poll GetBatchEvaluation until terminal status + * 5.
Return results
+ */
+import { ConfigIO } from '../../../lib';
+import type { DeployedState } from '../../../schema';
+import { generateClientToken, getBatchEvaluation, startBatchEvaluation } from '../../aws/agentcore-batch-evaluation';
+import type {
+  CloudWatchFilterConfig,
+  EvaluationResults,
+  GetBatchEvaluationResult,
+  SessionMetadataEntry,
+} from '../../aws/agentcore-batch-evaluation';
+import { detectRegion } from '../../aws/region';
+import { ExecLogger } from '../../logging/exec-logger';
+import { CloudWatchLogsClient, GetLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export interface RunBatchEvaluationOptions {
+  /** Agent name (from project config) */
+  agent: string;
+  /** Evaluator IDs (Builtin.* or custom) */
+  evaluators: string[];
+  /** Optional name for the batch evaluation */
+  name?: string;
+  /** Region override */
+  region?: string;
+  /** Specific session IDs to evaluate (optional — filters CloudWatch source) */
+  sessionIds?: string[];
+  /** Lookback window in days (optional — filters CloudWatch source by time range) */
+  lookbackDays?: number;
+  /** Session metadata with ground truth (assertions, expected trajectory, turns) */
+  sessionMetadata?: SessionMetadataEntry[];
+  /** Poll interval in ms */
+  pollIntervalMs?: number;
+  /** Progress callback */
+  onProgress?: (status: string, message: string) => void;
+  /** Called once the batch evaluation has been created, with ID and region for cancellation */
+  onStarted?: (info: { batchEvaluationId: string; region: string }) => void;
+}
+
+export interface BatchEvaluationResult {
+  evaluatorId: string;
+  score?: number;
+  label?: string;
+  explanation?: string;
+  error?: string;
+}
+
+export interface RunBatchEvaluationCommandResult {
+  success: boolean;
+  error?: string;
+  batchEvaluationId?: string;
+  name?: string;
+  status?: string;
+  results: BatchEvaluationResult[];
+  evaluationResults?: EvaluationResults;
+  startedAt?: string;
+  completedAt?: string;
+  logFilePath?: string;
+}
+
+// ============================================================================
+// Constants
+// ============================================================================
+
+const DEFAULT_POLL_INTERVAL_MS = 10_000;
+const TERMINAL_STATUSES = new Set(['COMPLETED', 'COMPLETED_WITH_ERRORS', 'FAILED', 'STOPPED', 'CANCELLED']);
+
+// ============================================================================
+// Implementation
+// ============================================================================
+
+/**
+ * Start a batch evaluation for a deployed agent, poll until it reaches a
+ * terminal status, and return the results read from its CloudWatch output.
+ *
+ * Region precedence: `options.region`, then the first resolved AWS deployment
+ * target, then `detectRegion()`.
+ *
+ * Filtering: session IDs (explicit plus those in `sessionMetadata`,
+ * deduplicated) take precedence over `lookbackDays`; with neither, no filter
+ * is sent.
+ *
+ * Polling has no timeout — the loop runs until the service reports one of
+ * TERMINAL_STATUSES.
+ *
+ * Errors raised after the logger is set up are caught and reported as
+ * `{ success: false, error }`. A failure while reading CloudWatch results is
+ * only logged; the call still reports success with an empty `results` array.
+ *
+ * @returns The outcome, including `logFilePath` when a logger could be created.
+ */
+export async function runBatchEvaluationCommand(
+  options: RunBatchEvaluationOptions
+): Promise<RunBatchEvaluationCommandResult> {
+  const { agent, evaluators, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, onProgress } = options;
+
+  let logger: ExecLogger | undefined;
+  try {
+    logger = new ExecLogger({ command: 'batch-evaluate' });
+  } catch {
+    // Non-fatal
+  }
+
+  try {
+    // 1. Read project config and deployed state
+    logger?.startStep('Load project config');
+    const configIO = new ConfigIO();
+    const [projectSpec, deployedState, awsTargets] = await Promise.all([
+      configIO.readProjectSpec(),
+      configIO.readDeployedState(),
+      configIO.resolveAWSDeploymentTargets(),
+    ]);
+
+    // Use the deployed target region (from aws-targets) rather than generic detectRegion()
+    const targetRegion = awsTargets[0]?.region;
+    const { region: detectedRegion } = await detectRegion();
+    const region = options.region ?? targetRegion ?? detectedRegion;
+    const stage = process.env.AGENTCORE_STAGE?.toLowerCase() ?? 'prod';
+    logger?.log(`Region: ${region}, Stage: ${stage}`);
+    logger?.endStep('success');
+
+    // 2. Resolve agent from deployed state
+    logger?.startStep('Resolve agent');
+    const agentState = resolveAgentState(deployedState, agent);
+    if (!agentState) {
+      const error = `Agent "${agent}" not deployed. Run \`agentcore deploy\` first.`;
+      logger?.log(error, 'error');
+      logger?.endStep('error', error);
+      logger?.finalize(false);
+      return { success: false, error, results: [], logFilePath: logger?.logFilePath };
+    }
+
+    const runtimeId = agentState.runtimeId;
+    // Service name in CW logs uses project_agent format without the CDK hash suffix
+    const serviceName = `${projectSpec.name}_${agent}.DEFAULT`;
+    const runtimeLogGroup = `/aws/bedrock-agentcore/runtimes/${runtimeId}-DEFAULT`;
+
+    logger?.log(`Agent: ${agent} (runtime: ${runtimeId})`);
+    logger?.log(`Service name: ${serviceName}`);
+    logger?.log(`Log group: ${runtimeLogGroup}`);
+    logger?.endStep('success');
+
+    // 2b. Resolve evaluator names to deployed IDs
+    // Handles: "Builtin.Correctness", "arn:aws:...:evaluator/Builtin.Correctness", or custom evaluator names
+    const targetResources = Object.values(deployedState.targets).find(t => t.resources?.runtimes?.[agent])?.resources;
+    const resolvedEvaluators = evaluators.map(name => {
+      // Extract short name from ARN if passed (e.g. "arn:aws:bedrock-agentcore:::evaluator/Builtin.Correctness" → "Builtin.Correctness")
+      const shortName = name.includes('evaluator/') ? name.split('evaluator/').pop()! : name;
+      if (shortName.startsWith('Builtin.')) return shortName;
+      const deployed = targetResources?.evaluators?.[shortName];
+      if (deployed?.evaluatorId) {
+        logger?.log(`Resolved evaluator "${shortName}" → ${deployed.evaluatorId}`);
+        return deployed.evaluatorId;
+      }
+      logger?.log(`Evaluator "${shortName}" not found in deployed state, passing as-is`, 'warn');
+      return shortName;
+    });
+
+    // 3. Start the batch evaluation
+    logger?.startStep('Start batch evaluation');
+    let evalName: string;
+    if (options.name) {
+      if (!/^[a-zA-Z][a-zA-Z0-9_]{0,47}$/.test(options.name)) {
+        const error = `Batch evaluation name must start with a letter and contain only letters, digits, and underscores (max 48 chars). Got: "${options.name}"`;
+        // Close the open step and the log file, matching the other failure paths
+        logger?.log(error, 'error');
+        logger?.endStep('error', error);
+        logger?.finalize(false);
+        return { success: false, error, results: [], logFilePath: logger?.logFilePath };
+      }
+      evalName = options.name;
+    } else {
+      // Truncate the readable prefix, not the whole name, so the timestamp that
+      // makes the name unique survives for long project/agent names
+      const suffix = `_${Date.now()}`;
+      const prefix = `${projectSpec.name}_${agent}`.replace(/[^a-zA-Z0-9_]/g, '_').slice(0, 48 - suffix.length);
+      evalName = `${prefix}${suffix}`;
+    }
+
+    onProgress?.('starting', `Starting batch evaluation "${evalName}"...`);
+
+    // Build optional filter config for CloudWatch filtering
+    // API requires either sessionIds OR timeRange, not both — sessionIds takes precedence
+    // Merge explicit sessionIds with any sessionIds from sessionMetadata (deduplicated)
+    const metadataSessionIds = options.sessionMetadata?.map(m => m.sessionId).filter(Boolean) ?? [];
+    const explicitSessionIds = options.sessionIds ?? [];
+    const effectiveSessionIds = [...new Set([...explicitSessionIds, ...metadataSessionIds])];
+    const hasSessionIds = effectiveSessionIds.length > 0;
+
+    const filterConfig: CloudWatchFilterConfig | undefined = (() => {
+      if (hasSessionIds) {
+        return { sessionIds: effectiveSessionIds };
+      }
+      if (options.lookbackDays) {
+        const endTime = new Date().toISOString();
+        const startTime = new Date(Date.now() - options.lookbackDays * 24 * 60 * 60 * 1000).toISOString();
+        return { timeRange: { startTime, endTime } };
+      }
+      return undefined;
+    })();
+
+    const startPayload = {
+      region,
+      name: evalName,
+      evaluators: resolvedEvaluators.map(id => ({ evaluatorId: id })),
+      dataSourceConfig: {
+        cloudWatchLogs: {
+          serviceNames: [serviceName],
+          logGroupNames: [runtimeLogGroup],
+          ...(filterConfig ? { filterConfig } : {}),
+        },
+      },
+      ...(options.sessionMetadata && options.sessionMetadata.length > 0
+        ? { evaluationMetadata: { sessionMetadata: options.sessionMetadata } }
+        : {}),
+      clientToken: generateClientToken(),
+    };
+
+    logger?.log(`Request payload:\n${JSON.stringify(startPayload, null, 2)}`);
+
+    const startResult = await startBatchEvaluation(startPayload);
+
+    logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`);
+    logger?.endStep('success');
+
+    onProgress?.('running', `Batch evaluation started (ID: ${startResult.batchEvaluationId})`);
+    onProgress?.('running', 'This may take a few minutes...');
+    options.onStarted?.({ batchEvaluationId: startResult.batchEvaluationId, region });
+
+    // 4. Poll for completion
+    logger?.startStep('Poll for completion');
+    let current: GetBatchEvaluationResult = {
+      batchEvaluationId: startResult.batchEvaluationId,
+      batchEvaluationArn: startResult.batchEvaluationArn,
+      name: startResult.name,
+      status: startResult.status,
+    };
+
+    while (!TERMINAL_STATUSES.has(current.status)) {
+      await sleep(pollIntervalMs);
+
+      current = await getBatchEvaluation({
+        region,
+        batchEvaluationId: startResult.batchEvaluationId,
+      });
+
+      onProgress?.('polling', `Status: ${current.status}`);
+      logger?.log(`Poll status: ${current.status}`);
+    }
+
+    if (current.status !== 'COMPLETED' && current.status !== 'COMPLETED_WITH_ERRORS') {
+      const reasons = current.errorDetails?.join('; ') ?? '';
+      const error = `Batch evaluation finished with status: ${current.status}${reasons ? ` — ${reasons}` : ''}`;
+      logger?.log(error, 'error');
+      logger?.log(`Full poll response:\n${JSON.stringify(current, null, 2)}`, 'error');
+      logger?.endStep('error', error);
+      logger?.finalize(false);
+      return {
+        success: false,
+        error,
+        batchEvaluationId: startResult.batchEvaluationId,
+        name: evalName,
+        status: current.status,
+        results: [],
+        logFilePath: logger?.logFilePath,
+      };
+    }
+
+    logger?.endStep('success');
+
+    // 5. Fetch results from CloudWatch output logs
+    logger?.startStep('Fetch results');
+    let results: BatchEvaluationResult[] = [];
+
+    const cwDest = current.outputConfig?.cloudWatchConfig;
+    if (cwDest) {
+      try {
+        results = await fetchResultsFromCloudWatch(region, cwDest.logGroupName, cwDest.logStreamName);
+        logger?.log(`Fetched ${results.length} result(s) from CloudWatch`);
+      } catch (cwErr: unknown) {
+        logger?.log(`Failed to fetch CW results: ${cwErr instanceof Error ? cwErr.message : String(cwErr)}`, 'error');
+      }
+    }
+
+    logger?.endStep('success');
+
+    logger?.log(`Results: ${JSON.stringify(results, null, 2)}`);
+    logger?.finalize(true);
+
+    return {
+      success: true,
+      batchEvaluationId: startResult.batchEvaluationId,
+      name: evalName,
+      status: current.status,
+      results,
+      evaluationResults: current.evaluationResults,
+      startedAt: current.createdAt,
+      completedAt: current.updatedAt ?? new Date().toISOString(),
+      logFilePath: logger?.logFilePath,
+    };
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    logger?.log(error, 'error');
+    logger?.finalize(false);
+    return { success: false, error, results: [], logFilePath: logger?.logFilePath };
+  }
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Find the deployed runtime entry for an agent.
+ *
+ * Scans every deployment target in iteration order and returns the first
+ * `resources.runtimes[agentName]` entry found, or `undefined` when no target
+ * has one.
+ */
+function resolveAgentState(
+  deployedState: DeployedState,
+  agentName: string
+): { runtimeId: string; runtimeArn: string; roleArn?: string } | undefined {
+  for (const target of Object.values(deployedState.targets)) {
+    const agent = target.resources?.runtimes?.[agentName];
+    if (agent) return agent;
+  }
+  return undefined;
+}
+
+async function fetchResultsFromCloudWatch( + region: string, + logGroupName: string, + logStreamName: string +): Promise { + const client = new CloudWatchLogsClient({ region }); + const response = await client.send( + new GetLogEventsCommand({ + logGroupName, + logStreamName, + startFromHead: true, + }) + ); + + const results: BatchEvaluationResult[] = []; + for (const event of response.events ?? []) { + if (!event.message) continue; + try { + const parsed = JSON.parse(event.message) as Record; + const attrs = (parsed.attributes ??
{}) as Record; + const evaluatorId = attrs['gen_ai.evaluation.name'] as string | undefined; + if (!evaluatorId) continue; + + results.push({ + evaluatorId, + score: attrs['gen_ai.evaluation.score.value'] as number | undefined, + label: attrs['gen_ai.evaluation.score.label'] as string | undefined, + explanation: attrs['gen_ai.evaluation.explanation'] as string | undefined, + }); + } catch { + // Skip non-JSON or malformed entries + } + } + return results; +} + +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} diff --git a/src/cli/operations/eval/run-eval.ts b/src/cli/operations/eval/run-eval.ts index d130438ff..90cd519c7 100644 --- a/src/cli/operations/eval/run-eval.ts +++ b/src/cli/operations/eval/run-eval.ts @@ -1,12 +1,12 @@ import { getCredentialProvider } from '../../aws'; import { evaluate } from '../../aws/agentcore'; +import type { EvaluationReferenceInput } from '../../aws/agentcore'; import { getEvaluator } from '../../aws/agentcore-control'; import { DEFAULT_ENDPOINT_NAME } from '../../constants'; import type { DeployedProjectConfig } from '../resolve-agent'; import { loadDeployedProjectConfig, resolveAgent } from '../resolve-agent'; import { generateFilename, saveEvalRun } from './storage'; import type { EvalEvaluatorResult, EvalRunResult, EvalSessionScore, RunEvalOptions, SessionInfo } from './types'; -import type { EvaluationReferenceInput } from '@aws-sdk/client-bedrock-agentcore'; import { CloudWatchLogsClient, GetQueryResultsCommand, StartQueryCommand } from '@aws-sdk/client-cloudwatch-logs'; import type { ResultField } from '@aws-sdk/client-cloudwatch-logs'; import type { DocumentType } from '@smithy/types'; diff --git a/src/cli/operations/fetch-access/list-gateways.ts b/src/cli/operations/fetch-access/list-gateways.ts index 03102ff26..0e70559ce 100644 --- a/src/cli/operations/fetch-access/list-gateways.ts +++ b/src/cli/operations/fetch-access/list-gateways.ts @@ -30,5 +30,17 @@ export async 
function listGateways( }); } + // Include HTTP gateways (auto-created for A/B testing) + const deployedHttpGateways = target.resources?.httpGateways ?? {}; + for (const httpGateway of projectSpec.httpGateways ?? []) { + const deployed = deployedHttpGateways[httpGateway.name]; + if (!deployed?.gatewayArn) continue; + + gateways.push({ + name: httpGateway.name, + authType: 'AWS_IAM', + }); + } + return gateways; } diff --git a/src/cli/operations/recommendation/__tests__/apply-to-bundle.test.ts b/src/cli/operations/recommendation/__tests__/apply-to-bundle.test.ts new file mode 100644 index 000000000..5e0fb668a --- /dev/null +++ b/src/cli/operations/recommendation/__tests__/apply-to-bundle.test.ts @@ -0,0 +1,199 @@ +import type { ConfigIO } from '../../../../lib'; +import type { RecommendationResult } from '../../../aws/agentcore-recommendation'; +import { applyRecommendationToBundle } from '../apply-to-bundle'; +import { describe, expect, it, vi } from 'vitest'; + +const { RUNTIME_ARN, BUNDLE_ARN, NEW_VERSION_ID } = vi.hoisted(() => ({ + RUNTIME_ARN: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/myAgent-abc123', + BUNDLE_ARN: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:configuration-bundle/MyBundle-xyz789', + NEW_VERSION_ID: 'v2-recommendation', +})); + +vi.mock('../../../aws/agentcore-config-bundles', () => ({ + getConfigurationBundleVersion: vi.fn().mockResolvedValue({ + bundleArn: BUNDLE_ARN, + bundleId: 'MyBundle-xyz789', + bundleName: 'MyBundle', + versionId: NEW_VERSION_ID, + components: { + [RUNTIME_ARN]: { + configuration: { + systemPrompt: 'new improved prompt', + temperature: 0.8, + }, + }, + }, + lineageMetadata: { + commitMessage: 'Recommendation applied', + }, + createdAt: '2026-04-12T00:00:00Z', + versionCreatedAt: '2026-04-12T00:00:00Z', + }), +})); + +function makeConfigIO(spec: Record, deployedState?: Record) { + const writeSpecSpy = vi.fn().mockResolvedValue(undefined); + const writeDeployedStateSpy = 
vi.fn().mockResolvedValue(undefined); + const configIO = { + readProjectSpec: vi.fn().mockResolvedValue(spec), + writeProjectSpec: writeSpecSpy, + readDeployedState: vi.fn().mockResolvedValue( + deployedState ?? { + targets: { + default: { + resources: { + configBundles: { + MyBundle: { + bundleId: 'MyBundle-xyz789', + bundleArn: BUNDLE_ARN, + versionId: 'v1', + }, + }, + }, + }, + }, + } + ), + writeDeployedState: writeDeployedStateSpy, + } as unknown as ConfigIO; + return { configIO, writeSpecSpy, writeDeployedStateSpy }; +} + +function makeSpec(systemPrompt = 'old prompt') { + return { + name: 'testProject', + configBundles: [ + { + name: 'MyBundle', + type: 'ConfigurationBundle', + components: { + [RUNTIME_ARN]: { + configuration: { + systemPrompt, + temperature: 0.7, + }, + }, + }, + branchName: 'main', + commitMessage: 'Initial', + }, + ], + }; +} + +describe('applyRecommendationToBundle', () => { + it('syncs local config from server-created version by bundle name', async () => { + const spec = makeSpec(); + const { configIO, writeSpecSpy, writeDeployedStateSpy } = makeConfigIO(spec); + + const result: RecommendationResult = { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'new improved prompt', + configurationBundle: { bundleArn: BUNDLE_ARN, versionId: NEW_VERSION_ID }, + }, + }; + + const applyResult = await applyRecommendationToBundle( + { bundleName: 'MyBundle', result, region: 'us-east-1' }, + configIO + ); + + expect(applyResult.success).toBe(true); + expect(applyResult.newVersionId).toBe(NEW_VERSION_ID); + + // Verify spec was written with server components + expect(writeSpecSpy).toHaveBeenCalledTimes(1); + const writtenSpec = writeSpecSpy.mock.calls[0]![0]; + expect(writtenSpec.configBundles[0].components[RUNTIME_ARN].configuration.systemPrompt).toBe('new improved prompt'); + // Server version has temperature 0.8 (not local 0.7) + expect(writtenSpec.configBundles[0].components[RUNTIME_ARN].configuration.temperature).toBe(0.8); + // 
Commit message from lineage metadata + expect(writtenSpec.configBundles[0].commitMessage).toBe('Recommendation applied'); + + // Verify deployed state was updated with new version + expect(writeDeployedStateSpy).toHaveBeenCalledTimes(1); + const writtenState = writeDeployedStateSpy.mock.calls[0]![0]; + expect(writtenState.targets.default.resources.configBundles.MyBundle.versionId).toBe(NEW_VERSION_ID); + }); + + it('syncs local config by bundle ARN via deployed state', async () => { + const spec = makeSpec(); + const { configIO } = makeConfigIO(spec); + + const result: RecommendationResult = { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'ARN-resolved prompt', + configurationBundle: { bundleArn: BUNDLE_ARN, versionId: NEW_VERSION_ID }, + }, + }; + + const applyResult = await applyRecommendationToBundle( + { bundleArn: BUNDLE_ARN, result, region: 'us-east-1' }, + configIO + ); + + expect(applyResult.success).toBe(true); + expect(applyResult.newVersionId).toBe(NEW_VERSION_ID); + }); + + it('syncs tool description recommendation result', async () => { + const spec = makeSpec(); + const { configIO } = makeConfigIO(spec); + + const result: RecommendationResult = { + toolDescriptionRecommendationResult: { + tools: [{ toolName: 'search', recommendedToolDescription: 'new desc' }], + configurationBundle: { bundleArn: BUNDLE_ARN, versionId: NEW_VERSION_ID }, + }, + }; + + const applyResult = await applyRecommendationToBundle( + { bundleName: 'MyBundle', result, region: 'us-east-1' }, + configIO + ); + + expect(applyResult.success).toBe(true); + expect(applyResult.newVersionId).toBe(NEW_VERSION_ID); + }); + + it('returns error when result has no configurationBundle', async () => { + const spec = makeSpec(); + const { configIO, writeSpecSpy } = makeConfigIO(spec); + + const result: RecommendationResult = { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'new prompt', + }, + }; + + const applyResult = await applyRecommendationToBundle( + { 
bundleName: 'MyBundle', result, region: 'us-east-1' }, + configIO + ); + + expect(applyResult.success).toBe(false); + expect(applyResult.error).toContain('does not contain a new config bundle version'); + expect(writeSpecSpy).not.toHaveBeenCalled(); + }); + + it('returns error when bundle not found in agentcore.json', async () => { + const spec = makeSpec(); + const { configIO, writeSpecSpy } = makeConfigIO(spec); + + const result: RecommendationResult = { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'new', + configurationBundle: { bundleArn: BUNDLE_ARN, versionId: NEW_VERSION_ID }, + }, + }; + + const applyResult = await applyRecommendationToBundle( + { bundleName: 'NonExistent', result, region: 'us-east-1' }, + configIO + ); + + expect(applyResult.success).toBe(false); + expect(applyResult.error).toContain('NonExistent'); + expect(writeSpecSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts b/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts new file mode 100644 index 000000000..4395edd23 --- /dev/null +++ b/src/cli/operations/recommendation/__tests__/fetch-session-spans.test.ts @@ -0,0 +1,224 @@ +import { fetchSessionSpans } from '../fetch-session-spans'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockSearchLogs = vi.fn(); + +vi.mock('../../../aws/cloudwatch', () => ({ + searchLogs: (...args: unknown[]) => mockSearchLogs(...args), +})); + +/** + * Helper: create an async generator from an array of log events. + */ +async function* fakeLogStream(events: { timestamp: number; message: string }[]) { + for (const e of events) { + yield await Promise.resolve(e); + } +} + +/** Helper: create an async generator that throws on first iteration. 
*/ +// eslint-disable-next-line require-yield +async function* fakeErrorStream(error: Error): AsyncGenerator<{ timestamp: number; message: string }> { + await Promise.resolve(); + throw error; +} + +const SESSION_ID = 'sess-abc-123'; + +function makeSpanRecord(traceId: string, spanId: string) { + return { + timestamp: Date.now(), + message: JSON.stringify({ + traceId, + spanId, + scope: { name: 'strands.telemetry.tracer' }, + attributes: { 'session.id': SESSION_ID }, + body: {}, + }), + }; +} + +function makeLogRecord(traceId: string, spanId: string, sessionId: string) { + return { + timestamp: Date.now(), + message: JSON.stringify({ + traceId, + spanId, + attributes: { 'session.id': sessionId }, + body: { + input: { messages: [{ content: { content: 'hello' }, role: 'user' }] }, + output: { messages: [{ content: { content: 'hi' }, role: 'assistant' }] }, + }, + }), + }; +} + +describe('fetchSessionSpans', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('combines span records and log records for the same session', async () => { + const spanEvents = [makeSpanRecord('trace1', 'span1'), makeSpanRecord('trace1', 'span2')]; + const logEvents = [makeLogRecord('trace1', 'span3', SESSION_ID)]; + + // First call = aws/spans, second call = runtime log group + mockSearchLogs.mockReturnValueOnce(fakeLogStream(spanEvents)).mockReturnValueOnce(fakeLogStream(logEvents)); + + const result = await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + }); + + expect(result.spans).toHaveLength(3); + expect(result.spanRecordCount).toBe(2); + expect(result.logRecordCount).toBe(1); + }); + + it('filters out log records from other sessions', async () => { + const spanEvents = [makeSpanRecord('trace1', 'span1')]; + const logEvents = [ + makeLogRecord('trace1', 'span2', SESSION_ID), + makeLogRecord('trace1', 'span3', 'other-session-id'), + ]; + + 
mockSearchLogs.mockReturnValueOnce(fakeLogStream(spanEvents)).mockReturnValueOnce(fakeLogStream(logEvents)); + + const result = await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + }); + + expect(result.spans).toHaveLength(2); + expect(result.logRecordCount).toBe(1); + }); + + it('returns empty spans when no records found', async () => { + mockSearchLogs.mockReturnValueOnce(fakeLogStream([])).mockReturnValueOnce(fakeLogStream([])); + + const result = await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + }); + + expect(result.spans).toHaveLength(0); + expect(result.spanRecordCount).toBe(0); + expect(result.logRecordCount).toBe(0); + }); + + it('handles ResourceNotFoundException gracefully (log group does not exist)', async () => { + // Spans log group works, runtime log group does not exist + mockSearchLogs + .mockReturnValueOnce(fakeLogStream([makeSpanRecord('t1', 's1')])) + .mockReturnValueOnce( + fakeErrorStream(new Error('ResourceNotFoundException: The specified log group does not exist')) + ); + + const result = await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + }); + + // Should still return span records from aws/spans + expect(result.spans).toHaveLength(1); + expect(result.spanRecordCount).toBe(1); + expect(result.logRecordCount).toBe(0); + }); + + it('rethrows non-ResourceNotFoundException errors', async () => { + mockSearchLogs + .mockReturnValueOnce(fakeLogStream([])) + .mockReturnValueOnce(fakeErrorStream(new Error('AccessDeniedException: Not authorized'))); + + await expect( + fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + }) + ).rejects.toThrow('AccessDeniedException'); + }); + + it('skips unparseable log messages', async () => { + const spanEvents = [{ timestamp: Date.now(), message: 
'not-valid-json' }, makeSpanRecord('trace1', 'span1')]; + + mockSearchLogs.mockReturnValueOnce(fakeLogStream(spanEvents)).mockReturnValueOnce(fakeLogStream([])); + + const result = await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + }); + + expect(result.spans).toHaveLength(1); + }); + + it('uses correct log group names', async () => { + mockSearchLogs.mockReturnValueOnce(fakeLogStream([])).mockReturnValueOnce(fakeLogStream([])); + + await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'myproject_MyAgent-QMd093Gl4O', + sessionId: SESSION_ID, + lookbackDays: 3, + }); + + expect(mockSearchLogs).toHaveBeenCalledTimes(2); + + // First call: aws/spans + const spanCall = mockSearchLogs.mock.calls[0]![0]; + expect(spanCall.logGroupName).toBe('aws/spans'); + expect(spanCall.filterPattern).toContain(SESSION_ID); + + // Second call: runtime log group + const logCall = mockSearchLogs.mock.calls[1]![0]; + expect(logCall.logGroupName).toBe('/aws/bedrock-agentcore/runtimes/myproject_MyAgent-QMd093Gl4O-DEFAULT'); + expect(logCall.filterPattern).toContain('"body" "input"'); + }); + + it('calls onProgress callback', async () => { + mockSearchLogs + .mockReturnValueOnce(fakeLogStream([makeSpanRecord('t1', 's1')])) + .mockReturnValueOnce(fakeLogStream([])); + + const progress: string[] = []; + await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'rt-123', + sessionId: SESSION_ID, + onProgress: msg => progress.push(msg), + }); + + expect(progress.length).toBeGreaterThan(0); + expect(progress.some(m => m.includes('span records'))).toBe(true); + }); + + it('matches log records by session ID in body (fallback)', async () => { + // Log record with session ID only in body, not in attributes + const logEvent = { + timestamp: Date.now(), + message: JSON.stringify({ + traceId: 'trace1', + spanId: 'span1', + attributes: {}, + body: { + input: { messages: [{ content: { content: `session ${SESSION_ID} data` }, 
role: 'user' }] }, + }, + }), + }; + + mockSearchLogs.mockReturnValueOnce(fakeLogStream([])).mockReturnValueOnce(fakeLogStream([logEvent])); + + const result = await fetchSessionSpans({ + region: 'us-east-1', + runtimeId: 'rt-123', + sessionId: SESSION_ID, + }); + + expect(result.logRecordCount).toBe(1); + }); +}); diff --git a/src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts b/src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts new file mode 100644 index 000000000..f6a60b6e8 --- /dev/null +++ b/src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts @@ -0,0 +1,134 @@ +import { listAllRecommendations, loadRecommendationRun, saveRecommendationRun } from '../recommendation-storage'; +import type { RunRecommendationCommandResult } from '../types'; +import { existsSync, mkdirSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockFindConfigRoot = vi.fn(); + +vi.mock('../../../../lib', () => ({ + findConfigRoot: () => mockFindConfigRoot(), +})); + +function makeTmpDir(): string { + const dir = join(tmpdir(), `recommendation-storage-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); + mkdirSync(dir, { recursive: true }); + return dir; +} + +function makeResult(overrides: Partial = {}): RunRecommendationCommandResult { + return { + success: true, + recommendationId: 'rec-123', + status: 'COMPLETED', + startedAt: '2026-03-24T10:00:00.000Z', + completedAt: '2026-03-24T10:05:00.000Z', + result: { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'You are an expert booking assistant.', + }, + }, + ...overrides, + }; +} + +describe('recommendation-storage', () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = makeTmpDir(); + mockFindConfigRoot.mockReturnValue(tmpDir); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, 
force: true }); + } + vi.clearAllMocks(); + }); + + describe('saveRecommendationRun', () => { + it('creates directory and writes JSON file', () => { + const result = makeResult(); + const filePath = saveRecommendationRun('rec-123', result, 'SYSTEM_PROMPT_RECOMMENDATION', 'booking-agent', [ + 'Builtin.Helpfulness', + ]); + + expect(filePath).toContain('recommendations'); + expect(filePath).toContain('rec-123.json'); + expect(existsSync(filePath)).toBe(true); + }); + + it('writes valid JSON that can be read back', () => { + const result = makeResult(); + saveRecommendationRun('rec-123', result, 'SYSTEM_PROMPT_RECOMMENDATION', 'booking-agent', [ + 'Builtin.Helpfulness', + ]); + + const loaded = loadRecommendationRun('rec-123'); + expect(loaded.recommendationId).toBe('rec-123'); + expect(loaded.type).toBe('SYSTEM_PROMPT_RECOMMENDATION'); + expect(loaded.agent).toBe('booking-agent'); + expect(loaded.evaluators).toEqual(['Builtin.Helpfulness']); + expect(loaded.result?.systemPromptRecommendationResult?.recommendedSystemPrompt).toBe( + 'You are an expert booking assistant.' 
+ ); + }); + }); + + describe('loadRecommendationRun', () => { + it('loads a previously saved recommendation', () => { + saveRecommendationRun('rec-123', makeResult(), 'SYSTEM_PROMPT_RECOMMENDATION', 'agent', ['eval']); + const loaded = loadRecommendationRun('rec-123'); + expect(loaded.status).toBe('COMPLETED'); + }); + + it('accepts filename with .json extension', () => { + saveRecommendationRun('rec-123', makeResult(), 'SYSTEM_PROMPT_RECOMMENDATION', 'agent', ['eval']); + const loaded = loadRecommendationRun('rec-123.json'); + expect(loaded.recommendationId).toBe('rec-123'); + }); + + it('throws for a non-existent recommendation', () => { + expect(() => loadRecommendationRun('nonexistent')).toThrow('not found'); + }); + }); + + describe('listAllRecommendations', () => { + it('returns empty array when no recommendations exist', () => { + expect(listAllRecommendations()).toEqual([]); + }); + + it('returns saved recommendations in reverse order', () => { + saveRecommendationRun( + 'rec-aaa', + makeResult({ recommendationId: 'rec-aaa' }), + 'SYSTEM_PROMPT_RECOMMENDATION', + 'agent', + ['eval'] + ); + saveRecommendationRun( + 'rec-zzz', + makeResult({ recommendationId: 'rec-zzz' }), + 'TOOL_DESCRIPTION_RECOMMENDATION', + 'agent', + ['eval'] + ); + + const all = listAllRecommendations(); + expect(all).toHaveLength(2); + expect(all[0]!.recommendationId).toBe('rec-zzz'); + expect(all[1]!.recommendationId).toBe('rec-aaa'); + }); + }); + + describe('error when no config root', () => { + it('throws when findConfigRoot returns null', () => { + mockFindConfigRoot.mockReturnValue(null); + expect(() => + saveRecommendationRun('rec-123', makeResult(), 'SYSTEM_PROMPT_RECOMMENDATION', 'agent', ['eval']) + ).toThrow('No agentcore project found'); + }); + }); +}); diff --git a/src/cli/operations/recommendation/__tests__/run-recommendation.test.ts b/src/cli/operations/recommendation/__tests__/run-recommendation.test.ts new file mode 100644 index 000000000..b26a59b32 --- /dev/null +++ 
b/src/cli/operations/recommendation/__tests__/run-recommendation.test.ts @@ -0,0 +1,700 @@ +import { runRecommendationCommand } from '../run-recommendation'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// Mock dependencies — paths are relative to the file under test (run-recommendation.ts) +const mockReadProjectSpec = vi.fn().mockResolvedValue({ name: 'test-project' }); +const mockReadDeployedState = vi.fn().mockResolvedValue({ + targets: { + default: { + resources: { + runtimes: { + MyAgent: { + runtimeId: 'rt-abc123', + runtimeArn: 'arn:aws:bedrock:us-east-1:998846730471:agent-runtime/rt-abc123', + }, + }, + evaluators: { + MyEvaluator: { + evaluatorArn: 'arn:aws:bedrock-agentcore:us-east-1:998846730471:evaluator/my-eval-abc1234567', + }, + }, + }, + }, + }, +}); + +vi.mock('../../../../lib', () => ({ + ConfigIO: class { + readProjectSpec = mockReadProjectSpec; + readDeployedState = mockReadDeployedState; + resolveAWSDeploymentTargets = vi.fn().mockResolvedValue([{ region: 'us-east-1' }]); + }, +})); + +vi.mock('../../../aws/region', () => ({ + detectRegion: vi.fn().mockResolvedValue({ region: 'us-east-1' }), +})); + +const mockStartRecommendation = vi.fn(); +const mockGetRecommendation = vi.fn(); + +vi.mock('../../../aws/agentcore-recommendation', () => ({ + startRecommendation: (...args: unknown[]) => mockStartRecommendation(...args), + getRecommendation: (...args: unknown[]) => mockGetRecommendation(...args), +})); + +const mockFetchSessionSpans = vi.fn(); +vi.mock('../fetch-session-spans', () => ({ + fetchSessionSpans: (...args: unknown[]) => mockFetchSessionSpans(...args), +})); + +const mockReadFileSync = vi.fn(); +vi.mock('fs', async () => { + const actual = await vi.importActual('fs'); + return { ...actual, readFileSync: (...args: unknown[]) => mockReadFileSync(...args) }; +}); + +describe('runRecommendationCommand', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('returns error when agent is not deployed', async () 
=> { + mockReadDeployedState.mockResolvedValueOnce({ targets: {} }); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'NonExistentAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'You are helpful.', + traceSource: 'cloudwatch', + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('NonExistentAgent'); + expect(result.error).toContain('not deployed'); + }); + + it('returns error when evaluator cannot be resolved', async () => { + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['UnknownEvaluator'], + inputSource: 'inline', + inlineContent: 'You are helpful.', + traceSource: 'cloudwatch', + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('UnknownEvaluator'); + expect(result.error).toContain('not found'); + }); + + it('returns result on COMPLETED status', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-001', + recommendationArn: 'arn:rec-001', + name: 'test-rec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-001', + status: 'COMPLETED', + createdAt: '2026-03-30T00:00:00Z', + completedAt: '2026-03-30T00:01:00Z', + recommendationResult: { + systemPromptRecommendationResult: { + recommendedSystemPrompt: 'Optimized prompt', + explanation: 'Made clearer', + }, + }, + }); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'You are helpful.', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + expect(result.success).toBe(true); + expect(result.recommendationId).toBe('rec-001'); + expect(result.status).toBe('COMPLETED'); + 
expect(result.result?.systemPromptRecommendationResult?.recommendedSystemPrompt).toBe('Optimized prompt'); + }); + + it('returns error on FAILED status', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-002', + recommendationArn: 'arn:rec-002', + name: 'test-rec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-002', + status: 'FAILED', + }); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'You are helpful.', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('FAILED'); + expect(result.recommendationId).toBe('rec-002'); + }); + + it('expands Builtin.* evaluator to full ARN in startRecommendation call', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-003', + status: 'COMPLETED', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-003', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const evaluators = callArgs.recommendationConfig.systemPromptRecommendationConfig.evaluationConfig.evaluators; + expect(evaluators[0].evaluatorArn).toBe('arn:aws:bedrock-agentcore:::evaluator/Builtin.Toxicity'); + }); + + it('uses account ID from runtime ARN in log group ARN', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-004', + status: 'COMPLETED', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-004', + status: 
'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const logGroupArn = + callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces.cloudwatchLogs.logGroupArns[0]; + expect(logGroupArn).toContain(':998846730471:'); + expect(logGroupArn).not.toContain(':*:'); + }); + + it('resolves custom evaluator from deployed state', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-005', + status: 'COMPLETED', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-005', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['MyEvaluator'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const evaluators = callArgs.recommendationConfig.systemPromptRecommendationConfig.evaluationConfig.evaluators; + expect(evaluators[0].evaluatorArn).toBe( + 'arn:aws:bedrock-agentcore:us-east-1:998846730471:evaluator/my-eval-abc1234567' + ); + }); + + it('builds TOOL_DESCRIPTION_RECOMMENDATION config with toolName:description pairs', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-006', + status: 'COMPLETED', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-006', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'TOOL_DESCRIPTION_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + tools: ['search:Search the web for info', 
'calculate:Perform math calculations'], + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const tools = + callArgs.recommendationConfig.toolDescriptionRecommendationConfig.toolDescription.toolDescriptionText.tools; + expect(tools).toHaveLength(2); + expect(tools[0].toolName).toBe('search'); + expect(tools[0].toolDescription.text).toBe('Search the web for info'); + expect(tools[1].toolName).toBe('calculate'); + expect(tools[1].toolDescription.text).toBe('Perform math calculations'); + }); + + it('catches and returns errors from startRecommendation', async () => { + mockStartRecommendation.mockRejectedValue(new Error('API timeout')); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('API timeout'); + }); + + it('retries transient poll failures and succeeds', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-retry-ok', + recommendationArn: 'arn:rec-retry-ok', + name: 'test-rec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }); + + // First poll fails, second succeeds + mockGetRecommendation.mockRejectedValueOnce(new Error('fetch failed')).mockResolvedValueOnce({ + recommendationId: 'rec-retry-ok', + status: 'COMPLETED', + recommendationResult: { + systemPromptRecommendationResult: { recommendedSystemPrompt: 'Better prompt' }, + }, + }); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + expect(result.success).toBe(true); + expect(result.recommendationId).toBe('rec-retry-ok'); + 
expect(mockGetRecommendation).toHaveBeenCalledTimes(2); + }); + + it('fails after max consecutive poll retries', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-retry-fail', + recommendationArn: 'arn:rec-retry-fail', + name: 'test-rec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }); + + mockGetRecommendation.mockRejectedValue(new Error('fetch failed')); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('consecutive errors'); + expect(result.error).toContain('fetch failed'); + expect(result.error).toContain('rec-retry-fail'); + expect(mockGetRecommendation).toHaveBeenCalledTimes(3); + }); + + it('times out after max poll duration', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-timeout', + recommendationArn: 'arn:rec-timeout', + name: 'test-rec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-timeout', + status: 'IN_PROGRESS', + }); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + maxPollDurationMs: 0, // Immediately timeout + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('Polling timed out'); + expect(result.error).toContain('rec-timeout'); + }); + + it('reads system prompt from file when inputSource is file', async () => { + mockReadFileSync.mockReturnValue('You are a healthcare assistant.'); + + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-file', + status: 
'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-file', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Helpfulness'], + inputSource: 'file', + promptFile: '/tmp/prompt.txt', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + expect(mockReadFileSync).toHaveBeenCalledWith('/tmp/prompt.txt', 'utf-8'); + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const systemPrompt = callArgs.recommendationConfig.systemPromptRecommendationConfig.systemPrompt; + expect(systemPrompt.text).toBe('You are a healthcare assistant.'); + }); + + it('uses inline sessionSpans from spans-file trace source', async () => { + const fakeSpans = [ + { traceId: 't1', spanId: 's1', body: {} }, + { traceId: 't1', spanId: 's2', body: {} }, + ]; + mockReadFileSync.mockReturnValue(JSON.stringify(fakeSpans)); + + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-spans', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-spans', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'spans-file', + spansFile: '/tmp/spans.json', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const traces = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces; + expect(traces.sessionSpans).toHaveLength(2); + expect(traces.cloudwatchLogs).toBeUndefined(); + }); + + it('wraps single span object in array for spans-file', async () => { + const singleSpan = { traceId: 't1', spanId: 's1', body: {} }; + mockReadFileSync.mockReturnValue(JSON.stringify(singleSpan)); + + mockStartRecommendation.mockResolvedValue({ + 
recommendationId: 'rec-single', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-single', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'spans-file', + spansFile: '/tmp/single.json', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const traces = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces; + expect(traces.sessionSpans).toHaveLength(1); + }); + + it('auto-fetches spans for tool-desc with sessions trace source', async () => { + mockFetchSessionSpans.mockResolvedValue({ + spans: [ + { traceId: 't1', spanId: 's1', body: {} }, + { traceId: 't1', spanId: 's2', body: {} }, + ], + spanRecordCount: 1, + logRecordCount: 1, + }); + + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-autofetch', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-autofetch', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'TOOL_DESCRIPTION_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + tools: ['add_numbers:Add two numbers together'], + traceSource: 'sessions', + sessionIds: ['session-abc'], + pollIntervalMs: 0, + }); + + expect(mockFetchSessionSpans).toHaveBeenCalledWith( + expect.objectContaining({ + region: 'us-east-1', + runtimeId: 'rt-abc123', + sessionId: 'session-abc', + }) + ); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const traces = callArgs.recommendationConfig.toolDescriptionRecommendationConfig.agentTraces; + expect(traces.sessionSpans).toHaveLength(2); + expect(traces.cloudwatchLogs).toBeUndefined(); + }); + + it('throws when auto-fetch returns zero spans', 
async () => { + mockFetchSessionSpans.mockResolvedValue({ + spans: [], + spanRecordCount: 0, + logRecordCount: 0, + }); + + const result = await runRecommendationCommand({ + type: 'TOOL_DESCRIPTION_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + tools: ['add_numbers:Add numbers'], + traceSource: 'sessions', + sessionIds: ['session-empty'], + pollIntervalMs: 0, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('No spans found'); + }); + + it('derives service name from runtimeId by stripping hash suffix', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-svc', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-svc', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const serviceNames = + callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces.cloudwatchLogs.serviceNames; + // runtimeId 'rt-abc123' → service name 'rt.DEFAULT' (strips '-abc123' suffix) + expect(serviceNames[0]).toBe('rt.DEFAULT'); + }); + + it('auto-fetches spans for system-prompt with sessions trace source', async () => { + mockFetchSessionSpans.mockResolvedValue({ spans: [{ sessionId: 'sess-1', spans: [] }] }); + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-sid', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-sid', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 
'test', + traceSource: 'sessions', + sessionIds: ['sess-1'], + pollIntervalMs: 0, + }); + + expect(mockFetchSessionSpans).toHaveBeenCalledWith(expect.objectContaining({ sessionId: 'sess-1' })); + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const traces = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces; + expect(traces.sessionSpans).toBeDefined(); + expect(traces.cloudwatchLogs).toBeUndefined(); + }); + + it('builds cloudwatch config with two log group ARNs', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-cw', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-cw', + status: 'COMPLETED', + recommendationResult: {}, + }); + + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + lookbackDays: 3, + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const cwConfig = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces.cloudwatchLogs; + expect(cwConfig.logGroupArns).toHaveLength(2); + expect(cwConfig.logGroupArns[0]).toContain('/aws/bedrock-agentcore/runtimes/rt-abc123-DEFAULT'); + expect(cwConfig.logGroupArns[1]).toContain('aws/spans'); + expect(cwConfig.startTime).toBeDefined(); + expect(cwConfig.endTime).toBeDefined(); + }); + + it('extracts failure details from statusReasons and result error fields', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-fail-detail', + recommendationArn: 'arn:rec-fail-detail', + name: 'test', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + requestId: 'start-req-id', + }); + + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-fail-detail', + status: 'FAILED', + requestId: 'poll-req-id', + statusReasons: ['Insufficient trace 
data'], + recommendationResult: { + systemPromptRecommendationResult: { + errorCode: 'INSUFFICIENT_DATA', + errorMessage: 'Not enough traces to generate recommendation', + }, + }, + }); + + const result = await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: ['Builtin.Toxicity'], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('Insufficient trace data'); + expect(result.error).toContain('INSUFFICIENT_DATA'); + expect(result.error).toContain('Not enough traces'); + // Request IDs are logged to file only, not included in the error message + }); + + it('passes full ARN evaluator as-is', async () => { + mockStartRecommendation.mockResolvedValue({ + recommendationId: 'rec-arn', + status: 'COMPLETED', + }); + mockGetRecommendation.mockResolvedValue({ + recommendationId: 'rec-arn', + status: 'COMPLETED', + recommendationResult: {}, + }); + + const fullArn = 'arn:aws:bedrock-agentcore:us-east-1:123456789012:evaluator/custom-eval'; + await runRecommendationCommand({ + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'MyAgent', + evaluators: [fullArn], + inputSource: 'inline', + inlineContent: 'test', + traceSource: 'cloudwatch', + pollIntervalMs: 0, + }); + + const callArgs = mockStartRecommendation.mock.calls[0]![0]; + const evaluators = callArgs.recommendationConfig.systemPromptRecommendationConfig.evaluationConfig.evaluators; + expect(evaluators[0].evaluatorArn).toBe(fullArn); + }); +}); diff --git a/src/cli/operations/recommendation/apply-to-bundle.ts b/src/cli/operations/recommendation/apply-to-bundle.ts new file mode 100644 index 000000000..bf9060d10 --- /dev/null +++ b/src/cli/operations/recommendation/apply-to-bundle.ts @@ -0,0 +1,140 @@ +/** + * Syncs local agentcore.json after the server applies a recommendation to a + * config bundle. 
/**
 * Pull the bundle ID out of a configuration-bundle ARN.
 *
 * Expected ARN shape:
 *   arn:aws:bedrock-agentcore:{region}:{account}:configuration-bundle/{bundleId}
 *
 * @param arn - ARN string to inspect.
 * @returns The text that follows `configuration-bundle/` up to the end of the
 *          string, or `undefined` when the marker is absent or nothing follows it.
 */
function extractBundleIdFromArn(arn: string): string | undefined {
  // Index 0 is the whole match; index 1 is the captured bundle ID.
  const [, bundleId] = arn.match(/configuration-bundle\/(.+)$/) ?? [];
  return bundleId;
}
/**
 * Bring the local project files in line with the config bundle version that the
 * server produced while applying a recommendation.
 *
 * Flow:
 *  1. Read the new bundle reference (ARN + version ID) from the recommendation result.
 *  2. Fetch that version via `getConfigurationBundleVersion`.
 *  3. Locate the matching local bundle — by `options.bundleName`, or by looking up
 *     `options.bundleArn` in the deployed state.
 *  4. Copy the fetched components (and commit message, when present) onto the
 *     local bundle and write the project spec.
 *  5. Record the new version ID on the first deployed-state entry for the bundle
 *     and write the deployed state.
 *
 * @param options - Recommendation result, region, and the bundle name or ARN.
 * @param configIO - Reader/writer for the project spec and deployed state.
 * @returns `success: true` with the synced version ID, or `success: false` with an
 *          explanatory `error` when the result carries no bundle, the ARN cannot be
 *          parsed, or the bundle is not present in agentcore.json.
 */
export async function applyRecommendationToBundle(
  options: ApplyRecommendationOptions,
  configIO: ConfigIO = new ConfigIO()
): Promise<ApplyRecommendationResult> {
  const { result, region, bundleArn: requestedArn } = options;

  // Either recommendation flavour may carry the newly created bundle version.
  const serverBundle =
    result.systemPromptRecommendationResult?.configurationBundle ??
    result.toolDescriptionRecommendationResult?.configurationBundle;
  if (!serverBundle) {
    return {
      success: false,
      error:
        'Recommendation result does not contain a new config bundle version. The server may not have applied the recommendation to the bundle.',
    };
  }

  const bundleId = extractBundleIdFromArn(serverBundle.bundleArn);
  if (!bundleId) {
    return {
      success: false,
      error: `Could not extract bundle ID from ARN: ${serverBundle.bundleArn}`,
    };
  }

  // Pull the freshly created version down from the service.
  const newVersion = await getConfigurationBundleVersion({
    region,
    bundleId,
    versionId: serverBundle.versionId,
  });

  const [spec, deployedState] = await Promise.all([configIO.readProjectSpec(), configIO.readDeployedState()]);

  // Work out which local bundle we are updating.
  let bundleName: string | undefined;
  if (options.bundleName) {
    bundleName = options.bundleName;
  } else if (requestedArn) {
    // Only an ARN was supplied: map it back to a bundle name through deployed state.
    for (const target of Object.values(deployedState.targets ?? {})) {
      const hit = Object.entries(target?.resources?.configBundles ?? {}).find(
        ([, state]) => state.bundleArn === requestedArn
      );
      if (hit) bundleName = hit[0];
      if (bundleName) break;
    }
  }

  const identifier = bundleName ?? requestedArn ?? 'unknown';
  const localBundle = bundleName ? spec.configBundles?.find(candidate => candidate.name === bundleName) : undefined;
  if (!localBundle) {
    return {
      success: false,
      error: `Config bundle "${identifier}" not found in agentcore.json.`,
    };
  }

  // Mirror the server's components locally.
  localBundle.components = newVersion.components as typeof localBundle.components;

  // Carry over the commit message only when the server supplied one.
  const serverCommitMessage = newVersion.lineageMetadata?.commitMessage;
  if (serverCommitMessage) {
    localBundle.commitMessage = serverCommitMessage;
  }

  await configIO.writeProjectSpec(spec);

  // Stamp the new version ID on the first deployed target that tracks this bundle.
  const deployedEntry = Object.values(deployedState.targets ?? {})
    .map(target => target?.resources?.configBundles?.[identifier])
    .find(Boolean);
  if (deployedEntry) {
    deployedEntry.versionId = serverBundle.versionId;
  }
  await configIO.writeDeployedState(deployedState);

  return {
    success: true,
    newVersionId: serverBundle.versionId,
  };
}
*/ +export const MAX_POLL_DURATION_MS = 30 * 60 * 1000; diff --git a/src/cli/operations/recommendation/fetch-session-spans.ts b/src/cli/operations/recommendation/fetch-session-spans.ts new file mode 100644 index 000000000..db5e63911 --- /dev/null +++ b/src/cli/operations/recommendation/fetch-session-spans.ts @@ -0,0 +1,158 @@ +/** + * Fetches OTEL span records and log records from CloudWatch for a given session, + * combining them into a SessionSpan[] suitable for inline `sessionSpans` in the + * Recommendation API. + * + * Tool description recommendations require inline sessionSpans (the server-side + * Lambda does NOT support `cloudwatchLogs` for this type). The OTEL mapper needs + * BOTH: + * - Span records from the `aws/spans` log group + * - Log records (with body.input/output.messages) from the runtime log group + * + * Without log records the mapper produces "zero trajectories". + */ +import type { SessionSpan } from '../../aws/agentcore-recommendation'; +import { searchLogs } from '../../aws/cloudwatch'; + +export interface FetchSessionSpansOptions { + /** AWS region */ + region: string; + /** Agent runtime ID, e.g. "myproject_MyAgent-QMd093Gl4O" */ + runtimeId: string; + /** Session ID to filter spans for */ + sessionId: string; + /** Lookback days (default 7) */ + lookbackDays?: number; + /** Progress callback */ + onProgress?: (message: string) => void; +} + +export interface FetchSessionSpansResult { + spans: SessionSpan[]; + spanRecordCount: number; + logRecordCount: number; +} + +/** The log group where OTEL span records are stored (no leading slash). */ +const SPANS_LOG_GROUP = 'aws/spans'; + +/** + * Fetch session spans from both CloudWatch log groups and combine them. + * + * 1. Fetches span records from `aws/spans` filtered by session.id + * 2. Fetches log records from the runtime log group filtered by body+input + * 3. Filters log records client-side by matching session.id + * 4. 
/**
 * Gather everything CloudWatch holds for one session and return it as a single
 * span list.
 *
 * Two queries are issued concurrently through `collectLogEvents`:
 *  - `aws/spans`, filtered on `"session.id"` plus the session ID;
 *  - the runtime log group (`/aws/bedrock-agentcore/runtimes/{runtimeId}-DEFAULT`),
 *    filtered on `"body" "input"`; these hits are then narrowed with `matchesSession`.
 *
 * Events whose message is not valid JSON are skipped silently.
 *
 * @param options - Region, runtime ID, session ID, optional lookback window
 *                  (default 7 days) and optional progress callback.
 * @returns The combined spans together with how many came from each source.
 */
export async function fetchSessionSpans(options: FetchSessionSpansOptions): Promise<FetchSessionSpansResult> {
  const { region, runtimeId, sessionId, lookbackDays = 7, onProgress } = options;

  const endTimeMs = Date.now();
  const startTimeMs = endTimeMs - lookbackDays * 24 * 60 * 60 * 1000;

  onProgress?.('Fetching span records from aws/spans...');

  // Start both queries before awaiting either so they run in parallel.
  const spanQuery = collectLogEvents({
    logGroupName: SPANS_LOG_GROUP,
    region,
    startTimeMs,
    endTimeMs,
    filterPattern: `"session.id" "${sessionId}"`,
  });
  const logQuery = collectLogEvents({
    logGroupName: `/aws/bedrock-agentcore/runtimes/${runtimeId}-DEFAULT`,
    region,
    startTimeMs,
    endTimeMs,
    // Server-side filter is coarse; the session match happens client-side below.
    filterPattern: `"body" "input"`,
  });
  const [spanRecords, logRecords] = await Promise.all([spanQuery, logQuery]);

  onProgress?.(`Found ${spanRecords.length} span records, ${logRecords.length} log record candidates`);

  // Span records are taken as-is once they parse.
  const spans: SessionSpan[] = [];
  for (const record of spanRecords) {
    try {
      spans.push(JSON.parse(record.message) as SessionSpan);
    } catch {
      // Not JSON — ignore this record
    }
  }
  const spanRecordCount = spans.length;

  // Log records are appended only when they belong to the requested session.
  let logRecordCount = 0;
  for (const record of logRecords) {
    try {
      const candidate = JSON.parse(record.message) as Record<string, unknown>;
      if (!matchesSession(candidate, sessionId)) continue;
      spans.push(candidate as unknown as SessionSpan);
      logRecordCount++;
    } catch {
      // Not JSON (or not inspectable) — ignore this record
    }
  }

  onProgress?.(
    `Combined ${spans.length} spans (${spanRecordCount} span records + ${logRecordCount} log records)`
  );

  return { spans, spanRecordCount, logRecordCount };
}
/**
 * Decide whether a parsed log record belongs to the target session.
 *
 * A record matches when `attributes['session.id']` equals the session ID, or
 * when the JSON-serialised `body` contains the session ID as a substring.
 *
 * @param record - Parsed log record.
 * @param sessionId - Session ID to look for.
 * @returns `true` when either check succeeds, otherwise `false`.
 */
function matchesSession(record: Record<string, unknown>, sessionId: string): boolean {
  // Preferred signal: an explicit session.id attribute.
  const attrs = record.attributes as Record<string, unknown> | undefined;
  if (attrs?.['session.id'] === sessionId) return true;

  // Fallback: the session ID appears somewhere inside the body payload.
  const body = record.body as Record<string, unknown> | undefined;
  if (body) {
    return JSON.stringify(body).includes(sessionId);
  }

  return false;
}

/**
 * Report whether an error means "the log group is not there".
 *
 * AWS SDK v3 service exceptions expose the error code on `name`, so that is
 * checked first; the message substrings are kept for errors that were wrapped
 * or re-thrown as plain `Error`s or strings.
 */
function isMissingLogGroupError(err: unknown): boolean {
  if (err instanceof Error && err.name === 'ResourceNotFoundException') return true;
  const msg = err instanceof Error ? err.message : String(err);
  return msg.includes('ResourceNotFoundException') || msg.includes('does not exist');
}

/**
 * Drain the `searchLogs` async generator for one log group into an array.
 *
 * @param options - Log group, region, time window (ms since epoch) and filter
 *                  pattern, forwarded unchanged to `searchLogs`.
 * @returns Every event yielded, in order; an empty array when the log group
 *          does not exist (e.g. the runtime has never been invoked).
 * @throws Re-throws any error that is not a missing-log-group error.
 */
async function collectLogEvents(options: {
  logGroupName: string;
  region: string;
  startTimeMs: number;
  endTimeMs: number;
  filterPattern: string;
}): Promise<{ timestamp: number; message: string }[]> {
  const events: { timestamp: number; message: string }[] = [];

  try {
    for await (const event of searchLogs(options)) {
      events.push(event);
    }
  } catch (err) {
    // A missing log group simply means there is nothing to collect yet.
    if (isMissingLogGroupError(err)) {
      return [];
    }
    throw err;
  }

  return events;
}
/**
 * Resolve the directory that stores recommendation run records
 * (`<config root>/.cli/recommendations`).
 *
 * @throws Error when `findConfigRoot` finds no agentcore project.
 */
function getRecommendationResultsDir(): string {
  const configRoot = findConfigRoot();
  if (!configRoot) {
    throw new Error('No agentcore project found. Run `agentcore create` first.');
  }
  return join(configRoot, '.cli', RECOMMENDATIONS_DIR);
}

/**
 * Persist one recommendation run as `<recommendationId>.json`, creating the
 * results directory if needed.
 *
 * @param recommendationId - ID used as the file name.
 * @param result - Command result; its status, timestamps and result payload are stored.
 * @param type - Recommendation type.
 * @param agent - Agent name the recommendation was run for.
 * @param evaluators - Evaluators used for the run.
 * @returns Absolute path of the file that was written.
 */
export function saveRecommendationRun(
  recommendationId: string,
  result: RunRecommendationCommandResult,
  type: RecommendationType,
  agent: string,
  evaluators: string[]
): string {
  const dir = getRecommendationResultsDir();
  mkdirSync(dir, { recursive: true });

  const filePath = join(dir, `${recommendationId}.json`);

  const record: RecommendationRunRecord = {
    recommendationId,
    type,
    agent,
    evaluators,
    // A result without a status is recorded as 'unknown'.
    status: result.status ?? 'unknown',
    startedAt: result.startedAt,
    completedAt: result.completedAt,
    result: result.result,
  };

  writeFileSync(filePath, JSON.stringify(record, null, 2));
  return filePath;
}

/**
 * Load a stored recommendation run.
 *
 * @param recommendationId - Run ID, with or without a trailing `.json`.
 * @returns The parsed record.
 * @throws Error when no file exists for the ID.
 */
export function loadRecommendationRun(recommendationId: string): RecommendationRunRecord {
  const dir = getRecommendationResultsDir();
  const jsonName = recommendationId.endsWith('.json') ? recommendationId : `${recommendationId}.json`;
  const filePath = join(dir, jsonName);

  if (!existsSync(filePath)) {
    throw new Error(`Recommendation "${recommendationId}" not found at ${filePath}`);
  }

  return JSON.parse(readFileSync(filePath, 'utf-8')) as RecommendationRunRecord;
}

/**
 * List every stored recommendation run, ordered by file name descending.
 *
 * @returns Parsed records; an empty array when the results directory does not exist.
 * @throws Error when no agentcore project is found.
 */
export function listAllRecommendations(): RecommendationRunRecord[] {
  // Shared helper keeps the project lookup and error message in one place.
  const dir = getRecommendationResultsDir();
  if (!existsSync(dir)) {
    return [];
  }

  return readdirSync(dir)
    .filter(f => f.endsWith('.json'))
    .sort()
    .reverse()
    .map(f => JSON.parse(readFileSync(join(dir, f), 'utf-8')) as RecommendationRunRecord);
}
'recommend' }); + } catch { + // Logger creation can fail in tests or when no project root exists — non-fatal + } + + try { + logger?.startStep('Load project config'); + // 1. Read project config and deployed state + const configIO = new ConfigIO(); + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + + const targetRegion = awsTargets.length > 0 ? awsTargets[0]!.region : undefined; + const { region: detectedRegion } = await detectRegion(); + const region = options.region ?? targetRegion ?? detectedRegion; + const stage = process.env.AGENTCORE_STAGE?.toLowerCase() ?? 'prod'; + logger?.log(`Region: ${region}, Stage: ${stage}`); + logger?.endStep('success'); + + // 2. Resolve agent from deployed state (needed for log group ARNs) + logger?.startStep('Resolve agent and evaluators'); + const agentState = resolveAgentState(deployedState, options.agent); + if (!agentState) { + logger?.log(`Agent "${options.agent}" not found in deployed state`, 'error'); + logger?.endStep('error', `Agent "${options.agent}" not deployed`); + logger?.finalize(false); + return { + success: false, + error: `Agent "${options.agent}" not deployed. Run \`agentcore deploy\` first.`, + logFilePath: logger?.logFilePath, + }; + } + logger?.log(`Agent: ${options.agent} (runtime: ${agentState.runtimeId})`); + + // 3. Resolve evaluator ID/ARN (API accepts exactly one for system-prompt, none for tool-desc) + const evaluatorIds: string[] = []; + for (const evaluator of options.evaluators) { + const evaluatorId = resolveEvaluatorId(deployedState, evaluator, region); + if (!evaluatorId) { + return { + success: false, + error: `Evaluator "${evaluator}" not found in deployed state. 
Use a Builtin.* name, a full ARN, or deploy a custom evaluator first.`, + logFilePath: logger?.logFilePath, + }; + } + evaluatorIds.push(evaluatorId); + } + if (options.type === 'SYSTEM_PROMPT_RECOMMENDATION' && evaluatorIds.length !== 1) { + return { + success: false, + error: 'System prompt recommendations require exactly one evaluator.', + logFilePath: logger?.logFilePath, + }; + } + logger?.log(`Evaluators: ${evaluatorIds.join(', ') || '(none)'}`); + logger?.endStep('success'); + + // 4. Read input content (if from file) + let inlineContent: string | undefined; + if (options.inputSource === 'file' && options.promptFile) { + inlineContent = readFileSync(options.promptFile, 'utf-8'); + } else if (options.inputSource === 'inline') { + inlineContent = options.inlineContent; + } + + // Validate that system prompt content is non-empty (API rejects empty text) + if ( + options.type === 'SYSTEM_PROMPT_RECOMMENDATION' && + options.inputSource !== 'config-bundle' && + !inlineContent?.trim() + ) { + return { + success: false, + error: 'System prompt content is required. Provide via --inline, --prompt-file, or --bundle-name.', + logFilePath: logger?.logFilePath, + }; + } + + // 5. Extract account ID from agent runtime ARN + const accountId = extractAccountIdFromArn(agentState.runtimeArn); + + // 5b. Resolve config bundle ARN from deployed state (if using config bundle) + let bundleArn: string | undefined; + if (options.inputSource === 'config-bundle' && options.bundleName) { + if (options.bundleName.startsWith('arn:')) { + // Already an ARN (e.g. from TUI which stores the ARN directly) + bundleArn = options.bundleName; + } else { + // Human-readable name (e.g. from CLI --bundle-name flag) — resolve from deployed state + for (const targetName of Object.keys(deployedState.targets ?? 
{})) { + const target = deployedState.targets?.[targetName]; + const bundle = target?.resources?.configBundles?.[options.bundleName]; + if (bundle?.bundleArn) { + bundleArn = bundle.bundleArn; + break; + } + } + if (!bundleArn) { + return { + success: false, + error: `Config bundle "${options.bundleName}" not found in deployed state. Run \`agentcore deploy\` first.`, + logFilePath: logger?.logFilePath, + }; + } + } + logger?.log(`Resolved bundle ARN: ${bundleArn}`); + } + + // 5c. Resolve short-form systemPromptJsonPath (e.g. "systemPrompt") to full JSONPath + let resolvedSystemPromptJsonPath = options.systemPromptJsonPath; + if ( + options.inputSource === 'config-bundle' && + options.bundleName && + resolvedSystemPromptJsonPath && + !resolvedSystemPromptJsonPath.startsWith('$') + ) { + // User provided a short field name like "systemPrompt" — resolve from agentcore.json + const bundleName = options.bundleName.startsWith('arn:') + ? // Find bundle name from ARN by matching deployed state + Object.values(deployedState.targets) + .flatMap(t => Object.entries(t.resources?.configBundles ?? {})) + .find(([, b]) => b.bundleArn === options.bundleName)?.[0] + : options.bundleName; + + if (bundleName) { + const projBundle = projectSpec.configBundles?.find(b => b.name === bundleName); + if (projBundle?.components) { + const subPath = resolvedSystemPromptJsonPath; + // Use the first component key, resolved to a real ARN + const firstComponentKey = Object.keys(projBundle.components)[0]; + if (firstComponentKey) { + const resolvedKey = resolveComponentKeyForJsonPath(firstComponentKey, deployedState); + resolvedSystemPromptJsonPath = `$.${resolvedKey}.configuration.${subPath}`; + logger?.log(`Resolved short JSONPath "${subPath}" → "${resolvedSystemPromptJsonPath}"`); + } + } + } + } + + // 6. 
Build recommendationConfig based on type + const recommendationConfig = await buildRecommendationConfig({ + type: options.type, + inlineContent, + bundleArn, + bundleVersion: options.bundleVersion, + systemPromptJsonPath: resolvedSystemPromptJsonPath, + toolDescJsonPaths: options.toolDescJsonPaths, + inputSource: options.inputSource, + tools: options.tools, + traceSource: options.traceSource, + lookbackDays: options.lookbackDays, + sessionIds: options.sessionIds, + spansFile: options.spansFile, + runtimeId: agentState.runtimeId, + accountId, + region, + evaluatorIds, + onProgress, + logger, + }); + + // 7. Start the recommendation + logger?.startStep('Start recommendation'); + const recommendationName = options.recommendationName ?? `${projectSpec.name}_${options.agent}_${Date.now()}`; + onProgress?.('starting', `Starting recommendation "${recommendationName}"...`); + + const startPayload = { + region, + name: recommendationName, + type: options.type, + recommendationConfig, + }; + logger?.log(`Request payload:\n${JSON.stringify(startPayload, null, 2)}`); + + const startResult = await startRecommendation(startPayload); + + logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`); + logger?.endStep('success'); + onProgress?.('started', `Recommendation created: ${startResult.recommendationId} (status: ${startResult.status})`); + options.onStarted?.({ recommendationId: startResult.recommendationId, region }); + + // 8. Poll GetRecommendation until terminal status + logger?.startStep('Poll for completion'); + const maxDurationMs = options.maxPollDurationMs ?? 
MAX_POLL_DURATION_MS; + const pollStartTime = Date.now(); + let currentStatus = startResult.status; + let consecutiveFailures = 0; + + while (!TERMINAL_STATUSES.has(currentStatus)) { + await sleep(pollIntervalMs); + + // Check max poll duration + if (Date.now() - pollStartTime > maxDurationMs) { + logger?.log(`Max poll duration (${maxDurationMs}ms) exceeded`, 'error'); + logger?.endStep('error', 'Poll timeout'); + logger?.finalize(false); + return { + success: false, + error: `Polling timed out after ${Math.round(maxDurationMs / 60000)} minutes. The recommendation may still be running server-side.\nRecommendation ID: ${startResult.recommendationId}`, + recommendationId: startResult.recommendationId, + status: currentStatus, + logFilePath: logger?.logFilePath, + }; + } + + // Poll with retry for transient failures + let pollResult; + try { + pollResult = await getRecommendation({ + region, + recommendationId: startResult.recommendationId, + }); + consecutiveFailures = 0; + } catch (pollErr) { + consecutiveFailures++; + const pollErrMsg = pollErr instanceof Error ? 
pollErr.message : String(pollErr); + logger?.log(`Poll attempt failed (${consecutiveFailures}/${MAX_POLL_RETRIES}): ${pollErrMsg}`, 'error'); + + if (consecutiveFailures >= MAX_POLL_RETRIES) { + logger?.endStep('error', `${MAX_POLL_RETRIES} consecutive poll failures`); + logger?.finalize(false); + return { + success: false, + error: `Polling failed after ${MAX_POLL_RETRIES} consecutive errors: ${pollErrMsg}\nThe recommendation may still be running server-side.\nRecommendation ID: ${startResult.recommendationId}`, + recommendationId: startResult.recommendationId, + status: currentStatus, + logFilePath: logger?.logFilePath, + }; + } + onProgress?.('polling', `Poll error, retrying (${consecutiveFailures}/${MAX_POLL_RETRIES})...`); + continue; + } + + currentStatus = pollResult.status; + onProgress?.('polling', `Status: ${currentStatus}`); + + if (TERMINAL_STATUSES.has(currentStatus)) { + if (currentStatus === 'COMPLETED' || currentStatus === 'SUCCEEDED') { + logger?.log(`Completed. Result:\n${JSON.stringify(pollResult.recommendationResult, null, 2)}`); + logger?.endStep('success'); + logger?.finalize(true); + return { + success: true, + recommendationId: startResult.recommendationId, + status: currentStatus, + result: pollResult.recommendationResult, + region, + startedAt: pollResult.createdAt, + completedAt: pollResult.completedAt, + logFilePath: logger?.logFilePath, + }; + } + + // Extract error details from the FAILED response + const failureDetails = extractFailureDetails(pollResult); + logger?.log(`Terminal status: ${currentStatus}`, 'error'); + logger?.log(`Full poll response:\n${JSON.stringify(pollResult, null, 2)}`, 'error'); + if (failureDetails) logger?.log(`Failure details: ${failureDetails}`, 'error'); + logger?.endStep('error', `Status: ${currentStatus}`); + logger?.finalize(false); + // Log request IDs for debugging (only in log file, not shown in TUI) + const requestIds = [ + startResult.requestId ? 
`Start: ${startResult.requestId}` : '', + pollResult.requestId ? `Poll: ${pollResult.requestId}` : '', + ] + .filter(Boolean) + .join(', '); + if (requestIds) logger?.log(`Request IDs: ${requestIds}`, 'error'); + + return { + success: false, + error: failureDetails + ? `Recommendation failed: ${failureDetails}` + : `Recommendation finished with status: ${currentStatus}`, + recommendationId: startResult.recommendationId, + status: currentStatus, + logFilePath: logger?.logFilePath, + }; + } + } + + // Should not reach here, but handle gracefully + logger?.log(`Unexpected terminal status: ${currentStatus}`, 'error'); + logger?.endStep('error', `Unexpected status: ${currentStatus}`); + logger?.finalize(false); + return { + success: false, + error: `Recommendation ended with unexpected status: ${currentStatus}`, + recommendationId: startResult.recommendationId, + status: currentStatus, + logFilePath: logger?.logFilePath, + }; + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + logger?.log(`Error: ${errorMsg}`, 'error'); + logger?.endStep('error', errorMsg); + logger?.finalize(false); + return { + success: false, + error: errorMsg, + logFilePath: logger?.logFilePath, + }; + } +} + +// ============================================================================ +// Helpers +// ============================================================================ + +function resolveAgentState( + deployedState: DeployedState, + agentName: string +): { runtimeId: string; runtimeArn: string } | undefined { + for (const target of Object.values(deployedState.targets)) { + const agent = target.resources?.runtimes?.[agentName]; + if (agent) return agent; + } + return undefined; +} + +/** + * Resolve an evaluator name to a full ARN. + * Returns undefined if the evaluator cannot be resolved. 
+ */ +function resolveEvaluatorId(deployedState: DeployedState, evaluator: string, region: string): string | undefined { + // Already a full ARN — use as-is + if (evaluator.startsWith('arn:')) { + return evaluator; + } + // Builtin shorthand → expand to full ARN + if (evaluator.startsWith('Builtin.')) { + return `${arnPrefix(region)}:bedrock-agentcore:::evaluator/${evaluator}`; + } + // Look up custom evaluator from deployed state + for (const target of Object.values(deployedState.targets)) { + const evalState = target.resources?.evaluators?.[evaluator]; + if (evalState) return evalState.evaluatorArn; + } + return undefined; +} + +/** + * Extract the 12-digit AWS account ID from an ARN. + * Falls back to '*' if the ARN format is unexpected. + */ +function extractAccountIdFromArn(arn: string): string { + const parts = arn.split(':'); + return parts[4] && /^\d{12}$/.test(parts[4]) ? parts[4] : '*'; +} + +interface BuildConfigOptions { + type: RecommendationType; + inlineContent?: string; + bundleArn?: string; + bundleVersion?: string; + systemPromptJsonPath?: string; + toolDescJsonPaths?: { toolName: string; toolDescriptionJsonPath: string }[]; + inputSource: string; + tools?: string[]; + traceSource: string; + lookbackDays?: number; + sessionIds?: string[]; + spansFile?: string; + runtimeId: string; + accountId: string; + region: string; + evaluatorIds: string[]; + onProgress?: (status: string, message: string) => void; + logger?: ExecLogger; +} + +async function buildRecommendationConfig(opts: BuildConfigOptions): Promise { + // Build agent traces — either from a spans file (inline session spans) or CloudWatch + let agentTraces; + + if (opts.traceSource === 'spans-file' && opts.spansFile) { + // Explicit spans file — read and use as inline sessionSpans + const spansContent = readFileSync(opts.spansFile, 'utf-8'); + const sessionSpans = JSON.parse(spansContent) as SessionSpan | SessionSpan[]; + agentTraces = { + sessionSpans: Array.isArray(sessionSpans) ? 
sessionSpans : [sessionSpans], + }; + } else if (opts.traceSource === 'sessions' && opts.sessionIds && opts.sessionIds.length > 0) { + // Session IDs selected — auto-fetch from both log groups and use inline sessionSpans. + // The CloudWatch trace config does not support filtering by multiple session IDs, + // so we fetch spans client-side and send them inline. + opts.onProgress?.('fetching-spans', 'Fetching session spans from CloudWatch...'); + opts.logger?.log( + 'Auto-fetching spans for selected sessions (CloudWatch config does not support session ID filtering)' + ); + + const allSpans = []; + for (const sessionId of opts.sessionIds) { + const result = await fetchSessionSpans({ + region: opts.region, + runtimeId: opts.runtimeId, + sessionId, + lookbackDays: opts.lookbackDays ?? 7, + onProgress: msg => { + opts.logger?.log(msg); + opts.onProgress?.('fetching-spans', msg); + }, + }); + allSpans.push(...result.spans); + } + + if (allSpans.length === 0) { + throw new Error( + 'No spans found for the specified session(s). Ensure the agent has been invoked and traces have propagated to CloudWatch (may take 5-10 minutes).' + ); + } + + opts.logger?.log(`Total spans fetched: ${allSpans.length}`); + opts.onProgress?.('fetching-spans', `Fetched ${allSpans.length} spans`); + agentTraces = { sessionSpans: allSpans }; + } else { + // Lookback-based path — use cloudwatchLogs with time range + const runtimeLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:/aws/bedrock-agentcore/runtimes/${opts.runtimeId}-DEFAULT`; + const spansLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:aws/spans`; + + // Derive service name: strip the random hash suffix from runtimeId + // runtimeId format: {project}_{agent}-{hash} → serviceName: {project}_{agent}.DEFAULT + const serviceName = opts.runtimeId.replace(/-[^-]+$/, '.DEFAULT'); + + const lookbackDays = opts.lookbackDays ?? 
7; + agentTraces = { + cloudwatchLogs: { + logGroupArns: [runtimeLogGroupArn, spansLogGroupArn], + serviceNames: [serviceName], + startTime: new Date(Date.now() - lookbackDays * 24 * 60 * 60 * 1000).toISOString(), + endTime: new Date().toISOString(), + }, + }; + } + + const evaluationConfig: import('../../aws/agentcore-recommendation').RecommendationEvaluationConfig = { + evaluators: [{ evaluatorArn: opts.evaluatorIds[0]! }], + }; + + // Validate required fields for config-bundle source (API requires all three) + if (opts.inputSource === 'config-bundle' && opts.bundleArn && !opts.bundleVersion) { + throw new Error('Config bundle version is required. Provide --bundle-version or deploy the bundle first.'); + } + + if (opts.inputSource === 'config-bundle' && opts.bundleArn) { + if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION' && !opts.systemPromptJsonPath) { + throw new Error( + 'Config bundle requires --system-prompt-json-path to locate the system prompt field.\n' + + "Use the field name (e.g. --system-prompt-json-path 'systemPrompt') and it will be resolved from agentcore.json.\n" + + "Or provide the full JSONPath (e.g. '$.ARN.configuration.systemPrompt')." + ); + } + if (opts.type === 'TOOL_DESCRIPTION_RECOMMENDATION' && !opts.toolDescJsonPaths?.length) { + throw new Error( + 'Config bundle requires --tool-desc-json-path to locate tool description fields.\n' + + "Example: --tool-desc-json-path 'toolName:$.ARN.configuration.toolDescription'" + ); + } + } + + if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION') { + return { + systemPromptRecommendationConfig: { + systemPrompt: + opts.inputSource === 'config-bundle' && opts.bundleArn + ? { + configurationBundle: { + bundleArn: opts.bundleArn, + versionId: opts.bundleVersion!, + systemPromptJsonPath: opts.systemPromptJsonPath, + }, + } + : { text: opts.inlineContent ?? 
'' }, + agentTraces, + evaluationConfig, + }, + }; + } + + // TOOL_DESCRIPTION_RECOMMENDATION + if (opts.inputSource === 'config-bundle' && opts.bundleArn && opts.toolDescJsonPaths?.length) { + // Config bundle source — pass bundle reference with JSON paths for server-side resolution + return { + toolDescriptionRecommendationConfig: { + toolDescription: { + configurationBundle: { + bundleArn: opts.bundleArn, + versionId: opts.bundleVersion!, + tools: opts.toolDescJsonPaths, + }, + }, + agentTraces, + }, + }; + } + + // Inline/file source — parse "toolName:description" pairs from tools array + const toolEntries = (opts.tools ?? []).map(t => { + const colonIdx = t.indexOf(':'); + if (colonIdx > 0) { + return { toolName: t.slice(0, colonIdx), toolDescription: { text: t.slice(colonIdx + 1) } }; + } + return { toolName: t, toolDescription: { text: opts.inlineContent ?? '' } }; + }); + + return { + toolDescriptionRecommendationConfig: { + toolDescription: { + toolDescriptionText: { + tools: toolEntries, + }, + }, + agentTraces, + }, + }; +} + +/** + * Extract error details from a FAILED recommendation response. + * The API populates errorCode/errorMessage in the result, and statusReasons at top level. + */ +function extractFailureDetails(pollResult: { + statusReasons?: string[]; + recommendationResult?: RecommendationResult; +}): string | undefined { + const parts: string[] = []; + + if (pollResult.statusReasons?.length) { + parts.push(pollResult.statusReasons.join('; ')); + } + + const result = pollResult.recommendationResult; + if (result) { + const errorSource = result.systemPromptRecommendationResult ?? result.toolDescriptionRecommendationResult; + if (errorSource) { + if (errorSource.errorCode) parts.push(`[${errorSource.errorCode}]`); + if (errorSource.errorMessage) parts.push(errorSource.errorMessage); + } + } + + return parts.length > 0 ? 
parts.join(' ') : undefined; +} + +/** + * Resolve a component key (which may be a placeholder like {{runtime:name}}) + * to its real ARN from deployed state. Returns the key unchanged if not a placeholder. + */ +function resolveComponentKeyForJsonPath(key: string, deployedState: DeployedState): string { + if (key.startsWith('arn:')) return key; + + const rtMatch = /^\{\{runtime:(.+)\}\}$/.exec(key); + if (rtMatch) { + const rtName = rtMatch[1]!; + for (const target of Object.values(deployedState.targets)) { + const rt = target.resources?.runtimes?.[rtName]; + if (rt) return rt.runtimeArn; + } + } + + const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(key); + if (gwMatch) { + const gwName = gwMatch[1]!; + for (const target of Object.values(deployedState.targets)) { + const httpGw = target.resources?.httpGateways?.[gwName]; + if (httpGw) return httpGw.gatewayArn; + const mcpGw = target.resources?.mcp?.gateways?.[gwName]; + if (mcpGw) return mcpGw.gatewayArn; + } + } + + return key; +} + +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} diff --git a/src/cli/operations/recommendation/types.ts b/src/cli/operations/recommendation/types.ts new file mode 100644 index 000000000..426ba84a8 --- /dev/null +++ b/src/cli/operations/recommendation/types.ts @@ -0,0 +1,72 @@ +/** + * Shared types for the recommendation feature. + */ +import type { RecommendationResult, RecommendationType } from '../../aws/agentcore-recommendation'; + +export type { RecommendationType } from '../../aws/agentcore-recommendation'; + +/** CLI-facing input source kind (maps to API config shape). */ +export type RecommendationInputSourceKind = 'config-bundle' | 'inline' | 'file'; + +/** CLI-facing trace source kind (maps to API agentTraces shape). 
*/ +export type TraceSourceKind = 'cloudwatch' | 'sessions' | 'spans-file'; + +export interface RunRecommendationCommandOptions { + /** What to optimize */ + type: RecommendationType; + /** Agent name (from project) */ + agent: string; + /** Evaluator name, Builtin.* ID, or ARN (API accepts exactly one for system-prompt) */ + evaluators: string[]; + /** Input source kind */ + inputSource: RecommendationInputSourceKind; + /** Config bundle name (when inputSource is 'config-bundle') */ + bundleName?: string; + /** Config bundle version (when inputSource is 'config-bundle') */ + bundleVersion?: string; + /** JSONPath to the system prompt field within the config bundle (when inputSource is 'config-bundle') */ + systemPromptJsonPath?: string; + /** Tool name → JSONPath pairs for tool descriptions within the config bundle (when inputSource is 'config-bundle') */ + toolDescJsonPaths?: { toolName: string; toolDescriptionJsonPath: string }[]; + /** Inline content (when inputSource is 'inline') */ + inlineContent?: string; + /** File path (when inputSource is 'file') */ + promptFile?: string; + /** Specific tool names and descriptions (for TOOL_DESCRIPTION_RECOMMENDATION) */ + tools?: string[]; + /** Trace source kind */ + traceSource: TraceSourceKind; + /** Lookback days (when traceSource is 'cloudwatch') */ + lookbackDays?: number; + /** Session IDs (when traceSource is 'sessions') — used to filter CloudWatch traces */ + sessionIds?: string[]; + /** Path to JSON file containing session spans (when traceSource is 'spans-file') */ + spansFile?: string; + /** Region override */ + region?: string; + /** Optional recommendation name */ + recommendationName?: string; + /** Poll interval in ms */ + pollIntervalMs?: number; + /** Max polling duration in ms before timing out */ + maxPollDurationMs?: number; + /** Progress callback */ + onProgress?: (status: string, message: string) => void; + /** Called once the recommendation has been created, with ID and region for cancellation 
*/ + onStarted?: (info: { recommendationId: string; region: string }) => void; +} + +export interface RunRecommendationCommandResult { + success: boolean; + error?: string; + recommendationId?: string; + status?: string; + /** The recommendation result from the API (populated on COMPLETED) */ + result?: RecommendationResult; + /** Resolved AWS region used for the recommendation */ + region?: string; + startedAt?: string; + completedAt?: string; + /** Path to the execution log file */ + logFilePath?: string; +} diff --git a/src/cli/primitives/ABTestPrimitive.ts b/src/cli/primitives/ABTestPrimitive.ts new file mode 100644 index 000000000..9dd973571 --- /dev/null +++ b/src/cli/primitives/ABTestPrimitive.ts @@ -0,0 +1,728 @@ +import { findConfigRoot } from '../../lib'; +import type { ABTest } from '../../schema/schemas/primitives/ab-test'; +import { ABTestSchema } from '../../schema/schemas/primitives/ab-test'; +import { getErrorMessage } from '../errors'; +import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; +import { requireTTY } from '../tui/guards/tty'; +import { BasePrimitive } from './BasePrimitive'; +import type { AddResult, AddScreenComponent, RemovableResource } from './types'; +import type { Command } from '@commander-js/extra-typings'; + +export type GatewayChoice = { type: 'create-new' } | { type: 'existing-http'; name: string }; + +export interface AddABTestOptions { + name: string; + description?: string; + agent: string; + gatewayChoice?: GatewayChoice; + roleArn?: string; + controlBundle: string; + controlVersion: string; + treatmentBundle: string; + treatmentVersion: string; + controlWeight: number; + treatmentWeight: number; + onlineEval: string; + trafficHeaderName?: string; + maxDurationDays?: number; + enableOnCreate?: boolean; +} + +export interface AddTargetBasedABTestOptions { + name: string; + description?: string; + gateway: string; + runtime: string; + roleArn?: string; + controlEndpoint: string; + 
treatmentEndpoint: string; + controlWeight: number; + treatmentWeight: number; + controlOnlineEval: string; + treatmentOnlineEval: string; + gatewayFilter?: string; + enableOnCreate?: boolean; +} + +export type RemovableABTest = RemovableResource; + +/** + * ABTestPrimitive handles all A/B test add/remove operations. + * + * A/B tests split traffic between two config bundle versions (control vs + * treatment) through a gateway, with online evaluation tracking performance. + * They are created via direct API calls (not CloudFormation) and stored in + * agentcore.json for lifecycle management. + */ +export class ABTestPrimitive extends BasePrimitive { + readonly kind = 'ab-test' as const; + readonly label = 'AB Test'; + override readonly article = 'an'; + readonly primitiveSchema = ABTestSchema; + + async add(options: AddABTestOptions): Promise> { + try { + const abTest = await this.createABTest(options); + return { success: true, abTestName: abTest.name }; + } catch (err) { + return { success: false, error: getErrorMessage(err) }; + } + } + + async remove(testName: string, options?: { deleteGateway?: boolean }): Promise { + try { + const project = await this.readProjectSpec(); + + const index = (project.abTests ?? 
[]).findIndex(t => t.name === testName); + if (index === -1) { + return { success: false, error: `AB test "${testName}" not found.` }; + } + + const removedTest = project.abTests[index]!; + project.abTests.splice(index, 1); + + // Cascade: remove auto-created online eval configs for target-based tests + // Only remove eval configs that were auto-created (matching the {testName}_eval_ prefix pattern) + if (removedTest.mode === 'target-based' && 'perVariantOnlineEvaluationConfig' in removedTest.evaluationConfig) { + const autoCreatedPrefix = `${testName}_eval_`; + const evalNames = removedTest.evaluationConfig.perVariantOnlineEvaluationConfig + .map(pv => pv.onlineEvaluationConfigArn) + .filter(name => name.startsWith(autoCreatedPrefix)); + project.onlineEvalConfigs = project.onlineEvalConfigs.filter(c => !evalNames.includes(c.name)); + } + + // --delete-gateway: cascade remove gateway targets and orphaned gateways + if (options?.deleteGateway && removedTest.gatewayRef) { + const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(removedTest.gatewayRef); + if (gwMatch) { + const gwName = gwMatch[1]!; + + // Remove gateway targets that were created for this AB test's variants + if (removedTest.mode === 'target-based') { + const targetNames = removedTest.variants + .map(v => v.variantConfiguration.target?.targetName) + .filter((n): n is string => !!n); + const gw = (project.httpGateways ?? []).find(g => g.name === gwName); + if (gw?.targets) { + gw.targets = gw.targets.filter(t => !targetNames.includes(t.name)); + } + } + + // Remove gateway if no other AB tests reference it + const stillReferenced = (project.abTests ?? []).some(t => { + const m = /^\{\{gateway:(.+)\}\}$/.exec(t.gatewayRef); + return m?.[1] === gwName; + }); + if (!stillReferenced) { + project.httpGateways = (project.httpGateways ?? 
[]).filter(gw => gw.name !== gwName); + } + } + } + + await this.writeProjectSpec(project); + + return { success: true }; + } catch (err) { + return { success: false, error: getErrorMessage(err) }; + } + } + + async previewRemove(testName: string): Promise { + const project = await this.readProjectSpec(); + + const abTest = (project.abTests ?? []).find(t => t.name === testName); + if (!abTest) { + throw new Error(`AB test "${testName}" not found.`); + } + + const summary: string[] = [`Removing AB test: ${testName}`]; + const schemaChanges: SchemaChange[] = []; + + const testIndex = (project.abTests ?? []).findIndex(t => t.name === testName); + const afterSpec = { + ...project, + abTests: (project.abTests ?? []).filter(t => t.name !== testName), + httpGateways: [...(project.httpGateways ?? [])], + }; + + // Check if the gateway would be orphaned + const test = (project.abTests ?? [])[testIndex]; + if (test?.gatewayRef) { + const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(test.gatewayRef); + if (gwMatch) { + const gwName = gwMatch[1]; + const otherTests = (project.abTests ?? []).filter((_, i) => i !== testIndex); + const stillReferenced = otherTests.some(t => { + const m = /^\{\{gateway:(.+)\}\}$/.exec(t.gatewayRef); + return m && m[1] === gwName; + }); + if (!stillReferenced) { + summary.push(`Also removing HTTP gateway: ${gwName} (no other AB tests reference it)`); + afterSpec.httpGateways = (project.httpGateways ?? []).filter(gw => gw.name !== gwName); + } + } + } + + schemaChanges.push({ + file: 'agentcore/agentcore.json', + before: project, + after: afterSpec, + }); + + return { summary, directoriesToDelete: [], schemaChanges }; + } + + async getRemovable(): Promise { + try { + const project = await this.readProjectSpec(); + return (project.abTests ?? []).map(t => ({ name: t.name })); + } catch { + return []; + } + } + + async getAllNames(): Promise { + try { + const project = await this.readProjectSpec(); + return (project.abTests ?? 
[]).map(t => t.name); + } catch { + return []; + } + } + + registerCommands(addCmd: Command, removeCmd: Command): void { + const abTestCmd = addCmd + .command('ab-test') + .description('[preview] Add an A/B test to the project') + .option('--mode ', 'config-bundle (default) or target-based') + .option('--name ', 'AB test name') + .option('--description ', 'AB test description') + .option('--runtime ', 'Runtime agent to A/B test') + .option('--role-arn ', 'IAM role ARN (auto-created if not provided)') + .option('--control-bundle ', 'Control config bundle name or ARN') + .option('--control-version ', 'Control config bundle version') + .option('--treatment-bundle ', 'Treatment config bundle name or ARN') + .option('--treatment-version ', 'Treatment config bundle version') + .option('--control-endpoint ', 'Endpoint qualifier for control') + .option('--treatment-endpoint ', 'Endpoint qualifier for treatment') + .option('--control-weight ', 'Traffic weight for control (1-100)', parseInt) + .option('--treatment-weight ', 'Traffic weight for treatment (1-100)', parseInt) + .option('--gateway ', 'HTTP gateway name') + .option('--online-eval ', 'Online evaluation config name or ARN') + .option('--control-online-eval ', 'Eval config name or ARN for control') + .option('--treatment-online-eval ', 'Eval config name or ARN for treatment') + .option('--gateway-filter ', 'Path pattern for routing') + .option('--traffic-header ', 'Header name for traffic routing') + // Hidden deprecated aliases for backwards compatibility + .option('--control-qualifier ', '') + .option('--treatment-qualifier ', '') + // TODO(post-preview): Re-enable --max-duration once configurable duration is launched. + // .option('--max-duration ', 'Maximum duration in days (1-90)', parseInt) + .option('--enable', 'Enable the AB test on creation') + .option('--json', 'Output as JSON'); + + // Hide mode-specific and deprecated flags from the default options list. 
+ // They are shown in the grouped help text below instead. + const hiddenFromDefaultHelp = new Set([ + '--runtime', + '--control-bundle', + '--control-version', + '--treatment-bundle', + '--treatment-version', + '--online-eval', + '--traffic-header', + '--control-endpoint', + '--treatment-endpoint', + '--control-online-eval', + '--treatment-online-eval', + '--gateway-filter', + '--control-qualifier', + '--treatment-qualifier', + ]); + for (const opt of abTestCmd.options) { + if (hiddenFromDefaultHelp.has(opt.long ?? '')) { + opt.hidden = true; + } + } + + // Add grouped help text after the default options section + abTestCmd.addHelpText( + 'after', + ` +Config-Bundle Mode (--mode config-bundle) -- default + Split traffic between two config bundle versions. + --runtime Runtime agent to A/B test + --control-bundle Control config bundle name or ARN + --control-version Control config bundle version + --treatment-bundle Treatment config bundle name or ARN + --treatment-version Treatment config bundle version + --online-eval Online evaluation config name or ARN + --traffic-header Header name for traffic routing + +Target-Based Mode (--mode target-based) + Route traffic to different runtime endpoints. 
+ --control-endpoint Endpoint for control target + --treatment-endpoint Endpoint for treatment target + --control-online-eval Eval config name or ARN for control + --treatment-online-eval Eval config name or ARN for treatment + --gateway-filter Path pattern for routing +` + ); + + abTestCmd.action( + async (cliOptions: { + mode?: string; + name?: string; + description?: string; + runtime?: string; + gateway?: string; + roleArn?: string; + controlBundle?: string; + controlVersion?: string; + treatmentBundle?: string; + treatmentVersion?: string; + controlEndpoint?: string; + controlQualifier?: string; // deprecated alias for --control-endpoint + treatmentEndpoint?: string; + treatmentQualifier?: string; // deprecated alias for --treatment-endpoint + controlWeight?: number; + treatmentWeight?: number; + onlineEval?: string; + controlOnlineEval?: string; + treatmentOnlineEval?: string; + gatewayFilter?: string; + trafficHeader?: string; + maxDuration?: number; + enable?: boolean; + json?: boolean; + }) => { + try { + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + + // Resolve deprecated aliases (--control-qualifier -> --control-endpoint, etc.) + const resolvedControlEndpoint = cliOptions.controlEndpoint ?? cliOptions.controlQualifier; + const resolvedTreatmentEndpoint = cliOptions.treatmentEndpoint ?? cliOptions.treatmentQualifier; + + if (cliOptions.name || cliOptions.json) { + const fail = (error: string) => { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + console.error(error); + } + process.exit(1); + }; + + const mode = cliOptions.mode ?? 'config-bundle'; + if (mode !== 'config-bundle' && mode !== 'target-based') { + fail(`Invalid --mode "${mode}". 
Must be one of: config-bundle, target-based`); + } + + if (!cliOptions.name) fail('--name is required'); + + // Target-based mode + if (mode === 'target-based') { + // Cross-validation: reject config-bundle flags + if (cliOptions.controlBundle) fail('--control-bundle cannot be used with --mode target-based'); + if (cliOptions.treatmentBundle) fail('--treatment-bundle cannot be used with --mode target-based'); + if (cliOptions.controlVersion) fail('--control-version cannot be used with --mode target-based'); + if (cliOptions.treatmentVersion) fail('--treatment-version cannot be used with --mode target-based'); + if (cliOptions.onlineEval) fail('--online-eval cannot be used with --mode target-based'); + + // Required flags + if (!cliOptions.gateway) fail('--gateway is required for target-based mode'); + if (!cliOptions.runtime) fail('--runtime is required for target-based mode'); + if (!resolvedControlEndpoint) fail('--control-endpoint is required for target-based mode'); + if (!resolvedTreatmentEndpoint) fail('--treatment-endpoint is required for target-based mode'); + if (cliOptions.controlWeight === undefined) fail('--control-weight is required'); + if (cliOptions.treatmentWeight === undefined) fail('--treatment-weight is required'); + + // Eval: require both online eval config names + if (!cliOptions.controlOnlineEval || !cliOptions.treatmentOnlineEval) { + fail( + '--control-online-eval and --treatment-online-eval are required. 
Create eval configs first with: agentcore add online-eval --endpoint ' + ); + } + + const result = await this.addTargetBased({ + name: cliOptions.name!, + description: cliOptions.description, + gateway: cliOptions.gateway!, + runtime: cliOptions.runtime!, + roleArn: cliOptions.roleArn, + controlEndpoint: resolvedControlEndpoint!, + treatmentEndpoint: resolvedTreatmentEndpoint!, + controlWeight: cliOptions.controlWeight!, + treatmentWeight: cliOptions.treatmentWeight!, + controlOnlineEval: cliOptions.controlOnlineEval!, + treatmentOnlineEval: cliOptions.treatmentOnlineEval!, + gatewayFilter: cliOptions.gatewayFilter, + enableOnCreate: cliOptions.enable, + }); + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else if (result.success) { + console.log(`Added target-based AB test '${result.abTestName}'`); + } else { + console.error(result.error); + } + process.exit(result.success ? 0 : 1); + return; + } + + // Config-bundle mode (default) + // Cross-validation: reject target-based flags + if (cliOptions.gatewayFilter) fail('--gateway-filter requires --mode target-based'); + if (cliOptions.controlOnlineEval) fail('--control-online-eval requires --mode target-based'); + if (cliOptions.treatmentOnlineEval) fail('--treatment-online-eval requires --mode target-based'); + + if (!cliOptions.gateway && !cliOptions.runtime) + fail('--runtime is required (unless --gateway is provided)'); + if (!cliOptions.controlBundle) fail('--control-bundle is required'); + if (!cliOptions.controlVersion) fail('--control-version is required'); + if (!cliOptions.treatmentBundle) fail('--treatment-bundle is required'); + if (!cliOptions.treatmentVersion) fail('--treatment-version is required'); + if (cliOptions.controlWeight === undefined) fail('--control-weight is required'); + if (cliOptions.treatmentWeight === undefined) fail('--treatment-weight is required'); + if (!cliOptions.onlineEval) fail('--online-eval is required'); + + const result = await this.add({ + name: 
cliOptions.name!, + description: cliOptions.description, + agent: cliOptions.runtime ?? '', + gatewayChoice: cliOptions.gateway + ? { type: 'existing-http', name: cliOptions.gateway } + : { type: 'create-new' }, + roleArn: cliOptions.roleArn!, + controlBundle: cliOptions.controlBundle!, + controlVersion: cliOptions.controlVersion!, + treatmentBundle: cliOptions.treatmentBundle!, + treatmentVersion: cliOptions.treatmentVersion!, + controlWeight: cliOptions.controlWeight!, + treatmentWeight: cliOptions.treatmentWeight!, + onlineEval: cliOptions.onlineEval!, + trafficHeaderName: cliOptions.trafficHeader, + maxDurationDays: cliOptions.maxDuration, + enableOnCreate: cliOptions.enable, + }); + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else if (result.success) { + console.log(`Added AB test '${result.abTestName}'`); + } else { + console.error(result.error); + } + process.exit(result.success ? 0 : 1); + } else { + // TUI fallback + const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/add/AddFlow'), + ]); + const { clear, unmount } = render( + React.createElement(AddFlow, { + isInteractive: false, + initialResource: 'ab-test', + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(getErrorMessage(error)); + } + process.exit(1); + } + } + ); + + removeCmd + .command(this.kind) + .description(`Remove ${this.article} ${this.label.toLowerCase()} from the project`) + .option('--name ', 'Name of resource to remove [non-interactive]') + .option('-y, --yes', 'Skip confirmation prompt [non-interactive]') + .option('--json', 'Output as JSON [non-interactive]') + .option('--delete-gateway', 'Also remove gateway targets and orphaned gateways (default: false)') + .action(async (cliOptions: { name?: 
string; yes?: boolean; json?: boolean; deleteGateway?: boolean }) => { + try { + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + + if (cliOptions.name || cliOptions.yes || cliOptions.json) { + if (!cliOptions.name) { + console.log(JSON.stringify({ success: false, error: '--name is required' })); + process.exit(1); + } + + const result = await this.remove(cliOptions.name, { deleteGateway: cliOptions.deleteGateway }); + console.log( + JSON.stringify({ + success: result.success, + resourceType: this.kind, + resourceName: cliOptions.name, + message: result.success ? `Removed ${this.label.toLowerCase()} '${cliOptions.name}'` : undefined, + error: !result.success ? result.error : undefined, + }) + ); + process.exit(result.success ? 0 : 1); + } else { + // TUI fallback + requireTTY(); + const [{ render }, { default: React }, { RemoveFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/remove'), + ]); + const { clear, unmount } = render( + React.createElement(RemoveFlow, { + isInteractive: false, + force: cliOptions.yes, + initialResourceType: this.kind, + initialResourceName: cliOptions.name, + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + }); + } + + addScreen(): AddScreenComponent { + return null; + } + + private async createABTest(options: AddABTestOptions): Promise { + const project = await this.readProjectSpec(); + + this.checkDuplicate(project.abTests ?? [], options.name); + + // Resolve gateway reference based on the user's choice + let gatewayRef: string; + const choice = options.gatewayChoice ?? 
{ type: 'create-new' }; + + if (choice.type === 'existing-http') { + // Reuse an existing HTTP gateway from the project spec + const existing = (project.httpGateways ?? []).find(gw => gw.name === choice.name); + if (!existing) { + throw new Error(`HTTP gateway "${choice.name}" not found in project.`); + } + gatewayRef = `{{gateway:${choice.name}}}`; + } else { + // Create new HTTP gateway — truncate name to fit 48-char limit + const httpGwName = `${options.name.replace(/_/g, '-').slice(0, 44)}-gw`; + const existingGw = (project.httpGateways ?? []).find(gw => gw.name === httpGwName); + if (existingGw) { + if (existingGw.runtimeRef !== options.agent) { + throw new Error( + `HTTP gateway "${httpGwName}" already exists with a different runtime (${existingGw.runtimeRef}). ` + + `Choose a different AB test name to avoid a gateway name collision.` + ); + } + } else { + project.httpGateways ??= []; + project.httpGateways.push({ + name: httpGwName, + runtimeRef: options.agent, + }); + } + gatewayRef = `{{gateway:${httpGwName}}}`; + } + + const abTest: ABTest = { + name: options.name, + mode: 'config-bundle', + ...(options.description && { description: options.description }), + gatewayRef, + ...(options.roleArn && { roleArn: options.roleArn }), + variants: [ + { + name: 'C', + weight: options.controlWeight, + variantConfiguration: { + configurationBundle: { + bundleArn: options.controlBundle, + bundleVersion: options.controlVersion, + }, + }, + }, + { + name: 'T1', + weight: options.treatmentWeight, + variantConfiguration: { + configurationBundle: { + bundleArn: options.treatmentBundle, + bundleVersion: options.treatmentVersion, + }, + }, + }, + ], + evaluationConfig: { + onlineEvaluationConfigArn: options.onlineEval, + }, + ...(options.trafficHeaderName && { + trafficAllocationConfig: { routeOnHeader: { headerName: options.trafficHeaderName } }, + }), + ...(options.maxDurationDays !== undefined && { maxDurationDays: options.maxDurationDays }), + ...(options.enableOnCreate 
!== undefined && { enableOnCreate: options.enableOnCreate }), + }; + + project.abTests ??= []; + project.abTests.push(abTest); + await this.writeProjectSpec(project); + + return abTest; + } + + async addTargetBased(options: AddTargetBasedABTestOptions): Promise> { + try { + const abTest = await this.createTargetBasedABTest(options); + return { success: true, abTestName: abTest.name }; + } catch (err) { + return { success: false, error: getErrorMessage(err) }; + } + } + + private async createTargetBasedABTest(options: AddTargetBasedABTestOptions): Promise { + const project = await this.readProjectSpec(); + + this.checkDuplicate(project.abTests ?? [], options.name); + + // Validate runtime exists + const runtime = project.runtimes.find(r => r.name === options.runtime); + if (!runtime) { + throw new Error(`Runtime "${options.runtime}" not found in project.`); + } + + // Validate endpoints exist on the runtime + if (!runtime.endpoints?.[options.controlEndpoint]) { + throw new Error( + `Endpoint "${options.controlEndpoint}" not found on runtime "${options.runtime}". Add it with: agentcore add runtime-endpoint` + ); + } + if (!runtime.endpoints?.[options.treatmentEndpoint]) { + throw new Error( + `Endpoint "${options.treatmentEndpoint}" not found on runtime "${options.runtime}". Add it with: agentcore add runtime-endpoint` + ); + } + + // Auto-generate target names from runtime + qualifier + const controlTarget = `${options.runtime}-${options.controlEndpoint}`; + const treatmentTarget = `${options.runtime}-${options.treatmentEndpoint}`; + + // Auto-create HTTP gateway if it doesn't exist + let existing = (project.httpGateways ?? 
[]).find(gw => gw.name === options.gateway); + if (!existing) { + existing = { + name: options.gateway, + description: `HTTP gateway for AB test ${options.name}`, + runtimeRef: options.runtime, + targets: [ + { name: controlTarget, runtimeRef: options.runtime, qualifier: options.controlEndpoint }, + { name: treatmentTarget, runtimeRef: options.runtime, qualifier: options.treatmentEndpoint }, + ], + }; + project.httpGateways ??= []; + project.httpGateways.push(existing); + } else { + // Gateway exists — ensure targets exist + existing.targets ??= []; + if (!existing.targets.find(t => t.name === controlTarget)) { + existing.targets.push({ + name: controlTarget, + runtimeRef: options.runtime, + qualifier: options.controlEndpoint, + }); + } + if (!existing.targets.find(t => t.name === treatmentTarget)) { + existing.targets.push({ + name: treatmentTarget, + runtimeRef: options.runtime, + qualifier: options.treatmentEndpoint, + }); + } + } + const gatewayRef = `{{gateway:${options.gateway}}}`; + + // Look up online eval configs by name + const controlEvalConfig = project.onlineEvalConfigs.find(c => c.name === options.controlOnlineEval); + if (!controlEvalConfig) { + throw new Error( + `Online eval config '${options.controlOnlineEval}' not found. Create it first with: agentcore add online-eval` + ); + } + const treatmentEvalConfig = project.onlineEvalConfigs.find(c => c.name === options.treatmentOnlineEval); + if (!treatmentEvalConfig) { + throw new Error( + `Online eval config '${options.treatmentOnlineEval}' not found. 
Create it first with: agentcore add online-eval` + ); + } + + // Store eval names — post-deploy resolveOnlineEvalArn will resolve names to ARNs + const evaluationConfig: ABTest['evaluationConfig'] = { + perVariantOnlineEvaluationConfig: [ + { treatmentName: 'C' as const, onlineEvaluationConfigArn: options.controlOnlineEval }, + { treatmentName: 'T1' as const, onlineEvaluationConfigArn: options.treatmentOnlineEval }, + ], + }; + + const abTest: ABTest = { + name: options.name, + mode: 'target-based', + ...(options.description && { description: options.description }), + gatewayRef, + ...(options.roleArn && { roleArn: options.roleArn }), + variants: [ + { + name: 'C' as const, + weight: options.controlWeight, + variantConfiguration: { + target: { targetName: controlTarget }, + }, + }, + { + name: 'T1' as const, + weight: options.treatmentWeight, + variantConfiguration: { + target: { targetName: treatmentTarget }, + }, + }, + ], + evaluationConfig, + ...(options.gatewayFilter && { + gatewayFilter: { targetPaths: [options.gatewayFilter] }, + }), + ...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }), + }; + + project.abTests ??= []; + project.abTests.push(abTest); + await this.writeProjectSpec(project); + + return abTest; + } +} diff --git a/src/cli/primitives/AgentPrimitive.tsx b/src/cli/primitives/AgentPrimitive.tsx index 4702633ed..b9873990b 100644 --- a/src/cli/primitives/AgentPrimitive.tsx +++ b/src/cli/primitives/AgentPrimitive.tsx @@ -25,6 +25,7 @@ import { parseAndNormalizeHeaders } from '../commands/shared/header-utils'; import type { VpcOptions } from '../commands/shared/vpc-utils'; import { VPC_ENDPOINT_WARNING, parseCommaSeparatedList } from '../commands/shared/vpc-utils'; import { getErrorMessage } from '../errors'; +import { createConfigBundleForAgent } from '../operations/agent/config-bundle-defaults'; import { mapGenerateConfigToRenderConfig, mapModelProviderToCredentials, @@ -34,6 +35,19 @@ import { import { 
executeImportAgent } from '../operations/agent/import'; import { setupPythonProject } from '../operations/python'; import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; +import { cliCommandRun } from '../telemetry/cli-command-run.js'; +import { + AgentType, + AuthorizerType, + Build, + Framework, + Language, + Memory, + ModelProvider as ModelProviderEnum, + NetworkMode as NetworkModeEnum, + Protocol, + standardize, +} from '../telemetry/schemas/common-shapes.js'; import { createRenderer } from '../templates'; import { requireTTY } from '../tui/guards/tty'; import type { GenerateConfig, MemoryOption } from '../tui/screens/generate/types'; @@ -76,6 +90,7 @@ export interface AddAgentOptions extends VpcOptions { idleTimeout?: number; maxLifetime?: number; sessionStorageMountPath?: string; + withConfigBundle?: boolean; } /** @@ -253,6 +268,10 @@ export class AgentPrimitive extends BasePrimitive', 'Absolute mount path for session filesystem storage (e.g. /mnt/session-storage) [non-interactive]' ) + .option( + '--with-config-bundle', + 'Create a config bundle wired into the agent template [preview] [non-interactive]' + ) .option('--json', 'Output as JSON [non-interactive]') .action(async options => { if (!findConfigRoot()) { @@ -264,92 +283,107 @@ export class AgentPrimitive extends BasePrimitive { + const validation = validateAddAgentOptions(cliOptions); + if (!validation.valid) { + throw new Error(validation.error); } - process.exit(1); - } - - // Parse custom claims JSON if provided (already validated by validateAddAgentOptions) - const customClaims = cliOptions.customClaims - ? (JSON.parse(cliOptions.customClaims) as CustomClaimValidation[]) - : undefined; - - // Parse request header allowlist if provided - const requestHeaderAllowlist = cliOptions.requestHeaderAllowlist - ? 
parseAndNormalizeHeaders(cliOptions.requestHeaderAllowlist) - : undefined; - - const result = await this.add({ - name: cliOptions.name!, - type: cliOptions.type ?? 'create', - buildType: (cliOptions.build as BuildType) ?? 'CodeZip', - language: cliOptions.language!, - framework: cliOptions.framework!, - modelProvider: cliOptions.modelProvider!, - apiKey: cliOptions.apiKey, - memory: cliOptions.memory, - protocol: cliOptions.protocol, - networkMode: cliOptions.networkMode, - subnets: cliOptions.subnets, - securityGroups: cliOptions.securityGroups, - requestHeaderAllowlist, - codeLocation: cliOptions.codeLocation, - entrypoint: cliOptions.entrypoint, - bedrockAgentId: cliOptions.agentId, - bedrockAliasId: cliOptions.agentAliasId, - bedrockRegion: cliOptions.region, - authorizerType: cliOptions.authorizerType, - discoveryUrl: cliOptions.discoveryUrl, - allowedAudience: cliOptions.allowedAudience, - allowedClients: cliOptions.allowedClients, - allowedScopes: cliOptions.allowedScopes, - customClaims, - clientId: cliOptions.clientId, - clientSecret: cliOptions.clientSecret, - idleTimeout: cliOptions.idleTimeout ? Number(cliOptions.idleTimeout) : undefined, - maxLifetime: cliOptions.maxLifetime ? Number(cliOptions.maxLifetime) : undefined, - sessionStorageMountPath: cliOptions.sessionStorageMountPath, - }); - if (cliOptions.json) { - console.log(JSON.stringify(result)); - } else if (result.success) { - console.log(`Added agent '${result.agentName}'`); - if (result.agentPath) { - console.log(`Agent code: ${result.agentPath}`); + // Parse custom claims JSON if provided (already validated by validateAddAgentOptions) + const customClaims = cliOptions.customClaims + ? (JSON.parse(cliOptions.customClaims) as CustomClaimValidation[]) + : undefined; + + // Parse request header allowlist if provided + const requestHeaderAllowlist = cliOptions.requestHeaderAllowlist + ? 
parseAndNormalizeHeaders(cliOptions.requestHeaderAllowlist) + : undefined; + + const result = await this.add({ + name: cliOptions.name!, + type: cliOptions.type ?? 'create', + buildType: (cliOptions.build as BuildType) ?? 'CodeZip', + language: cliOptions.language!, + framework: cliOptions.framework!, + modelProvider: cliOptions.modelProvider!, + apiKey: cliOptions.apiKey, + memory: cliOptions.memory, + protocol: cliOptions.protocol, + networkMode: cliOptions.networkMode, + subnets: cliOptions.subnets, + securityGroups: cliOptions.securityGroups, + requestHeaderAllowlist, + codeLocation: cliOptions.codeLocation, + entrypoint: cliOptions.entrypoint, + bedrockAgentId: cliOptions.agentId, + bedrockAliasId: cliOptions.agentAliasId, + bedrockRegion: cliOptions.region, + authorizerType: cliOptions.authorizerType, + discoveryUrl: cliOptions.discoveryUrl, + allowedAudience: cliOptions.allowedAudience, + allowedClients: cliOptions.allowedClients, + allowedScopes: cliOptions.allowedScopes, + customClaims, + clientId: cliOptions.clientId, + clientSecret: cliOptions.clientSecret, + idleTimeout: cliOptions.idleTimeout ? Number(cliOptions.idleTimeout) : undefined, + maxLifetime: cliOptions.maxLifetime ? Number(cliOptions.maxLifetime) : undefined, + sessionStorageMountPath: cliOptions.sessionStorageMountPath, + withConfigBundle: cliOptions.withConfigBundle, + }); + + if (!result.success) { + throw new Error(result.error); } - if (cliOptions.networkMode === 'VPC') { - console.log(`\x1b[33mNote: ${VPC_ENDPOINT_WARNING}\x1b[0m`); + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else { + console.log(`Added agent '${result.agentName}'`); + if (result.agentPath) { + console.log(`Agent code: ${result.agentPath}`); + } + if (cliOptions.networkMode === 'VPC') { + console.log(`\x1b[33mNote: ${VPC_ENDPOINT_WARNING}\x1b[0m`); + } } - } else { - console.error(result.error); - } - process.exit(result.success ? 
0 : 1); + return { + language: standardize(Language, cliOptions.language), + framework: standardize(Framework, cliOptions.framework), + model_provider: standardize(ModelProviderEnum, cliOptions.modelProvider), + agent_type: standardize(AgentType, cliOptions.type ?? 'create'), + build: standardize(Build, cliOptions.build ?? 'CodeZip'), + protocol: standardize(Protocol, cliOptions.protocol ?? 'HTTP'), + network_mode: standardize(NetworkModeEnum, cliOptions.networkMode ?? 'PUBLIC'), + authorizer_type: standardize(AuthorizerType, cliOptions.authorizerType ?? 'NONE'), + memory: standardize(Memory, cliOptions.memory ?? 'none'), + }; + }); } else { - // TUI fallback — dynamic imports to avoid pulling ink (async) into registry - requireTTY(); - const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ - import('ink'), - import('react'), - import('../tui/screens/add/AddFlow'), - ]); - const { clear, unmount } = render( - React.createElement(AddFlow, { - isInteractive: false, - initialResource: 'agent', - onExit: () => { - clear(); - unmount(); - process.exit(0); - }, - }) - ); + try { + // TUI fallback — dynamic imports to avoid pulling ink (async) into registry + requireTTY(); + const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/add/AddFlow'), + ]); + const { clear, unmount } = render( + React.createElement(AddFlow, { + isInteractive: false, + initialResource: 'agent', + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + } catch (error) { + console.error(getErrorMessage(error)); + process.exit(1); + } } }); @@ -412,6 +446,7 @@ export class AgentPrimitive extends BasePrimitive }>; + branchName?: string; + commitMessage?: string; +} + +export type RemovableConfigBundle = RemovableResource; + +/** + * ConfigBundlePrimitive handles all configuration bundle add/remove operations. 
+ * + * Configuration bundles are versioned collections of component configurations + * (system prompts, tool configs) keyed by component ARN. They are created via + * direct API calls (not CloudFormation) and stored in agentcore.json for + * lifecycle management. + */ +export class ConfigBundlePrimitive extends BasePrimitive { + readonly kind = 'config-bundle' as const; + readonly label = 'Configuration Bundle'; + override readonly article = 'a'; + readonly primitiveSchema = ConfigBundleSchema; + + async add(options: AddConfigBundleOptions): Promise> { + try { + const bundle = await this.createConfigBundle(options); + return { success: true, bundleName: bundle.name }; + } catch (err) { + return { success: false, error: getErrorMessage(err) }; + } + } + + async remove(bundleName: string): Promise { + try { + const project = await this.readProjectSpec(); + + const index = (project.configBundles ?? []).findIndex(b => b.name === bundleName); + if (index === -1) { + return { success: false, error: `Configuration bundle "${bundleName}" not found.` }; + } + + project.configBundles.splice(index, 1); + await this.writeProjectSpec(project); + + return { success: true }; + } catch (err) { + return { success: false, error: getErrorMessage(err) }; + } + } + + async previewRemove(bundleName: string): Promise { + const project = await this.readProjectSpec(); + + const bundle = (project.configBundles ?? []).find(b => b.name === bundleName); + if (!bundle) { + throw new Error(`Configuration bundle "${bundleName}" not found.`); + } + + const summary: string[] = [`Removing configuration bundle: ${bundleName}`]; + const schemaChanges: SchemaChange[] = []; + + const afterSpec = { + ...project, + configBundles: (project.configBundles ?? 
[]).filter(b => b.name !== bundleName), + }; + + schemaChanges.push({ + file: 'agentcore/agentcore.json', + before: project, + after: afterSpec, + }); + + return { summary, directoriesToDelete: [], schemaChanges }; + } + + async getRemovable(): Promise { + try { + const project = await this.readProjectSpec(); + return (project.configBundles ?? []).map(b => ({ name: b.name })); + } catch { + return []; + } + } + + async getAllNames(): Promise { + try { + const project = await this.readProjectSpec(); + return (project.configBundles ?? []).map(b => b.name); + } catch { + return []; + } + } + + registerCommands(addCmd: Command, removeCmd: Command): void { + addCmd + .command(this.kind) + .description('[preview] Add a configuration bundle to the project') + .option('--name ', 'Bundle name') + .option('--description ', 'Bundle description') + .option( + '--components ', + 'Components map as inline JSON. Keys are ARNs or placeholders: {{runtime:}}, {{gateway:}}. Placeholders resolve to real ARNs at deploy time.' + ) + .option('--components-file ', 'Path to components JSON file (same format as --components)') + .option('--branch ', 'Branch name for versioning') + .option('--commit-message ', 'Commit message for this version') + .option('--json', 'Output as JSON') + .action( + async (cliOptions: { + name?: string; + description?: string; + components?: string; + componentsFile?: string; + branch?: string; + commitMessage?: string; + json?: boolean; + }) => { + try { + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } + + if (cliOptions.name || cliOptions.json) { + const fail = (error: string) => { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + console.error(error); + } + process.exit(1); + }; + + if (!cliOptions.name) { + fail('--name is required in non-interactive mode'); + } + + if (!cliOptions.components && !cliOptions.componentsFile) { + fail('Either --components or --components-file is required'); + } + + let components: Record }>; + if (cliOptions.componentsFile) { + const raw = readFileSync(cliOptions.componentsFile, 'utf-8'); + components = JSON.parse(raw) as Record }>; + } else { + components = JSON.parse(cliOptions.components!) as Record< + string, + { configuration: Record } + >; + } + + const result = await this.add({ + name: cliOptions.name!, + description: cliOptions.description, + components, + branchName: cliOptions.branch, + commitMessage: cliOptions.commitMessage, + }); + + if (cliOptions.json) { + console.log(JSON.stringify(result)); + } else if (result.success) { + console.log(`Added configuration bundle '${result.bundleName}'`); + } else { + console.error(result.error); + } + process.exit(result.success ? 
0 : 1); + } else { + // TUI fallback + const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/add/AddFlow'), + ]); + const { clear, unmount } = render( + React.createElement(AddFlow, { + isInteractive: false, + initialResource: 'config-bundle', + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(getErrorMessage(error)); + } + process.exit(1); + } + } + ); + + this.registerRemoveSubcommand(removeCmd); + } + + addScreen(): AddScreenComponent { + return null; + } + + private async createConfigBundle(options: AddConfigBundleOptions): Promise { + const project = await this.readProjectSpec(); + + this.checkDuplicate(project.configBundles ?? [], options.name); + + const bundle: ConfigBundle = { + name: options.name, + type: 'ConfigurationBundle', + ...(options.description && { description: options.description }), + components: options.components, + branchName: options.branchName ?? 
'mainline', + ...(options.commitMessage && { commitMessage: options.commitMessage }), + }; + + project.configBundles ??= []; + project.configBundles.push(bundle); + await this.writeProjectSpec(project); + + return bundle; + } +} diff --git a/src/cli/primitives/CredentialPrimitive.tsx b/src/cli/primitives/CredentialPrimitive.tsx index 52f578235..9607094f8 100644 --- a/src/cli/primitives/CredentialPrimitive.tsx +++ b/src/cli/primitives/CredentialPrimitive.tsx @@ -4,6 +4,8 @@ import { CredentialSchema } from '../../schema'; import { validateAddCredentialOptions } from '../commands/add/validate'; import { getErrorMessage } from '../errors'; import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; +import { cliCommandRun } from '../telemetry/cli-command-run.js'; +import { CredentialType, standardize } from '../telemetry/schemas/common-shapes.js'; import { requireTTY } from '../tui/guards/tty'; import { BasePrimitive } from './BasePrimitive'; import { computeDefaultCredentialEnvVarName } from './credential-utils'; @@ -273,23 +275,23 @@ export class CredentialPrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } - if ( - cliOptions.name || - cliOptions.apiKey || - cliOptions.json || - cliOptions.type || - cliOptions.discoveryUrl || - cliOptions.clientId || - cliOptions.clientSecret || - cliOptions.scopes - ) { - // CLI mode + if ( + cliOptions.name || + cliOptions.apiKey || + cliOptions.json || + cliOptions.type || + cliOptions.discoveryUrl || + cliOptions.clientId || + cliOptions.clientSecret || + cliOptions.scopes + ) { + // CLI mode + await cliCommandRun('add.credential', !!cliOptions.json, async () => { const validation = validateAddCredentialOptions({ name: cliOptions.name, type: cliOptions.type as 'api-key' | 'oauth' | undefined, @@ -301,12 +303,7 @@ export class CredentialPrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } - if (cliOptions.name || cliOptions.json) { - const fail = (error: string) => { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); + if (cliOptions.name || cliOptions.json) { + await cliCommandRun('add.evaluator', !!cliOptions.json, async () => { + const fail = (error: string): never => { + throw new Error(error); }; if (!cliOptions.name || !cliOptions.level) { @@ -298,9 +295,13 @@ export class EvaluatorPrimitive extends BasePrimitive) => { const cliOptions = rawOptions as unknown as CLIAddGatewayOptions; - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } - + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } + await cliCommandRun('add.gateway', !!cliOptions.json, async () => { const validation = validateAddGatewayOptions(cliOptions); if (!validation.valid) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: validation.error })); - } else { - console.error(validation.error); - } - process.exit(1); + throw new Error(validation.error); } // Parse custom claims JSON if provided (already validated) @@ -221,23 +217,30 @@ export class GatewayPrimitive extends BasePrimitive s.trim()) + .filter(Boolean).length + : 0; + return { + authorizer_type: standardize(AuthorizerType, cliOptions.authorizerType ?? 'NONE'), + has_policy_engine: !!cliOptions.policyEngine, + policy_engine_mode: standardize(PolicyEngineMode, cliOptions.policyEngineMode ?? 'log_only'), + semantic_search: cliOptions.semanticSearch !== false, + runtime_count: runtimeCount, + }; + }); }); removeCmd diff --git a/src/cli/primitives/GatewayTargetPrimitive.ts b/src/cli/primitives/GatewayTargetPrimitive.ts index 41a2e6a75..e8a1da996 100644 --- a/src/cli/primitives/GatewayTargetPrimitive.ts +++ b/src/cli/primitives/GatewayTargetPrimitive.ts @@ -14,6 +14,13 @@ import { validateAddGatewayTargetOptions } from '../commands/add/validate'; import { getErrorMessage } from '../errors'; import type { RemovableGatewayTarget } from '../operations/remove/remove-gateway-target'; import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; +import { cliCommandRun } from '../telemetry/cli-command-run.js'; +import { + GATEWAY_TARGET_TYPE_MAP, + GatewayTargetHost, + OutboundAuth, + standardize, +} from '../telemetry/schemas/common-shapes.js'; import { getTemplateToolDefinitions, renderGatewayTargetTemplate } from '../templates/GatewayTargetRenderer'; import { requireTTY } from '../tui/guards/tty'; import type { @@ -297,20 +304,15 @@ export class GatewayTargetPrimitive extends BasePrimitive { const validation = await 
validateAddGatewayTargetOptions(cliOptions); if (!validation.valid) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: validation.error })); - } else { - console.error(validation.error); - } - process.exit(1); + throw new Error(validation.error); } // Map CLI flag values to internal types @@ -321,6 +323,19 @@ export class GatewayTargetPrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } - if (cliOptions.name || cliOptions.json) { - // CLI mode + if (cliOptions.name || cliOptions.json) { + // CLI mode + await cliCommandRun('add.memory', !!cliOptions.json, async () => { const expiry = cliOptions.expiry ? parseInt(cliOptions.expiry, 10) : undefined; const validation = validateAddMemoryOptions({ name: cliOptions.name, @@ -203,12 +204,7 @@ export class MemoryPrimitive extends BasePrimitive s.trim().toUpperCase()) + .filter(Boolean); + return { + strategy_count: strategyList.length, + strategy_semantic: strategyList.includes('SEMANTIC'), + strategy_summarization: strategyList.includes('SUMMARIZATION'), + strategy_user_preference: strategyList.includes('USER_PREFERENCE'), + strategy_episodic: strategyList.includes('EPISODIC'), + }; + }); + } else { + try { // TUI fallback — dynamic imports to avoid pulling ink (async) into registry requireTTY(); const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ @@ -248,14 +259,10 @@ export class MemoryPrimitive extends BasePrimitive', 'Evaluator name(s), Builtin.* IDs, or ARNs [non-interactive]') .option('--evaluator-arn ', 'Evaluator ARN(s) [non-interactive]') .option('--sampling-rate ', 'Sampling percentage (0.01-100) [non-interactive]') + .option('--endpoint ', 'Runtime endpoint name to scope monitoring [non-interactive]') 
.option('--enable-on-create', 'Enable evaluation immediately after deploy [non-interactive]') .option('--json', 'Output as JSON [non-interactive]') .action( @@ -118,40 +121,32 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } - if (cliOptions.name || cliOptions.json) { - // Merge --evaluator and --evaluator-arn into a single list - const allEvaluators = [...(cliOptions.evaluator ?? []), ...(cliOptions.evaluatorArn ?? [])]; + if (cliOptions.name || cliOptions.json) { + // Merge --evaluator and --evaluator-arn into a single list + const allEvaluators = [...(cliOptions.evaluator ?? []), ...(cliOptions.evaluatorArn ?? [])]; + await cliCommandRun('add.online-eval', !!cliOptions.json, async () => { if (!cliOptions.name || !cliOptions.runtime || allEvaluators.length === 0 || !cliOptions.samplingRate) { - const error = - '--name, --runtime, --evaluator (and/or --evaluator-arn), and --sampling-rate are all required in non-interactive mode'; - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); + throw new Error( + '--name, --runtime, --evaluator (and/or --evaluator-arn), and --sampling-rate are all required in non-interactive mode' + ); } // Sampling rate as a percentage of requests to evaluate (0.01% to 100%) const samplingRate = parseFloat(cliOptions.samplingRate); if (isNaN(samplingRate) || samplingRate < 0.01 || samplingRate > 100) { - const error = `Invalid --sampling-rate "${cliOptions.samplingRate}". 
Must be a percentage between 0.01 and 100`; - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); + throw new Error( + `Invalid --sampling-rate "${cliOptions.samplingRate}". Must be a percentage between 0.01 and 100` + ); } const result = await this.add({ @@ -160,17 +155,26 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive r.name === options.agent); + if (!runtime) { + throw new Error(`Runtime "${options.agent}" not found in project.`); + } + if (!runtime.endpoints?.[options.endpoint]) { + throw new Error( + `Endpoint "${options.endpoint}" not found on runtime "${options.agent}". Available endpoints: ${ + runtime.endpoints ? Object.keys(runtime.endpoints).join(', ') : '(none)' + }` + ); + } + } + const config: OnlineEvalConfig = { name: options.name, agent: options.agent, evaluators: options.evaluators, samplingRate: options.samplingRate, ...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }), + ...(options.endpoint && { endpoint: options.endpoint }), }; project.onlineEvalConfigs.push(config); diff --git a/src/cli/primitives/PolicyEnginePrimitive.ts b/src/cli/primitives/PolicyEnginePrimitive.ts index a1f887547..bb9b314d8 100644 --- a/src/cli/primitives/PolicyEnginePrimitive.ts +++ b/src/cli/primitives/PolicyEnginePrimitive.ts @@ -3,6 +3,8 @@ import type { AgentCoreProjectSpec, PolicyEngine } from '../../schema'; import { PolicyEngineModeSchema, PolicyEngineSchema } from '../../schema'; import { getErrorMessage } from '../errors'; import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; +import { cliCommandRun } from '../telemetry/cli-command-run.js'; +import { AttachMode, standardize } from '../telemetry/schemas/common-shapes.js'; import { requireTTY } from '../tui/guards/tty'; import { BasePrimitive } from './BasePrimitive'; import { SOURCE_CODE_NOTE } from './constants'; @@ -221,20 +223,15 @@ 
export class PolicyEnginePrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } - if (cliOptions.name || cliOptions.description || cliOptions.encryptionKeyArn || cliOptions.json) { + if (cliOptions.name || cliOptions.description || cliOptions.encryptionKeyArn || cliOptions.json) { + await cliCommandRun('add.policy-engine', !!cliOptions.json, async () => { if (!cliOptions.name) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: '--name is required' })); - } else { - console.error('--name is required'); - } - process.exit(1); + throw new Error('--name is required'); } const result = await this.add({ @@ -253,15 +250,29 @@ export class PolicyEnginePrimitive extends BasePrimitive s.trim()) + .filter(Boolean).length + : 0; + return { + attach_gateway_count: gatewayCount, + attach_mode: standardize(AttachMode, cliOptions.attachMode ?? 'log_only'), + }; + }); + } else { + try { requireTTY(); const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ import('ink'), @@ -278,14 +289,10 @@ export class PolicyEnginePrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } - if ( - cliOptions.name || - cliOptions.engine || - cliOptions.source || - cliOptions.statement || - cliOptions.generate || - cliOptions.json - ) { + if ( + cliOptions.name || + cliOptions.engine || + cliOptions.source || + cliOptions.statement || + cliOptions.generate || + cliOptions.json + ) { + await cliCommandRun('add.policy', !!cliOptions.json, async () => { if (!cliOptions.name) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: '--name is required' })); - } else { - console.error('--name is required'); - } - process.exit(1); + throw new Error('--name is required'); } if (!cliOptions.engine) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: '--engine is required' })); - } else { - console.error('--engine is required'); - } - process.exit(1); + throw new Error('--engine is required'); } const result = await this.add({ @@ -335,15 +327,28 @@ export class PolicyPrimitive extends BasePrimitive { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } + await cliCommandRun('add.runtime-endpoint', !!cliOptions.json, async () => { const result = await this.add({ runtime: cliOptions.runtime, endpoint: cliOptions.endpoint, @@ -261,23 +262,18 @@ export class RuntimeEndpointPrimitive extends BasePrimitive ({ + ConfigIO: class { + readProjectSpec = mockReadProjectSpec; + writeProjectSpec = mockWriteProjectSpec; + }, + findConfigRoot: () => '/fake/root', +})); + +function makeProject(abTests: { name: string; gatewayRef?: string }[] = []) { + return { + name: 'TestProject', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [], + policyEngines: [], + configBundles: [], + abTests, + httpGateways: [] as { name: string; runtimeRef: string }[], + }; +} + +const validOptions: AddABTestOptions = { + name: 'MyTest', + agent: 'my-agent', + controlBundle: 'arn:bundle:control', + controlVersion: 'v1', + treatmentBundle: 'arn:bundle:treatment', + treatmentVersion: 'v1', + controlWeight: 80, + treatmentWeight: 20, + onlineEval: 'arn:eval:config', +}; + +let primitive: ABTestPrimitive; + +describe('ABTestPrimitive', () => { + beforeEach(() => { + vi.clearAllMocks(); + primitive = new ABTestPrimitive(); + }); + + it('has correct kind, label, and article', () => { + expect(primitive.kind).toBe('ab-test'); + expect(primitive.label).toBe('AB Test'); + // eslint-disable-next-line @typescript-eslint/dot-notation + expect(primitive['article']).toBe('an'); + }); + + describe('add', () => { + it('adds AB test to project spec and returns success', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.add(validOptions); + + expect(result.success).toBe(true); + expect(result).toHaveProperty('abTestName', 'MyTest'); + + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + 
expect(writtenSpec.abTests).toHaveLength(1); + expect(writtenSpec.abTests[0].name).toBe('MyTest'); + expect(writtenSpec.abTests[0].variants).toHaveLength(2); + expect(writtenSpec.abTests[0].variants[0].name).toBe('C'); + expect(writtenSpec.abTests[0].variants[0].weight).toBe(80); + expect(writtenSpec.abTests[0].variants[1].name).toBe('T1'); + expect(writtenSpec.abTests[0].variants[1].weight).toBe(20); + }); + + it('includes optional fields when provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + await primitive.add({ + ...validOptions, + description: 'Test description', + roleArn: 'arn:aws:iam::123:role/MyRole', + trafficHeaderName: 'X-AB-Route', + maxDurationDays: 30, + enableOnCreate: true, + }); + + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + const test = writtenSpec.abTests[0]; + expect(test.description).toBe('Test description'); + expect(test.roleArn).toBe('arn:aws:iam::123:role/MyRole'); + expect(test.trafficAllocationConfig).toEqual({ routeOnHeader: { headerName: 'X-AB-Route' } }); + expect(test.maxDurationDays).toBe(30); + expect(test.enableOnCreate).toBe(true); + }); + + it('omits optional fields when not provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + await primitive.add(validOptions); + + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + const test = writtenSpec.abTests[0]; + expect(test.description).toBeUndefined(); + expect(test.roleArn).toBeUndefined(); + expect(test.trafficAllocationConfig).toBeUndefined(); + expect(test.maxDurationDays).toBeUndefined(); + expect(test.enableOnCreate).toBeUndefined(); + }); + + it('returns error when AB test name already exists', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'MyTest' }])); + + const result = await primitive.add(validOptions); + + expect(result).toEqual( + 
expect.objectContaining({ success: false, error: expect.stringContaining('already exists') }) + ); + }); + + it('returns error when readProjectSpec fails', async () => { + mockReadProjectSpec.mockRejectedValue(new Error('disk read error')); + + const result = await primitive.add(validOptions); + + expect(result).toEqual(expect.objectContaining({ success: false, error: 'disk read error' })); + }); + + it('returns error when writeProjectSpec fails', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockRejectedValue(new Error('disk write error')); + + const result = await primitive.add(validOptions); + + expect(result).toEqual(expect.objectContaining({ success: false, error: 'disk write error' })); + }); + + it('returns error when variant weights do not sum to 100', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + + const result = await primitive.add({ + ...validOptions, + controlWeight: 80, + treatmentWeight: 80, + }); + + expect(result.success).toBe(false); + }); + }); + + describe('remove', () => { + it('removes AB test from project spec', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'TestA' }, { name: 'TestB' }])); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.remove('TestA'); + + expect(result.success).toBe(true); + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenSpec.abTests).toHaveLength(1); + expect(writtenSpec.abTests[0].name).toBe('TestB'); + }); + + it('returns error when AB test not found', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + + const result = await primitive.remove('NonExistent'); + + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toContain('NonExistent'); + expect(result.error).toContain('not found'); + } + }); + + it('returns error when readProjectSpec fails', async () => { + mockReadProjectSpec.mockRejectedValue(new 
Error('io error')); + + const result = await primitive.remove('Whatever'); + + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error).toBe('io error'); + } + }); + + it('cascade-deletes orphaned HTTP gateway when last referencing AB test is removed', async () => { + const project = makeProject([{ name: 'TestA', gatewayRef: '{{gateway:TestA-gw}}' }]); + project.httpGateways = [{ name: 'TestA-gw', runtimeRef: 'my-agent' }]; + mockReadProjectSpec.mockResolvedValue(project); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.remove('TestA'); + + expect(result.success).toBe(true); + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenSpec.abTests).toHaveLength(0); + // Gateway is retained by default — cascade-delete only happens with deleteGateway: true + expect(writtenSpec.httpGateways).toHaveLength(1); + }); + + it('retains HTTP gateway when another AB test still references it', async () => { + const project = makeProject([ + { name: 'TestA', gatewayRef: '{{gateway:shared-gw}}' }, + { name: 'TestB', gatewayRef: '{{gateway:shared-gw}}' }, + ]); + project.httpGateways = [{ name: 'shared-gw', runtimeRef: 'my-agent' }]; + mockReadProjectSpec.mockResolvedValue(project); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.remove('TestA'); + + expect(result.success).toBe(true); + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenSpec.abTests).toHaveLength(1); + expect(writtenSpec.httpGateways).toHaveLength(1); + expect(writtenSpec.httpGateways[0].name).toBe('shared-gw'); + }); + }); + + describe('previewRemove', () => { + it('returns preview with schema changes', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'TestA' }])); + + const preview = await primitive.previewRemove('TestA'); + + expect(preview.summary[0]).toContain('Removing AB test: TestA'); + expect(preview.schemaChanges).toHaveLength(1); 
+ expect(preview.schemaChanges[0]!.file).toBe('agentcore/agentcore.json'); + expect((preview.schemaChanges[0]!.after as { abTests: unknown[] }).abTests).toHaveLength(0); + }); + + it('throws when AB test not found', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + + await expect(primitive.previewRemove('Missing')).rejects.toThrow('not found'); + }); + }); + + describe('getRemovable', () => { + it('returns AB test names', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'A' }, { name: 'B' }])); + + const result = await primitive.getRemovable(); + + expect(result).toEqual([{ name: 'A' }, { name: 'B' }]); + }); + + it('returns empty array on error', async () => { + mockReadProjectSpec.mockRejectedValue(new Error('fail')); + + expect(await primitive.getRemovable()).toEqual([]); + }); + }); + + describe('getAllNames', () => { + it('returns AB test names as strings', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'X' }, { name: 'Y' }])); + + const result = await primitive.getAllNames(); + + expect(result).toEqual(['X', 'Y']); + }); + + it('returns empty array on error', async () => { + mockReadProjectSpec.mockRejectedValue(new Error('fail')); + + expect(await primitive.getAllNames()).toEqual([]); + }); + }); +}); diff --git a/src/cli/primitives/__tests__/GatewayPrimitive.test.ts b/src/cli/primitives/__tests__/GatewayPrimitive.test.ts index 17a3f0f2c..f812ca380 100644 --- a/src/cli/primitives/__tests__/GatewayPrimitive.test.ts +++ b/src/cli/primitives/__tests__/GatewayPrimitive.test.ts @@ -14,6 +14,9 @@ const defaultProject: AgentCoreProjectSpec = { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const { mockConfigExists, mockReadProjectSpec, mockWriteProjectSpec } = vi.hoisted(() => ({ diff --git a/src/cli/primitives/__tests__/HarnessPrimitive.test.ts b/src/cli/primitives/__tests__/HarnessPrimitive.test.ts index 
f96347982..5228957f5 100644 --- a/src/cli/primitives/__tests__/HarnessPrimitive.test.ts +++ b/src/cli/primitives/__tests__/HarnessPrimitive.test.ts @@ -44,6 +44,9 @@ const baseProject: AgentCoreProjectSpec = { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; describe('HarnessPrimitive', () => { @@ -518,6 +521,9 @@ describe('HarnessPrimitive', () => { expect(mockWriteProjectSpec).toHaveBeenCalledWith( expect.objectContaining({ harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }) ); expect(rm).toHaveBeenCalledWith('/tmp/test/agentcore/harnesses/test', { recursive: true, force: true }); diff --git a/src/cli/primitives/__tests__/auth-utils.test.ts b/src/cli/primitives/__tests__/auth-utils.test.ts index 2eca7c1a7..d309df4d1 100644 --- a/src/cli/primitives/__tests__/auth-utils.test.ts +++ b/src/cli/primitives/__tests__/auth-utils.test.ts @@ -94,6 +94,9 @@ describe('createManagedOAuthCredential', () => { agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], }; const jwtConfig: JwtConfigOptions = { diff --git a/src/cli/primitives/index.ts b/src/cli/primitives/index.ts index de7326c87..83bff4bbb 100644 --- a/src/cli/primitives/index.ts +++ b/src/cli/primitives/index.ts @@ -1,3 +1,4 @@ +export { ABTestPrimitive } from './ABTestPrimitive'; export { BasePrimitive } from './BasePrimitive'; export { MemoryPrimitive } from './MemoryPrimitive'; export { CredentialPrimitive } from './CredentialPrimitive'; @@ -19,6 +20,8 @@ export { onlineEvalConfigPrimitive, gatewayPrimitive, gatewayTargetPrimitive, + configBundlePrimitive, + abTestPrimitive, runtimeEndpointPrimitive, getPrimitive, } from './registry'; diff --git a/src/cli/primitives/registry.ts b/src/cli/primitives/registry.ts index 20a28c082..6f2145a79 100644 --- a/src/cli/primitives/registry.ts +++ b/src/cli/primitives/registry.ts @@ -1,5 +1,7 @@ +import { ABTestPrimitive } from 
'./ABTestPrimitive'; import { AgentPrimitive } from './AgentPrimitive'; import type { BasePrimitive } from './BasePrimitive'; +import { ConfigBundlePrimitive } from './ConfigBundlePrimitive'; import { CredentialPrimitive } from './CredentialPrimitive'; import { EvaluatorPrimitive } from './EvaluatorPrimitive'; import { GatewayPrimitive } from './GatewayPrimitive'; @@ -25,6 +27,8 @@ export const gatewayPrimitive = new GatewayPrimitive(); export const gatewayTargetPrimitive = new GatewayTargetPrimitive(); export const policyEnginePrimitive = new PolicyEnginePrimitive(); export const policyPrimitive = new PolicyPrimitive(); +export const configBundlePrimitive = new ConfigBundlePrimitive(); +export const abTestPrimitive = new ABTestPrimitive(); export const runtimeEndpointPrimitive = new RuntimeEndpointPrimitive(); /** @@ -41,6 +45,8 @@ export const ALL_PRIMITIVES: BasePrimitive[] = [ gatewayTargetPrimitive, policyEnginePrimitive, policyPrimitive, + configBundlePrimitive, + abTestPrimitive, runtimeEndpointPrimitive, ]; diff --git a/src/cli/project.ts b/src/cli/project.ts index b5cc89bfb..c6bf1f5d0 100644 --- a/src/cli/project.ts +++ b/src/cli/project.ts @@ -19,6 +19,9 @@ export function createDefaultProjectSpec(projectName: string): AgentCoreProjectS agentCoreGateways: [], policyEngines: [], harnesses: [], + configBundles: [], + abTests: [], + httpGateways: [], tags: { 'agentcore:created-by': 'agentcore-cli', 'agentcore:project-name': projectName, diff --git a/src/cli/telemetry/__tests__/client.test.ts b/src/cli/telemetry/__tests__/client.test.ts index e254524bf..96adebafc 100644 --- a/src/cli/telemetry/__tests__/client.test.ts +++ b/src/cli/telemetry/__tests__/client.test.ts @@ -116,18 +116,49 @@ describe('TelemetryClient', () => { expect(sink.metrics[0]!.attrs.check_only).toBe('true'); }); - it('silently drops invalid success payloads', async () => { + it('publishes metric with unknown defaults for incomplete success payloads', async () => { const sink = new 
InMemorySink(); const client = new TelemetryClient(sink); - // Missing required attrs for 'create' — should silently drop + // Missing required attrs for 'create' — should still publish with 'unknown' defaults await client.withCommandRun( 'create', // @ts-expect-error — intentionally incomplete async () => ({ language: 'python' }) ); - expect(sink.metrics).toHaveLength(0); + expect(sink.metrics).toHaveLength(1); + expect(sink.metrics[0]!.attrs).toMatchObject({ + exit_reason: 'success', + language: 'python', + framework: 'unknown', + model_provider: 'unknown', + }); + }); + + it('defaults invalid attrs to unknown while preserving valid ones', async () => { + const sink = new InMemorySink(); + const client = new TelemetryClient(sink); + + await client.withCommandRun( + 'create', + // @ts-expect-error — intentionally invalid enum value + async () => ({ + language: 'rust', // invalid enum + framework: 'strands', + model_provider: 'bedrock', + memory: 'shortterm', + protocol: 'mcp', + build: 'codezip', + agent_type: 'create', + network_mode: 'public', + has_agent: true, + }) + ); + + expect(sink.metrics).toHaveLength(1); + expect(sink.metrics[0]!.attrs.language).toBe('unknown'); + expect(sink.metrics[0]!.attrs.framework).toBe('strands'); }); it('records cancel when callback returns CANCELLED', async () => { diff --git a/src/cli/telemetry/cli-command-run.ts b/src/cli/telemetry/cli-command-run.ts new file mode 100644 index 000000000..987f05730 --- /dev/null +++ b/src/cli/telemetry/cli-command-run.ts @@ -0,0 +1,71 @@ +import { getErrorMessage } from '../errors'; +import type { AddResult } from '../primitives/types.js'; +import { TelemetryClientAccessor } from './client-accessor.js'; +import type { Command, CommandAttrs } from './schemas/command-run.js'; + +/** + * Run a CLI command with telemetry, standardized error output, and process.exit. + * The callback should throw on failure and return telemetry attrs on success. 
+ * + * If telemetry initialization fails, the command still runs without telemetry — + * telemetry must never block CLI behavior. + */ +export async function cliCommandRun( + command: C, + json: boolean, + fn: () => Promise> +): Promise { + try { + let client; + try { + client = await TelemetryClientAccessor.get(); + } catch { + // Telemetry init failed — run without it + await fn(); + process.exit(0); + } + // withCommandRun records success/failure telemetry, then re-throws on failure + await client.withCommandRun(command, fn); + process.exit(0); + } catch (error) { + if (json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(getErrorMessage(error)); + } + process.exit(1); + } +} + +/** + * Wrap a primitive .add() call with telemetry — used by TUI paths. + * CLI paths use {@link cliCommandRun} instead. + */ +export async function withAddTelemetry>( + command: C, + attrs: CommandAttrs, + fn: () => Promise> +): Promise> { + let client; + try { + client = await TelemetryClientAccessor.get(); + } catch { + return fn(); + } + + let result: AddResult | undefined; + try { + await client.withCommandRun(command, async () => { + result = await fn(); + if (!result.success) throw new Error(result.error); + return attrs; + }); + } catch (err) { + // withCommandRun re-throws after recording failure telemetry. + // result is set if fn() ran; if not, fn() itself threw. 
+ if (!result) { + return { success: false, error: getErrorMessage(err) }; + } + } + return result!; +} diff --git a/src/cli/telemetry/client-accessor.ts b/src/cli/telemetry/client-accessor.ts index c41c261df..04172dae5 100644 --- a/src/cli/telemetry/client-accessor.ts +++ b/src/cli/telemetry/client-accessor.ts @@ -1,4 +1,4 @@ -import { GLOBAL_CONFIG_DIR, readGlobalConfig } from '../global-config.js'; +import { GLOBAL_CONFIG_DIR, readGlobalConfig } from '../../lib/schemas/io/global-config.js'; import { TelemetryClient } from './client.js'; import { resolveAuditFilePath, resolveResourceAttributes } from './config.js'; import { FileSystemSink } from './sinks/filesystem-sink.js'; diff --git a/src/cli/telemetry/client.ts b/src/cli/telemetry/client.ts index 3228f45b1..91dffd94f 100644 --- a/src/cli/telemetry/client.ts +++ b/src/cli/telemetry/client.ts @@ -1,6 +1,6 @@ import { classifyError, isUserError } from './error-classification.js'; import { COMMAND_SCHEMAS, type Command, type CommandAttrs, deriveCommandGroup } from './schemas/command-run.js'; -import { type CommandResult, CommandResultSchema } from './schemas/common-shapes.js'; +import { type CommandResult, CommandResultSchema, resilientParse } from './schemas/common-shapes.js'; import type { MetricSink } from './sinks/metric-sink.js'; import { performance } from 'perf_hooks'; @@ -69,17 +69,24 @@ export class TelemetryClient { durationMs: number ): void { try { + // CommandResult is built internally — hard parse is intentional since + // a metric without a valid exit_reason is meaningless. CommandResultSchema.parse(result); - if (result.exit_reason !== 'failure' && result.exit_reason !== 'cancel') { - COMMAND_SCHEMAS[command].parse(attrs); - } + + // Validate command attrs resiliently: invalid fields default to 'unknown' + // instead of dropping the entire metric. + // On failure/cancel the callback attrs are empty so validation is skipped. 
+ const validatedAttrs = + result.exit_reason !== 'failure' && result.exit_reason !== 'cancel' + ? resilientParse(COMMAND_SCHEMAS[command], attrs as Record) + : attrs; const otelAttrs: Record = { command_group: deriveCommandGroup(command), command, }; - for (const obj of [result, attrs]) { + for (const obj of [result, validatedAttrs]) { for (const [k, v] of Object.entries(obj)) { if (typeof v === 'boolean') { otelAttrs[k] = String(v); diff --git a/src/cli/telemetry/config.ts b/src/cli/telemetry/config.ts index 364d57f68..fbaa3fb13 100644 --- a/src/cli/telemetry/config.ts +++ b/src/cli/telemetry/config.ts @@ -1,5 +1,5 @@ +import { getOrCreateInstallationId, readGlobalConfig } from '../../lib/schemas/io/global-config.js'; import { PACKAGE_VERSION } from '../constants.js'; -import { getOrCreateInstallationId, readGlobalConfig } from '../global-config.js'; import { type ResourceAttributes, ResourceAttributesSchema } from './schemas/common-attributes.js'; import { randomUUID } from 'crypto'; import os from 'os'; diff --git a/src/cli/telemetry/schemas/__tests__/command-run.test.ts b/src/cli/telemetry/schemas/__tests__/command-run.test.ts index 11d293c71..110df1284 100644 --- a/src/cli/telemetry/schemas/__tests__/command-run.test.ts +++ b/src/cli/telemetry/schemas/__tests__/command-run.test.ts @@ -1,6 +1,6 @@ import { COMMAND_SCHEMAS, type Command, type CommandAttrs, deriveCommandGroup } from '../command-run'; import { ResourceAttributesSchema } from '../common-attributes'; -import { CommandResultSchema } from '../common-shapes'; +import { CommandResultSchema, resilientParse } from '../common-shapes'; import { describe, expect, expectTypeOf, it } from 'vitest'; import { z } from 'zod'; @@ -170,3 +170,55 @@ describe('type safety', () => { } }); }); + +describe('resilientParse', () => { + it('passes valid attrs through unchanged', () => { + const attrs = { + language: 'python', + framework: 'strands', + model_provider: 'bedrock', + memory: 'shortterm', + protocol: 'mcp', + 
build: 'codezip', + agent_type: 'create', + network_mode: 'public', + has_agent: true, + }; + expect(resilientParse(COMMAND_SCHEMAS.create, attrs)).toEqual(attrs); + }); + + it('defaults a single invalid enum field to unknown', () => { + const attrs = { + language: 'rust', // invalid + framework: 'strands', + model_provider: 'bedrock', + memory: 'shortterm', + protocol: 'mcp', + build: 'codezip', + agent_type: 'create', + network_mode: 'public', + has_agent: true, + }; + const result = resilientParse(COMMAND_SCHEMAS.create, attrs); + expect(result.language).toBe('unknown'); + expect(result.framework).toBe('strands'); + }); + + it('defaults missing required fields to unknown', () => { + const result = resilientParse(COMMAND_SCHEMAS.create, { language: 'python' }); + expect(result.language).toBe('python'); + expect(result.framework).toBe('unknown'); + expect(result.model_provider).toBe('unknown'); + }); + + it('defaults all fields to unknown when all are invalid', () => { + const result = resilientParse(COMMAND_SCHEMAS.create, {}); + for (const value of Object.values(result)) { + expect(value).toBe('unknown'); + } + }); + + it('returns empty object for no-attrs schemas', () => { + expect(resilientParse(COMMAND_SCHEMAS['telemetry.disable'], {})).toEqual({}); + }); +}); diff --git a/src/cli/telemetry/schemas/command-run.ts b/src/cli/telemetry/schemas/command-run.ts index dfd127a98..0acfcaf1b 100644 --- a/src/cli/telemetry/schemas/command-run.ts +++ b/src/cli/telemetry/schemas/command-run.ts @@ -157,6 +157,7 @@ export const COMMAND_SCHEMAS = { 'add.gateway-target': AddGatewayTargetAttrs, 'add.policy-engine': AddPolicyEngineAttrs, 'add.policy': AddPolicyAttrs, + 'add.runtime-endpoint': NoAttrs, // deploy deploy: DeployAttrs, diff --git a/src/cli/telemetry/schemas/common-shapes.ts b/src/cli/telemetry/schemas/common-shapes.ts index 5c5e56493..4624883cd 100644 --- a/src/cli/telemetry/schemas/common-shapes.ts +++ b/src/cli/telemetry/schemas/common-shapes.ts @@ -8,6 +8,40 @@ 
export function safeSchema>(shape: T) { return z.object(shape); } +/** + * Validate each field in a schema individually, defaulting to 'unknown' on failure. + * This ensures a single invalid attribute never blocks the entire metric from being published. + * Keys in attrs not present in the schema are omitted from the result. + */ +export function resilientParse( + schema: z.ZodObject, + attrs: Record +): Record { + const result: Record = {}; + for (const key of Object.keys(schema.shape)) { + const field = schema.shape[key] as z.ZodType; + const parsed = field.safeParse(attrs[key]); + result[key] = parsed.success ? parsed.data : 'unknown'; + } + return result; +} + +/** + * Lowercase a CLI value and parse it through a Zod enum, returning the narrowed type. + * The `as` cast on the failure branch is intentional: invalid values pass through to + * recordCommandRun, where resilientParse(COMMAND_SCHEMAS[command], attrs) validates each + * field individually — replacing any invalid field with 'unknown' instead of dropping the metric. + * This ensures telemetry never crashes the CLI while keeping the happy-path type-safe. + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export function standardize>(schema: T, value: string | undefined): z.infer { + const lower = (value ?? '').toLowerCase(); + const result = schema.safeParse(lower); + // If the value doesn't match the enum, return the lowercased value anyway — + // recordCommandRun's resilientParse will default the invalid field to 'unknown'. + return (result.success ? result.data : lower) as z.infer; +} + // Primitive types export const Count = z.number().int().nonnegative(); @@ -41,7 +75,17 @@ export const GatewayTargetType = z.enum([ 'open-api-schema', 'smithy-model', 'lambda-function-arn', + 'unknown', ]); + +/** Map camelCase CLI target type to kebab-case telemetry enum value.
*/ +export const GATEWAY_TARGET_TYPE_MAP: Record> = { + apiGateway: 'api-gateway', + openApiSchema: 'open-api-schema', + smithyModel: 'smithy-model', + lambdaFunctionArn: 'lambda-function-arn', + mcpServer: 'mcp-server', +}; export const Language = z.enum(['python', 'typescript', 'other']); export const Level = z.enum(['session', 'trace', 'tool_call']); export const Memory = z.enum(['none', 'shortterm', 'longandshortterm']); diff --git a/src/cli/templates/types.ts b/src/cli/templates/types.ts index 185f7b084..907cc99dd 100644 --- a/src/cli/templates/types.ts +++ b/src/cli/templates/types.ts @@ -72,4 +72,6 @@ export interface AgentRenderConfig { sessionStorageMountPath?: string; /** Whether to wrap entrypoint with opentelemetry-instrument. Defaults to true. */ enableOtel?: boolean; + /** Whether a config bundle is wired into the agent template */ + hasConfigBundle?: boolean; } diff --git a/src/cli/tui/App.tsx b/src/cli/tui/App.tsx index 2f5087329..96b5c7f85 100644 --- a/src/cli/tui/App.tsx +++ b/src/cli/tui/App.tsx @@ -4,8 +4,10 @@ import { LayoutProvider } from './context'; import { CLI_ONLY_EXAMPLES } from './copy'; import { setExitAction } from './exit-action'; import { MissingProjectMessage, WrongDirectoryMessage, getProjectRootMismatch, projectExists } from './guards'; +import { ABTestPickerScreen } from './screens/ab-test'; import { AddFlow } from './screens/add/AddFlow'; import { CliOnlyScreen } from './screens/cli-only'; +import { ConfigBundleFlow } from './screens/config-bundle-hub'; import { CreateScreen } from './screens/create'; import { DeployScreen } from './screens/deploy/DeployScreen'; import { EvalHubScreen, EvalScreen } from './screens/eval'; @@ -15,8 +17,9 @@ import { ImportFlow } from './screens/import'; import { InvokeScreen } from './screens/invoke'; import { OnlineEvalDashboard } from './screens/online-eval'; import { PackageScreen } from './screens/package'; +import { RecommendationFlow, RecommendationHistoryScreen, RecommendationsHubScreen } 
from './screens/recommendation'; import { RemoveFlow } from './screens/remove'; -import { RunEvalFlow, RunScreen } from './screens/run-eval'; +import { BatchEvalHistoryScreen, RunBatchEvalFlow, RunEvalFlow, RunScreen } from './screens/run-eval'; import { StatusScreen } from './screens/status/StatusScreen'; import { UpdateScreen } from './screens/update'; import { ValidateScreen } from './screens/validate'; @@ -38,6 +41,11 @@ type Route = | { name: 'remove' } | { name: 'run' } | { name: 'run-eval'; from?: 'run' | 'evals' } + | { name: 'run-batch-eval'; from?: 'run' | 'evals' } + | { name: 'batch-eval-history' } + | { name: 'recommendations-hub' } + | { name: 'recommend'; from?: 'recommendations-hub' | 'run' } + | { name: 'recommendation-history' } | { name: 'evals' } | { name: 'eval-runs' } | { name: 'online-evals' } @@ -45,7 +53,9 @@ type Route = | { name: 'validate' } | { name: 'package' } | { name: 'update' } + | { name: 'config-bundle' } | { name: 'import' } + | { name: 'ab-test' } | { name: 'cli-only'; commandId: string }; // Commands that don't require being at the project root @@ -111,6 +121,8 @@ function AppContent() { setRoute({ name: 'evals' }); } else if (id === 'fetch') { setRoute({ name: 'fetch-access' }); + } else if (id === 'recommendations') { + setRoute({ name: 'recommendations-hub' }); } else if (id === 'validate') { setRoute({ name: 'validate' }); } else if (id === 'package') { @@ -123,6 +135,10 @@ function AppContent() { setRoute({ name: 'import' }); } else if (id === 'update') { setRoute({ name: 'update' }); + } else if (id === 'config-bundle') { + setRoute({ name: 'config-bundle' }); + } else if (id === 'ab-test') { + setRoute({ name: 'ab-test' }); } }; @@ -213,6 +229,8 @@ function AppContent() { return ( setRoute({ name: 'run-eval', from: 'run' })} + onRunBatchEval={() => setRoute({ name: 'run-batch-eval', from: 'run' })} + onRunRecommendation={() => setRoute({ name: 'recommend', from: 'run' })} onExit={() => setRoute({ name: 'help' })} /> ); 
@@ -224,6 +242,8 @@ function AppContent() { onSelect={view => { if (view === 'run-eval') setRoute({ name: 'run-eval', from: 'evals' }); if (view === 'runs') setRoute({ name: 'eval-runs' }); + if (view === 'run-batch-eval') setRoute({ name: 'run-batch-eval', from: 'evals' }); + if (view === 'batch-eval-history') setRoute({ name: 'batch-eval-history' }); if (view === 'online-dashboard') setRoute({ name: 'online-evals' }); }} onExit={() => setRoute({ name: 'help' })} @@ -241,6 +261,36 @@ function AppContent() { ); } + if (route.name === 'run-batch-eval') { + const backRoute = route.from ?? 'run'; + return setRoute({ name: backRoute } as Route)} />; + } + + if (route.name === 'batch-eval-history') { + return setRoute({ name: 'evals' })} />; + } + + if (route.name === 'recommendations-hub') { + return ( + { + if (view === 'run-recommendation') setRoute({ name: 'recommend', from: 'recommendations-hub' }); + if (view === 'recommendation-history') setRoute({ name: 'recommendation-history' }); + }} + onExit={() => setRoute({ name: 'help' })} + /> + ); + } + + if (route.name === 'recommend') { + const backRoute = route.from ?? 
'recommendations-hub'; + return setRoute({ name: backRoute } as Route)} />; + } + + if (route.name === 'recommendation-history') { + return setRoute({ name: 'recommendations-hub' })} />; + } + if (route.name === 'eval-runs') { return setRoute({ name: 'evals' })} />; } @@ -274,6 +324,14 @@ function AppContent() { return setRoute({ name: 'help' })} />; } + if (route.name === 'config-bundle') { + return setRoute({ name: 'help' })} />; + } + + if (route.name === 'ab-test') { + return setRoute({ name: 'help' })} />; + } + if (route.name === 'cli-only') { const info = CLI_ONLY_EXAMPLES[route.commandId]; if (info) { diff --git a/src/cli/tui/components/DeployStatus.tsx b/src/cli/tui/components/DeployStatus.tsx index 712c9dd95..c6e78b3e4 100644 --- a/src/cli/tui/components/DeployStatus.tsx +++ b/src/cli/tui/components/DeployStatus.tsx @@ -7,6 +7,8 @@ interface DeployStatusProps { messages: DeployMessage[]; isComplete: boolean; hasError: boolean; + hasPostDeployError?: boolean; + postDeployWarnings?: string[]; } const PROGRESS_BAR_WIDTH = 20; @@ -127,7 +129,13 @@ function ResourceLine({ resource }: { resource: ParsedResource }) { * During deployment: shows last N resource events (type + status only) * After completion: shows success/failure state */ -export function DeployStatus({ messages, isComplete, hasError }: DeployStatusProps) { +export function DeployStatus({ + messages, + isComplete, + hasError, + hasPostDeployError, + postDeployWarnings, +}: DeployStatusProps) { // Parse and filter messages to only meaningful resource updates const parsedResources = messages .map(msg => ({ original: msg, parsed: parseResourceMessage(msg) })) @@ -139,16 +147,19 @@ export function DeployStatus({ messages, isComplete, hasError }: DeployStatusPro // When complete, show final status if (isComplete) { + const hasWarning = hasPostDeployError && !hasError; + const borderColor = hasError ? 'red' : hasWarning ? 
'yellow' : 'green'; + const textColor = borderColor; + const bannerText = hasError + ? '✗ Deploy to AWS Failed' + : hasWarning + ? '⚠ Deploy to AWS Complete (with warnings)' + : '✓ Deploy to AWS Complete'; + return ( - - - {hasError ? '✗ Deploy to AWS Failed' : '✓ Deploy to AWS Complete'} + + + {bannerText} {progress && ( @@ -162,6 +173,15 @@ export function DeployStatus({ messages, isComplete, hasError }: DeployStatusPro ))} )} + {hasWarning && postDeployWarnings && postDeployWarnings.length > 0 && ( + + {postDeployWarnings.map((w, i) => ( + + {w} + + ))} + + )} ); } diff --git a/src/cli/tui/components/MultiSelectList.tsx b/src/cli/tui/components/MultiSelectList.tsx index 74f6ef2d5..1f2994f22 100644 --- a/src/cli/tui/components/MultiSelectList.tsx +++ b/src/cli/tui/components/MultiSelectList.tsx @@ -6,6 +6,8 @@ export interface MultiSelectListProps { selectedIndex: number; selectedIds: Set; emptyMessage?: string; + /** Maximum number of visible items before scrolling. Undefined = show all. 
*/ + maxVisibleItems?: number; } export function MultiSelectList(props: MultiSelectListProps) { @@ -18,11 +20,30 @@ export function MultiSelectList(props: MultiSelectList ); } + const { items, selectedIndex, selectedIds, maxVisibleItems } = props; + const needsScroll = maxVisibleItems !== undefined && items.length > maxVisibleItems; + + let visibleItems = items; + let viewportStart = 0; + let viewportEnd = items.length; + + if (needsScroll) { + const halfVisible = Math.floor(maxVisibleItems / 2); + viewportStart = Math.max(0, selectedIndex - halfVisible); + viewportEnd = Math.min(items.length, viewportStart + maxVisibleItems); + if (viewportEnd - viewportStart < maxVisibleItems) { + viewportStart = Math.max(0, viewportEnd - maxVisibleItems); + } + visibleItems = items.slice(viewportStart, viewportEnd); + } + return ( - {props.items.map((item, idx) => { - const isCursor = idx === props.selectedIndex; - const isChecked = props.selectedIds.has(item.id); + {needsScroll && viewportStart > 0 && ↑ {viewportStart} more} + {visibleItems.map((item, idx) => { + const actualIndex = viewportStart + idx; + const isCursor = actualIndex === selectedIndex; + const isChecked = selectedIds.has(item.id); const checkbox = isChecked ? 
'[✓]' : '[ ]'; return ( @@ -35,6 +56,7 @@ export function MultiSelectList(props: MultiSelectList ); })} + {needsScroll && viewportEnd < items.length && ↓ {items.length - viewportEnd} more} ); } diff --git a/src/cli/tui/components/PathInput.tsx b/src/cli/tui/components/PathInput.tsx index ebb16f956..f2663cea4 100644 --- a/src/cli/tui/components/PathInput.tsx +++ b/src/cli/tui/components/PathInput.tsx @@ -209,8 +209,30 @@ export function PathInput({ return; } - // Enter: Validate and submit the current path + // Enter: If a dropdown item is highlighted, select it first; then validate and submit if (key.return) { + // If there's a highlighted match, auto-select it + if (matches.length > 0 && matches[clampedIndex]) { + const selected = matches[clampedIndex]; + if (selected.isDirectory) { + // Drill into directory + setValue(selected.value); + setCursor(selected.value.length); + setSelectedIndex(0); + return; + } + // It's a file — select and submit it + const validationError = allowCreate + ? validatePathForCreate(selected.value, basePath) + : validatePath(selected.value, basePath, pathType); + if (validationError) { + setError(validationError); + return; + } + onSubmit(selected.value); + return; + } + const trimmed = value.trim(); if (!trimmed) { if (allowEmpty) { diff --git a/src/cli/tui/components/ResourceGraph.tsx b/src/cli/tui/components/ResourceGraph.tsx index 2c1e3f188..922d63344 100644 --- a/src/cli/tui/components/ResourceGraph.tsx +++ b/src/cli/tui/components/ResourceGraph.tsx @@ -21,6 +21,8 @@ const ICONS = { 'online-eval': '↻', 'policy-engine': '▣', policy: '▢', + 'config-bundle': '⬡', + 'ab-test': '⚗', 'runtime-endpoint': '◉', } as const; @@ -104,7 +106,7 @@ function ResourceRow({ )} {invocationUrl && ( - {' '}URL: {invocationUrl} + {' '}Invocation URL: {invocationUrl} )} @@ -131,6 +133,8 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res const mcpRuntimeTools = mcp?.mcpRuntimeTools ?? 
[]; const unassignedTargets = mcp?.unassignedTargets ?? []; const policyEngines = project.policyEngines ?? []; + const configBundles = project.configBundles ?? []; + const abTests = project.abTests ?? []; // Build lookup map and collect pending-removal resources in a single pass const { statusMap, pendingRemovals } = useMemo(() => { @@ -333,6 +337,49 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res )} + {/* Configuration Bundles */} + {configBundles.length > 0 && ( + + Configuration Bundles + {configBundles.map(bundle => { + const rsEntry = statusMap.get(`config-bundle:${bundle.name}`); + return ( + + ); + })} + + )} + + {/* AB Tests */} + {abTests.length > 0 && ( + + AB Tests + {abTests.map(test => { + const rsEntry = statusMap.get(`ab-test:${test.name}`); + return ( + + ); + })} + + )} + {/* Removed locally — still deployed in AWS, will be torn down on next deploy */} {pendingRemovals.length > 0 && ( @@ -464,7 +511,9 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res {ICONS.evaluator} evaluator{' '} {ICONS['online-eval']} online-eval{' '} {ICONS.gateway} gateway{' '} - {ICONS['policy-engine']} policy engine + {ICONS['policy-engine']} policy engine{' '} + {ICONS['config-bundle']} config bundle{' '} + {ICONS['ab-test']} ab test diff --git a/src/cli/tui/components/SelectList.tsx b/src/cli/tui/components/SelectList.tsx index 6163c102a..feea63248 100644 --- a/src/cli/tui/components/SelectList.tsx +++ b/src/cli/tui/components/SelectList.tsx @@ -13,6 +13,8 @@ export function SelectList(props: { items: T[]; selectedIndex: number; emptyMessage?: string; + /** Maximum number of visible items before scrolling. Undefined = show all. 
*/ + maxVisibleItems?: number; }) { if (props.items.length === 0) { return ( @@ -24,10 +26,29 @@ export function SelectList(props: { ); } + const { items, selectedIndex, maxVisibleItems } = props; + const needsScroll = maxVisibleItems !== undefined && items.length > maxVisibleItems; + + let visibleItems = items; + let viewportStart = 0; + let viewportEnd = items.length; + + if (needsScroll) { + const halfVisible = Math.floor(maxVisibleItems / 2); + viewportStart = Math.max(0, selectedIndex - halfVisible); + viewportEnd = Math.min(items.length, viewportStart + maxVisibleItems); + if (viewportEnd - viewportStart < maxVisibleItems) { + viewportStart = Math.max(0, viewportEnd - maxVisibleItems); + } + visibleItems = items.slice(viewportStart, viewportEnd); + } + return ( - {props.items.map((item, idx) => { - const selected = idx === props.selectedIndex; + {needsScroll && viewportStart > 0 && ↑ {viewportStart} more} + {visibleItems.map((item, idx) => { + const actualIndex = viewportStart + idx; + const selected = actualIndex === selectedIndex; const disabled = item.disabled ?? false; return ( @@ -43,6 +64,7 @@ export function SelectList(props: { ); })} + {needsScroll && viewportEnd < items.length && ↓ {items.length - viewportEnd} more} ); } diff --git a/src/cli/tui/components/WizardSelect.tsx b/src/cli/tui/components/WizardSelect.tsx index bd4343813..184720398 100644 --- a/src/cli/tui/components/WizardSelect.tsx +++ b/src/cli/tui/components/WizardSelect.tsx @@ -16,6 +16,8 @@ interface WizardSelectBaseProps { interface WizardSelectProps extends WizardSelectBaseProps { /** Current selected index */ selectedIndex: number; + /** Maximum visible items before scrolling. Undefined = show all. 
*/ + maxVisibleItems?: number; } interface WizardMultiSelectProps extends WizardSelectBaseProps { @@ -23,6 +25,8 @@ interface WizardMultiSelectProps extends WizardSelectBaseProps { cursorIndex: number; /** Currently selected item IDs */ selectedIds: Set; + /** Maximum visible items before scrolling. Undefined = show all. */ + maxVisibleItems?: number; } /** @@ -39,13 +43,25 @@ interface WizardMultiSelectProps extends WizardSelectBaseProps { * /> * ``` */ -export function WizardSelect({ title, description, items, selectedIndex, emptyMessage }: WizardSelectProps) { +export function WizardSelect({ + title, + description, + items, + selectedIndex, + emptyMessage, + maxVisibleItems, +}: WizardSelectProps) { return ( {title} {description && {description}} - + ); @@ -73,6 +89,7 @@ export function WizardMultiSelect({ cursorIndex, selectedIds, emptyMessage, + maxVisibleItems, }: WizardMultiSelectProps) { return ( @@ -84,6 +101,7 @@ export function WizardMultiSelect({ selectedIndex={cursorIndex} selectedIds={selectedIds} emptyMessage={emptyMessage} + maxVisibleItems={maxVisibleItems} /> diff --git a/src/cli/tui/components/__tests__/DeployStatus.test.tsx b/src/cli/tui/components/__tests__/DeployStatus.test.tsx index f13ad796a..fedca8e1a 100644 --- a/src/cli/tui/components/__tests__/DeployStatus.test.tsx +++ b/src/cli/tui/components/__tests__/DeployStatus.test.tsx @@ -155,6 +155,55 @@ describe('DeployStatus', () => { }); }); + describe('warning state (post-deploy errors)', () => { + it('shows warning banner when hasPostDeployError is true', () => { + const { lastFrame } = render( + + ); + const frame = lastFrame()!; + + expect(frame).toContain('⚠'); + expect(frame).toContain('Deploy to AWS Complete (with warnings)'); + }); + + it('shows post-deploy warnings in the banner', () => { + const warnings = ['Config bundle "my-bundle": timeout', 'AB test "test-1": not found']; + const { lastFrame } = render( + + ); + const frame = lastFrame()!; + + expect(frame).toContain('Config 
bundle "my-bundle": timeout'); + expect(frame).toContain('AB test "test-1": not found'); + }); + + it('warning state takes precedence over complete state', () => { + const { lastFrame } = render( + + ); + const frame = lastFrame()!; + + expect(frame).not.toContain('✓ Deploy to AWS Complete'); + expect(frame).toContain('⚠ Deploy to AWS Complete (with warnings)'); + }); + + it('error state takes precedence over warning state', () => { + const { lastFrame } = render( + + ); + const frame = lastFrame()!; + + expect(frame).toContain('✗ Deploy to AWS Failed'); + expect(frame).not.toContain('with warnings'); + }); + }); + describe('error state details', () => { it('shows last 3 resource events on failure', () => { const messages = [ diff --git a/src/cli/tui/constants.ts b/src/cli/tui/constants.ts index 74762fb91..98ff3841c 100644 --- a/src/cli/tui/constants.ts +++ b/src/cli/tui/constants.ts @@ -36,6 +36,8 @@ export const HELP_TEXT = { STATUS_REFRESH: '↑↓ select · Enter refresh · Esc back · Ctrl+C quit', /** Status screen refresh with target cycling */ STATUS_TARGET_CYCLE: '↑↓ select · Enter refresh · T target · Esc back · Ctrl+C quit', + /** Variant config form */ + VARIANTS_FORM: 'Enter to select · Esc back', } as const; /** diff --git a/src/cli/tui/copy.ts b/src/cli/tui/copy.ts index 2aef3c42c..5ed1bff33 100644 --- a/src/cli/tui/copy.ts +++ b/src/cli/tui/copy.ts @@ -41,15 +41,19 @@ export const COMMAND_DESCRIPTIONS = { remove: 'Remove resources from project config.', status: 'Show deployed resource details and status.', traces: 'View and download agent traces.', - evals: 'View past eval run results.', + evals: 'View saved eval and batch eval results from past runs.', fetch: 'Fetch access info for deployed resources.', - pause: 'Pause an online eval config. Supports --arn for configs outside the project.', - resume: 'Resume a paused online eval config. 
Supports --arn for configs outside the project.', - run: 'Run on-demand evaluation.', + pause: 'Pause a deployed resource (online eval config, A/B test).', + resume: 'Resume a paused resource (online eval config, A/B test).', + recommend: '[preview] Run optimization recommendations for system prompts and tool descriptions.', + recommendations: '[preview] View recommendation history from past runs.', + run: 'Run evaluations, batch evaluations, or optimization recommendations.', + stop: 'Stop a running batch evaluation or A/B test.', import: 'Import a runtime, memory, or starter toolkit into this project. [experimental]', telemetry: 'Manage anonymous usage analytics preferences.', update: 'Check for and install CLI updates', validate: 'Validate agentcore/ config files.', + 'config-bundle': '[preview] Manage configuration bundle versions and diffs.', } as const; /** @@ -62,7 +66,7 @@ export const CLI_ONLY_EXAMPLES: Record', 'agentcore resume online-eval --arn '], }, + 'run eval': { + description: 'Run on-demand evaluation of runtime traces against one or more evaluators.', + examples: [ + 'agentcore run eval -r MyAgent -e Builtin.Correctness', + 'agentcore run eval -r MyAgent -e Builtin.Faithfulness --lookback 14', + 'agentcore run eval -r MyAgent -e Builtin.Correctness -A "Must mention pricing" --expected-response "The price is $10"', + 'agentcore run eval --runtime-arn --evaluator-arn --region us-east-1', + ], + }, + 'run batch-evaluation': { + description: 'Run evaluators in batch across all agent sessions found in CloudWatch.', + examples: [ + 'agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness', + 'agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness Builtin.Faithfulness --json', + 'agentcore run batch-evaluation -r MyAgent -e Builtin.Completeness -n "weekly-check"', + ], + }, + 'run recommendation': { + description: 'Optimize system prompts or tool descriptions using agent traces.', + examples: [ + 'agentcore run recommendation -t 
system-prompt -r MyAgent -e Builtin.Correctness --inline "You are a helpful assistant"', + 'agentcore run recommendation -t system-prompt -r MyAgent -e Builtin.Correctness --prompt-file ./prompt.txt', + 'agentcore run recommendation -t tool-description -r MyAgent --tools "search:Searches the web,calc:Does math"', + 'agentcore run recommendation -t system-prompt -r MyAgent -e Builtin.Correctness --bundle-name MyBundle', + ], + }, + stop: { + description: 'Stop a running batch evaluation or A/B test.', + examples: [ + 'agentcore stop batch-evaluation -i ', + 'agentcore stop batch-evaluation -i --json', + 'agentcore stop ab-test ', + ], + }, }; diff --git a/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx b/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx new file mode 100644 index 000000000..89182b2e5 --- /dev/null +++ b/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx @@ -0,0 +1,347 @@ +import { usePanelNavigation } from '../usePanelNavigation.js'; +import { Text } from 'ink'; +import { render } from 'ink-testing-library'; +import React from 'react'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const UP_ARROW = '\x1B[A'; +const DOWN_ARROW = '\x1B[B'; +const ENTER = '\r'; +const ESCAPE = '\x1B'; +const TAB = '\t'; + +afterEach(() => vi.restoreAllMocks()); + +// Wrapper component to test the hook via rendering +function PanelNav({ + isActive = true, + fieldCount = 3, + onExit = vi.fn(), + isFieldDisabled, + isFieldAutoCompleted, + onComplete, + onResult, +}: { + isActive?: boolean; + fieldCount?: number; + onExit?: () => void; + isFieldDisabled?: (column: number, field: number) => boolean; + isFieldAutoCompleted?: (column: number, field: number) => boolean; + onComplete?: () => void; + onResult?: (result: ReturnType) => void; +}) { + const result = usePanelNavigation({ + isActive, + fieldCount, + onExit, + isFieldDisabled, + isFieldAutoCompleted, + onComplete, + }); + + onResult?.(result); + + return ( + + 
col:{result.position.column} field:{result.position.field} layer:{result.position.layer} + + ); +} + +const delay = (ms = 50) => new Promise(resolve => setTimeout(resolve, ms)); + +describe('usePanelNavigation', () => { + it('starts at column 0, field 0, layer focus', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('col:0'); + expect(lastFrame()).toContain('field:0'); + expect(lastFrame()).toContain('layer:focus'); + }); + + describe('Tab switches columns', () => { + it('Tab switches from column 0 to column 1', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(TAB); + await delay(); + + expect(lastFrame()).toContain('col:1'); + }); + + it('Tab switches from column 1 back to column 0', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(TAB); // 0 → 1 + await delay(); + stdin.write(TAB); // 1 → 0 + await delay(); + + expect(lastFrame()).toContain('col:0'); + }); + }); + + describe('Up/Down moves between fields', () => { + it('Down moves to next field', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(DOWN_ARROW); + await delay(); + + expect(lastFrame()).toContain('field:1'); + }); + + it('Up moves to previous field', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(DOWN_ARROW); + stdin.write(DOWN_ARROW); + await delay(); + expect(lastFrame()).toContain('field:2'); + + stdin.write(UP_ARROW); + await delay(); + expect(lastFrame()).toContain('field:1'); + }); + }); + + it('Up at field 0 stays at field 0', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(UP_ARROW); + await delay(); + + expect(lastFrame()).toContain('field:0'); + }); + + it('Down at last field stays at last field', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(DOWN_ARROW); + stdin.write(DOWN_ARROW); // field 2 (last) + await delay(); + 
expect(lastFrame()).toContain('field:2'); + + stdin.write(DOWN_ARROW); // should stay + await delay(); + expect(lastFrame()).toContain('field:2'); + }); + + it('Enter activates field (layer → active)', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(ENTER); + await delay(); + + expect(lastFrame()).toContain('layer:active'); + }); + + describe('Escape navigation', () => { + it('Escape at field 0 column 0 calls onExit', async () => { + const onExit = vi.fn(); + const { stdin } = render(); + + await delay(); + stdin.write(ESCAPE); + await delay(); + + expect(onExit).toHaveBeenCalledTimes(1); + }); + + it('Escape at field > 0 goes to field 0', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(DOWN_ARROW); + stdin.write(DOWN_ARROW); + await delay(); + expect(lastFrame()).toContain('field:2'); + + stdin.write(ESCAPE); + await delay(); + expect(lastFrame()).toContain('field:0'); + }); + + it('Escape at column 1 field 0 goes to column 0', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(TAB); // go to column 1 + await delay(); + expect(lastFrame()).toContain('col:1'); + + stdin.write(ESCAPE); + await delay(); + expect(lastFrame()).toContain('col:0'); + expect(lastFrame()).toContain('field:0'); + }); + }); + + describe('deactivate auto-advance', () => { + it('deactivate auto-advances to next field in same column', async () => { + const onResult = vi.fn(); + const { stdin } = render(); + + await delay(); + stdin.write(ENTER); // activate field 0 + await delay(); + + const result = onResult.mock.calls[onResult.mock.calls.length - 1]![0]; + expect(result.position.layer).toBe('active'); + }); + }); + + describe('deactivate behavior', () => { + // Harness that auto-deactivates when activated to test the deactivate advance path + function AutoDeactivateHarness({ fieldCount = 3, onComplete }: { fieldCount?: number; onComplete?: () => void }) { + const nav = 
usePanelNavigation({ + isActive: true, + fieldCount, + onExit: vi.fn(), + onComplete, + }); + + // When activated, immediately deactivate on next render + React.useEffect(() => { + if (nav.position.layer === 'active') { + nav.deactivate(); + } + }, [nav.position.layer, nav.position.column, nav.position.field, nav.deactivate]); + + return ( + + col:{nav.position.column} field:{nav.position.field} layer:{nav.position.layer} + + ); + } + + it('deactivate at field 0 advances to field 1 in same column', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(ENTER); // activate field 0 → auto-deactivate → field 1 + await delay(); + + expect(lastFrame()).toContain('field:1'); + expect(lastFrame()).toContain('col:0'); + expect(lastFrame()).toContain('layer:focus'); + }); + + it('deactivate at last field of column 0 moves to column 1 field 0', async () => { + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(ENTER); // activate field 0 (last in col 0) → auto-deactivate → col 1 field 0 + await delay(); + + expect(lastFrame()).toContain('col:1'); + expect(lastFrame()).toContain('field:0'); + }); + + it('deactivate at last field of column 1 calls onComplete', async () => { + const onComplete = vi.fn(); + const { lastFrame, stdin } = render(); + + await delay(); + // Move to column 1 first + stdin.write(ENTER); // col 0 field 0 → deactivate → col 1 field 0 + await delay(); + + expect(lastFrame()).toContain('col:1'); + expect(lastFrame()).toContain('field:0'); + + stdin.write(ENTER); // col 1 field 0 (last) → deactivate → onComplete + await delay(100); + + expect(onComplete).toHaveBeenCalled(); + }); + }); + + describe('isFieldFocused/isFieldActive/isColumnActive', () => { + it('isFieldFocused returns true for current position in focus layer', () => { + let resultRef: ReturnType | undefined; + render( + { + resultRef = r; + }} + /> + ); + + expect(resultRef!.isFieldFocused(0, 0)).toBe(true); + 
expect(resultRef!.isFieldFocused(0, 1)).toBe(false); + expect(resultRef!.isFieldFocused(1, 0)).toBe(false); + }); + + it('isFieldActive returns false in focus layer', () => { + let resultRef: ReturnType | undefined; + render( + { + resultRef = r; + }} + /> + ); + + expect(resultRef!.isFieldActive(0, 0)).toBe(false); + }); + + it('isColumnActive returns true for current column', () => { + let resultRef: ReturnType | undefined; + render( + { + resultRef = r; + }} + /> + ); + + expect(resultRef!.isColumnActive(0)).toBe(true); + expect(resultRef!.isColumnActive(1)).toBe(false); + }); + }); + + describe('disabled fields are skipped', () => { + it('Down skips disabled field', async () => { + const isFieldDisabled = (_col: number, field: number) => field === 1; + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(DOWN_ARROW); // should skip field 1 and land on field 2 + await delay(); + + expect(lastFrame()).toContain('field:2'); + }); + + it('Up skips disabled field', async () => { + const isFieldDisabled = (_col: number, field: number) => field === 1; + const { lastFrame, stdin } = render(); + + await delay(); + stdin.write(DOWN_ARROW); // skip 1 → field 2 + await delay(); + expect(lastFrame()).toContain('field:2'); + + stdin.write(UP_ARROW); // skip 1 → field 0 + await delay(); + expect(lastFrame()).toContain('field:0'); + }); + + it('stays in place when all remaining fields are disabled', async () => { + const { lastFrame, stdin } = render( f === 1} />); + + await delay(); + // field 0, only field 1 exists and is disabled → stay at 0 + stdin.write(DOWN_ARROW); + await delay(); + + expect(lastFrame()).toContain('field:0'); + }); + }); +}); diff --git a/src/cli/tui/hooks/useCreateABTest.ts b/src/cli/tui/hooks/useCreateABTest.ts new file mode 100644 index 000000000..e54666074 --- /dev/null +++ b/src/cli/tui/hooks/useCreateABTest.ts @@ -0,0 +1,93 @@ +import type { AddTargetBasedABTestOptions } from '../../primitives/ABTestPrimitive'; +import { 
abTestPrimitive } from '../../primitives/registry'; +import type { GatewayChoice } from '../screens/ab-test/types'; +import { useCallback, useEffect, useState } from 'react'; + +interface CreateABTestConfig { + name: string; + description?: string; + agent: string; + gatewayChoice?: GatewayChoice; + controlBundle: string; + controlVersion: string; + treatmentBundle: string; + treatmentVersion: string; + controlWeight: number; + treatmentWeight: number; + onlineEval: string; + maxDuration?: number; + enableOnCreate?: boolean; +} + +export function useCreateABTest() { + const [status, setStatus] = useState<{ state: 'idle' | 'loading' | 'success' | 'error'; error?: string }>({ + state: 'idle', + }); + + const create = useCallback(async (config: CreateABTestConfig) => { + setStatus({ state: 'loading' }); + try { + const addResult = await abTestPrimitive.add({ + name: config.name, + description: config.description, + agent: config.agent, + gatewayChoice: config.gatewayChoice, + controlBundle: config.controlBundle, + controlVersion: config.controlVersion, + treatmentBundle: config.treatmentBundle, + treatmentVersion: config.treatmentVersion, + controlWeight: config.controlWeight, + treatmentWeight: config.treatmentWeight, + onlineEval: config.onlineEval, + maxDurationDays: config.maxDuration, + enableOnCreate: config.enableOnCreate, + }); + if (!addResult.success) { + throw new Error(addResult.error ?? 'Failed to create AB test'); + } + setStatus({ state: 'success' }); + return { ok: true as const, testName: config.name }; + } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to create AB test.'; + setStatus({ state: 'error', error: message }); + return { ok: false as const, error: message }; + } + }, []); + + const createTargetBased = useCallback(async (config: Omit) => { + setStatus({ state: 'loading' }); + try { + const addResult = await abTestPrimitive.addTargetBased(config); + if (!addResult.success) { + throw new Error(addResult.error ?? 
'Failed to create target-based AB test'); + } + setStatus({ state: 'success' }); + return { ok: true as const, testName: config.name }; + } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to create target-based AB test.'; + setStatus({ state: 'error', error: message }); + return { ok: false as const, error: message }; + } + }, []); + + const reset = useCallback(() => { + setStatus({ state: 'idle' }); + }, []); + + return { status, createABTest: create, createTargetBasedABTest: createTargetBased, reset }; +} + +export function useExistingABTestNames() { + const [names, setNames] = useState([]); + + useEffect(() => { + void abTestPrimitive.getAllNames().then(setNames); + }, []); + + const refresh = useCallback(async () => { + const result = await abTestPrimitive.getAllNames(); + setNames(result); + }, []); + + return { names, refresh }; +} diff --git a/src/cli/tui/hooks/useCreateConfigBundle.ts b/src/cli/tui/hooks/useCreateConfigBundle.ts new file mode 100644 index 000000000..864501eed --- /dev/null +++ b/src/cli/tui/hooks/useCreateConfigBundle.ts @@ -0,0 +1,59 @@ +import { configBundlePrimitive } from '../../primitives/registry'; +import { useCallback, useEffect, useState } from 'react'; + +interface CreateConfigBundleConfig { + name: string; + description?: string; + components: Record }>; + branchName?: string; + commitMessage?: string; +} + +export function useCreateConfigBundle() { + const [status, setStatus] = useState<{ state: 'idle' | 'loading' | 'success' | 'error'; error?: string }>({ + state: 'idle', + }); + + const create = useCallback(async (config: CreateConfigBundleConfig) => { + setStatus({ state: 'loading' }); + try { + const addResult = await configBundlePrimitive.add({ + name: config.name, + description: config.description, + components: config.components, + branchName: config.branchName, + commitMessage: config.commitMessage, + }); + if (!addResult.success) { + throw new Error(addResult.error ?? 
'Failed to create configuration bundle'); + } + setStatus({ state: 'success' }); + return { ok: true as const, bundleName: config.name }; + } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to create configuration bundle.'; + setStatus({ state: 'error', error: message }); + return { ok: false as const, error: message }; + } + }, []); + + const reset = useCallback(() => { + setStatus({ state: 'idle' }); + }, []); + + return { status, createConfigBundle: create, reset }; +} + +export function useExistingConfigBundleNames() { + const [names, setNames] = useState([]); + + useEffect(() => { + void configBundlePrimitive.getAllNames().then(setNames); + }, []); + + const refresh = useCallback(async () => { + const result = await configBundlePrimitive.getAllNames(); + setNames(result); + }, []); + + return { names, refresh }; +} diff --git a/src/cli/tui/hooks/useCreateEvaluator.ts b/src/cli/tui/hooks/useCreateEvaluator.ts index 6e1d8f052..f1cad666f 100644 --- a/src/cli/tui/hooks/useCreateEvaluator.ts +++ b/src/cli/tui/hooks/useCreateEvaluator.ts @@ -1,5 +1,7 @@ import type { EvaluatorConfig } from '../../../schema'; import { evaluatorPrimitive } from '../../primitives/registry'; +import { withAddTelemetry } from '../../telemetry/cli-command-run.js'; +import { Level, standardize } from '../../telemetry/schemas/common-shapes.js'; import { useCallback, useEffect, useState } from 'react'; interface CreateEvaluatorConfig { @@ -16,11 +18,19 @@ export function useCreateEvaluator() { const create = useCallback(async (config: CreateEvaluatorConfig) => { setStatus({ state: 'loading' }); try { - const addResult = await evaluatorPrimitive.add({ - name: config.name, - level: config.level as 'SESSION' | 'TRACE' | 'TOOL_CALL', - config: config.config, - }); + const addResult = await withAddTelemetry( + 'add.evaluator', + { + evaluator_type: config.config.codeBased ? 
'code-based' : 'llm-as-a-judge', + level: standardize(Level, config.level), + }, + () => + evaluatorPrimitive.add({ + name: config.name, + level: config.level as 'SESSION' | 'TRACE' | 'TOOL_CALL', + config: config.config, + }) + ); if (!addResult.success) { throw new Error(addResult.error ?? 'Failed to create evaluator'); } diff --git a/src/cli/tui/hooks/useCreateMcp.ts b/src/cli/tui/hooks/useCreateMcp.ts index 2b3b3b25a..ec91666d0 100644 --- a/src/cli/tui/hooks/useCreateMcp.ts +++ b/src/cli/tui/hooks/useCreateMcp.ts @@ -4,6 +4,8 @@ import { gatewayTargetPrimitive, policyEnginePrimitive, } from '../../primitives/registry'; +import { withAddTelemetry } from '../../telemetry/cli-command-run.js'; +import { AuthorizerType, PolicyEngineMode, standardize } from '../../telemetry/schemas/common-shapes.js'; import type { AddGatewayConfig } from '../screens/mcp/types'; import { useCallback, useEffect, useState } from 'react'; @@ -23,22 +25,33 @@ export function useCreateGateway() { const createGateway = useCallback(async (config: AddGatewayConfig) => { setStatus({ state: 'loading' }); try { - const addResult = await gatewayPrimitive.add({ - name: config.name, - description: config.description, - authorizerType: config.authorizerType, - discoveryUrl: config.jwtConfig?.discoveryUrl, - allowedAudience: config.jwtConfig?.allowedAudience?.join(','), - allowedClients: config.jwtConfig?.allowedClients?.join(','), - allowedScopes: config.jwtConfig?.allowedScopes?.join(','), - customClaims: config.jwtConfig?.customClaims, - clientId: config.jwtConfig?.clientId, - clientSecret: config.jwtConfig?.clientSecret, - enableSemanticSearch: config.enableSemanticSearch, - exceptionLevel: config.exceptionLevel, - policyEngine: config.policyEngineConfiguration?.policyEngineName, - policyEngineMode: config.policyEngineConfiguration?.mode, - }); + const addResult = await withAddTelemetry( + 'add.gateway', + { + authorizer_type: standardize(AuthorizerType, config.authorizerType ?? 
'NONE'), + has_policy_engine: !!config.policyEngineConfiguration?.policyEngineName, + policy_engine_mode: standardize(PolicyEngineMode, config.policyEngineConfiguration?.mode ?? 'log_only'), + semantic_search: config.enableSemanticSearch !== false, + runtime_count: 0, + }, + () => + gatewayPrimitive.add({ + name: config.name, + description: config.description, + authorizerType: config.authorizerType, + discoveryUrl: config.jwtConfig?.discoveryUrl, + allowedAudience: config.jwtConfig?.allowedAudience?.join(','), + allowedClients: config.jwtConfig?.allowedClients?.join(','), + allowedScopes: config.jwtConfig?.allowedScopes?.join(','), + customClaims: config.jwtConfig?.customClaims, + clientId: config.jwtConfig?.clientId, + clientSecret: config.jwtConfig?.clientSecret, + enableSemanticSearch: config.enableSemanticSearch, + exceptionLevel: config.exceptionLevel, + policyEngine: config.policyEngineConfiguration?.policyEngineName, + policyEngineMode: config.policyEngineConfiguration?.mode, + }) + ); if (!addResult.success) { throw new Error(addResult.error ?? 
'Failed to create gateway'); } diff --git a/src/cli/tui/hooks/useCreateMemory.ts b/src/cli/tui/hooks/useCreateMemory.ts index 4345b4ead..d4196582f 100644 --- a/src/cli/tui/hooks/useCreateMemory.ts +++ b/src/cli/tui/hooks/useCreateMemory.ts @@ -2,6 +2,7 @@ import { ConfigIO } from '../../../lib'; import type { Memory } from '../../../schema'; import { getAvailableAgents } from '../../operations/attach'; import { memoryPrimitive } from '../../primitives/registry'; +import { withAddTelemetry } from '../../telemetry/cli-command-run.js'; import { useCallback, useEffect, useState } from 'react'; interface CreateMemoryConfig { @@ -24,13 +25,25 @@ export function useCreateMemory() { setStatus({ state: 'loading' }); try { const strategiesStr = config.strategies.map(s => s.type).join(','); - const addResult = await memoryPrimitive.add({ - name: config.name, - expiry: config.eventExpiryDuration, - strategies: strategiesStr || undefined, - dataStreamArn: config.streaming?.dataStreamArn, - contentLevel: config.streaming?.contentLevel, - }); + const strategyList = strategiesStr ? strategiesStr.split(',').map(s => s.trim().toUpperCase()) : []; + const addResult = await withAddTelemetry( + 'add.memory', + { + strategy_count: strategyList.length, + strategy_semantic: strategyList.includes('SEMANTIC'), + strategy_summarization: strategyList.includes('SUMMARIZATION'), + strategy_user_preference: strategyList.includes('USER_PREFERENCE'), + strategy_episodic: strategyList.includes('EPISODIC'), + }, + () => + memoryPrimitive.add({ + name: config.name, + expiry: config.eventExpiryDuration, + strategies: strategiesStr || undefined, + dataStreamArn: config.streaming?.dataStreamArn, + contentLevel: config.streaming?.contentLevel, + }) + ); if (!addResult.success) { throw new Error(addResult.error ?? 
'Failed to create memory'); } diff --git a/src/cli/tui/hooks/useCreateOnlineEval.ts b/src/cli/tui/hooks/useCreateOnlineEval.ts index 2d0190552..b853fed05 100644 --- a/src/cli/tui/hooks/useCreateOnlineEval.ts +++ b/src/cli/tui/hooks/useCreateOnlineEval.ts @@ -1,11 +1,14 @@ import { onlineEvalConfigPrimitive } from '../../primitives/registry'; +import { withAddTelemetry } from '../../telemetry/cli-command-run.js'; import { useCallback, useEffect, useState } from 'react'; interface CreateOnlineEvalConfig { name: string; agent: string; + endpoint?: string; evaluators: string[]; samplingRate: number; + sessionTimeoutMinutes?: number; enableOnCreate: boolean; } @@ -17,13 +20,23 @@ export function useCreateOnlineEval() { const create = useCallback(async (config: CreateOnlineEvalConfig) => { setStatus({ state: 'loading' }); try { - const addResult = await onlineEvalConfigPrimitive.add({ - name: config.name, - agent: config.agent, - evaluators: config.evaluators, - samplingRate: config.samplingRate, - enableOnCreate: config.enableOnCreate, - }); + const addResult = await withAddTelemetry( + 'add.online-eval', + { + evaluator_count: config.evaluators.length, + enable_on_create: config.enableOnCreate ?? false, + }, + () => + onlineEvalConfigPrimitive.add({ + name: config.name, + agent: config.agent, + ...(config.endpoint ? { endpoint: config.endpoint } : {}), + evaluators: config.evaluators, + samplingRate: config.samplingRate, + ...(config.sessionTimeoutMinutes !== undefined && { sessionTimeoutMinutes: config.sessionTimeoutMinutes }), + enableOnCreate: config.enableOnCreate, + }) + ); if (!addResult.success) { throw new Error(addResult.error ?? 
'Failed to create online eval config'); } diff --git a/src/cli/tui/hooks/usePanelNavigation.ts b/src/cli/tui/hooks/usePanelNavigation.ts new file mode 100644 index 000000000..1e06157ac --- /dev/null +++ b/src/cli/tui/hooks/usePanelNavigation.ts @@ -0,0 +1,196 @@ +import { useInput } from 'ink'; +import { useCallback, useState } from 'react'; + +export interface PanelPosition { + column: 0 | 1; + field: number; + layer: 'focus' | 'active'; +} + +interface UsePanelNavigationOptions { + /** Only capture input when the builder step is active */ + isActive: boolean; + /** Number of fields per column */ + fieldCount: number; + /** Called when Escape is pressed at the top-left origin */ + onExit: () => void; + /** Optional check whether a field is disabled (non-focusable) */ + isFieldDisabled?: (column: number, field: number) => boolean; + /** Optional check whether a field is auto-completed (skip on navigation) */ + isFieldAutoCompleted?: (column: number, field: number) => boolean; + /** Called when the last field in the last column is completed */ + onComplete?: () => void; +} + +interface UsePanelNavigationResult { + position: PanelPosition; + /** Whether the given field is the currently focused field */ + isFieldFocused: (column: number, field: number) => boolean; + /** Whether the given field has its picker/input open */ + isFieldActive: (column: number, field: number) => boolean; + /** Whether the given column is the active column */ + isColumnActive: (column: number) => boolean; + /** Open the picker/input for the currently focused field */ + activate: () => void; + /** Close the picker/input, returning to field focus */ + deactivate: () => void; + /** Move focus to a specific field */ + moveToField: (column: number, field: number) => void; +} + +/** + * 2D focus management hook for a side-by-side panel builder. 
+ * + * Navigation model: + * - Tab switches columns (0 <-> 1) + * - Up/Down moves between fields within the active column + * - Enter activates the focused field (layer -> 'active') + * - Escape deactivates or navigates back + * + * When layer === 'active', the hook yields input to child components + * by setting its own `useInput` to inactive. + */ +export function usePanelNavigation({ + isActive, + fieldCount, + onExit, + isFieldDisabled, + isFieldAutoCompleted: _isFieldAutoCompleted, + onComplete, +}: UsePanelNavigationOptions): UsePanelNavigationResult { + const [position, setPosition] = useState({ + column: 0, + field: 0, + layer: 'focus', + }); + + // Only handle input when at focus layer and the panel is active + const inputActive = isActive && position.layer === 'focus'; + + useInput( + (input, key) => { + // Tab: switch columns + if (key.tab) { + setPosition(p => ({ + ...p, + column: p.column === 0 ? 1 : 0, + })); + return; + } + + // Up: move to previous field + if (key.upArrow) { + setPosition(p => { + let next = p.field - 1; + // Skip disabled fields going up + while (next >= 0 && isFieldDisabled?.(p.column, next)) { + next--; + } + if (next < 0) return p; + return { ...p, field: next }; + }); + return; + } + + // Down: move to next field + if (key.downArrow) { + setPosition(p => { + let next = p.field + 1; + // Skip disabled fields going down + while (next < fieldCount && isFieldDisabled?.(p.column, next)) { + next++; + } + if (next >= fieldCount) return p; + return { ...p, field: next }; + }); + return; + } + + // Enter: always activate the focused field (open picker) + if (key.return) { + setPosition(p => ({ ...p, layer: 'active' })); + return; + } + + // Escape: navigate back through the hierarchy + if (key.escape) { + setPosition(p => { + // If not at field 0, go to field 0 in same column + if (p.field > 0) { + return { ...p, field: 0 }; + } + // If at field 0 but not column 0, go to column 0 + if (p.column > 0) { + return { ...p, column: 0 }; + } 
+ // At origin: exit + onExit(); + return p; + }); + return; + } + }, + { isActive: inputActive } + ); + + const isFieldFocused = useCallback( + (column: number, field: number): boolean => { + return position.column === column && position.field === field && position.layer === 'focus'; + }, + [position] + ); + + const isFieldActive = useCallback( + (column: number, field: number): boolean => { + return position.column === column && position.field === field && position.layer === 'active'; + }, + [position] + ); + + const isColumnActive = useCallback( + (column: number): boolean => { + return position.column === column; + }, + [position.column] + ); + + const activate = useCallback(() => { + setPosition(p => ({ ...p, layer: 'active' })); + }, []); + + const deactivate = useCallback(() => { + setPosition(p => { + // After a selection, advance to the next field in sequence: + // column 0 fields 0→1→2, then column 1 fields 0→1→2, then complete + const nextField = p.field + 1; + if (nextField < fieldCount) { + // Next field in same column + return { column: p.column, field: nextField, layer: 'focus' }; + } + if (p.column === 0) { + // Finished left column → move to right column field 0 + return { column: 1, field: 0, layer: 'focus' }; + } + // Finished last field in right column → stay and let onComplete handle it + if (onComplete) { + // Use setTimeout to avoid setState during render + setTimeout(onComplete, 0); + } + return { ...p, layer: 'focus' }; + }); + }, [fieldCount, onComplete]); + + const moveToField = useCallback((column: number, field: number) => { + setPosition({ column: column as 0 | 1, field, layer: 'focus' }); + }, []); + + return { + position, + isFieldFocused, + isFieldActive, + isColumnActive, + activate, + deactivate, + moveToField, + }; +} diff --git a/src/cli/tui/hooks/useRemove.ts b/src/cli/tui/hooks/useRemove.ts index df2b1fd61..db1061dd4 100644 --- a/src/cli/tui/hooks/useRemove.ts +++ b/src/cli/tui/hooks/useRemove.ts @@ -6,7 +6,9 @@ import type { 
RemovableMemory } from '../../primitives/MemoryPrimitive'; import type { RemovablePolicyResource } from '../../primitives/PolicyPrimitive'; import type { RemovableRuntimeEndpoint } from '../../primitives/RuntimeEndpointPrimitive'; import { + abTestPrimitive, agentPrimitive, + configBundlePrimitive, credentialPrimitive, evaluatorPrimitive, gatewayPrimitive, @@ -158,6 +160,24 @@ export function useRemovablePolicies() { return { policies, ...rest }; } +export function useRemovableConfigBundles() { + const { items: configBundles, ...rest } = useRemovableResources(() => configBundlePrimitive.getRemovable()); + return { configBundles, ...rest }; +} + +export function useRemovableABTests() { + const { items: abTests, ...rest } = useRemovableResources(() => abTestPrimitive.getRemovable()); + return { abTests, ...rest }; +} + +export function useRemoveABTest() { + return useRemoveResource( + (name: string) => abTestPrimitive.remove(name), + 'ab-test', + name => name + ); +} + export function useRemovableRuntimeEndpoints() { const { items: endpoints, ...rest } = useRemovableResources(() => runtimeEndpointPrimitive.getRemovable() @@ -240,6 +260,16 @@ export function useRemovalPreview() { (compositeKey: string) => loadPreview(k => policyPrimitive.previewRemove(k), compositeKey), [loadPreview] ); + const loadConfigBundlePreview = useCallback( + (name: string) => loadPreview(n => configBundlePrimitive.previewRemove(n), name), + [loadPreview] + ); + + const loadABTestPreview = useCallback( + (name: string) => loadPreview(n => abTestPrimitive.previewRemove(n), name), + [loadPreview] + ); + const loadRuntimeEndpointPreview = useCallback( (name: string) => loadPreview(n => runtimeEndpointPrimitive.previewRemove(n), name), [loadPreview] @@ -261,6 +291,8 @@ export function useRemovalPreview() { loadOnlineEvalPreview, loadPolicyEnginePreview, loadPolicyPreview, + loadConfigBundlePreview, + loadABTestPreview, loadRuntimeEndpointPreview, reset, }; @@ -357,6 +389,14 @@ export function 
useRemovePolicy() { ); } +export function useRemoveConfigBundle() { + return useRemoveResource( + (name: string) => configBundlePrimitive.remove(name), + 'config-bundle', + name => name + ); +} + export function useRemoveRuntimeEndpoint() { return useRemoveResource( (name: string) => runtimeEndpointPrimitive.remove(name), diff --git a/src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx b/src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx new file mode 100644 index 000000000..36b36ee00 --- /dev/null +++ b/src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx @@ -0,0 +1,623 @@ +import { ConfigIO } from '../../../../lib'; +import { getCredentialProvider } from '../../../aws/account'; +import { getABTest, updateABTest } from '../../../aws/agentcore-ab-tests'; +import type { GetABTestResult } from '../../../aws/agentcore-ab-tests'; +import { getOnlineEvaluationConfig } from '../../../aws/agentcore-control'; +import { getHttpGateway, listHttpGatewayTargets } from '../../../aws/agentcore-http-gateways'; +import { dnsSuffix } from '../../../aws/partition'; +import { getErrorMessage } from '../../../errors'; +import { GradientText, Screen } from '../../components'; +import { CloudWatchLogsClient, FilterLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; +import { Box, Text, useInput } from 'ink'; +import React, { useCallback, useEffect, useRef, useState } from 'react'; + +interface ABTestDetailScreenProps { + abTestId: string; + region: string; + onExit: () => void; +} + +/** Derive the gateway URL from a gateway ARN. */ +function gatewayUrlFromArn(arn: string): string { + const parts = arn.split(':'); + const region = parts[3]; + const gatewayId = parts[5]?.split('/')[1]; + if (region && gatewayId) { + return `https://${gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}`; + } + return arn; +} + +/** Extract the resource ID from an ARN (last segment after / or :). 
*/ +function extractId(arn: string): string { + const slashIdx = arn.lastIndexOf('/'); + if (slashIdx !== -1) return arn.slice(slashIdx + 1); + const colonIdx = arn.lastIndexOf(':'); + if (colonIdx !== -1) return arn.slice(colonIdx + 1); + return arn; +} + +/** Truncate a version ID to 8 characters. */ +function shortVersion(version: string): string { + return version.slice(0, 8); +} + +/** Format a Unix epoch timestamp (seconds) to a UTC date string. */ +function formatTimestamp(ts: string | number): string { + const ms = typeof ts === 'string' ? parseFloat(ts) * 1000 : ts * 1000; + const d = new Date(ms); + return d + .toISOString() + .replace('T', ' ') + .replace(/\.\d+Z$/, ' UTC'); +} + +/** Build a horizontal rule with optional left label and right label. */ +function rule(left?: string, right?: string, width = 48): string { + if (!left && !right) return '─'.repeat(width); + const leftPart = left ? `── ${left} ` : '──'; + const rightPart = right ? ` ${right} ──` : ''; + const fillLen = width - leftPart.length - rightPart.length; + const fill = fillLen > 0 ? '─'.repeat(fillLen) : ''; + return `${leftPart}${fill}${rightPart}`; +} + +interface DebugCheckResult { + label: string; + status: 'pass' | 'fail' | 'warn'; + detail: string; +} + +async function runDebugChecks(test: GetABTestResult, region: string): Promise { + const results: DebugCheckResult[] = []; + const logsClient = new CloudWatchLogsClient({ region, credentials: getCredentialProvider() }); + + // 1. AB Test Status + results.push({ + label: 'AB Test Status', + status: test.status === 'ACTIVE' && test.executionStatus === 'RUNNING' ? 'pass' : 'warn', + detail: `${test.status} / ${test.executionStatus}`, + }); + + // 1b. AB Test Role + results.push({ + label: 'AB Test Role', + status: test.roleArn ? 'pass' : 'warn', + detail: test.roleArn ?? 'No role ARN', + }); + + // 2. 
Online Eval Config(s) + const evalConfigArns: { name: string; arn: string }[] = + 'perVariantOnlineEvaluationConfig' in test.evaluationConfig + ? test.evaluationConfig.perVariantOnlineEvaluationConfig.map(v => ({ + name: v.name, + arn: v.onlineEvaluationConfigArn, + })) + : [{ name: '', arn: test.evaluationConfig.onlineEvaluationConfigArn }]; + + for (const { name: variantName, arn: evalArn } of evalConfigArns) { + const evalConfigId = extractId(evalArn); + const labelSuffix = variantName ? ` (${variantName})` : ''; + try { + const evalConfig = await getOnlineEvaluationConfig({ region, configId: evalConfigId }); + results.push({ + label: `Online Eval Config${labelSuffix}`, + status: evalConfig.executionStatus === 'ENABLED' ? 'pass' : 'fail', + detail: `${evalConfig.configName} — ${evalConfig.executionStatus}`, + }); + } catch (err) { + results.push({ label: `Online Eval Config${labelSuffix}`, status: 'fail', detail: getErrorMessage(err) }); + } + } + + // 2b. Gateway Role + const gatewayId = extractId(test.gatewayArn); + try { + const gateway = await getHttpGateway({ region, gatewayId }); + results.push({ + label: 'Gateway Role', + status: gateway.roleArn ? 'pass' : 'warn', + detail: gateway.roleArn ?? 'No role ARN', + }); + } catch (err) { + results.push({ label: 'Gateway Role', status: 'fail', detail: getErrorMessage(err) }); + } + + // 5. Runtime spans — check for experiment metadata per variant in aws/spans + // service.name in spans follows the pattern: {projectName}_{agentName}.{endpoint} + // We derive the service name prefix from the deployed state runtimeId (strip random suffix). + const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000; + const variantNames = test.variants.map(v => v.name); + let serviceNamePrefix: string | undefined; + try { + const configIO = new ConfigIO(); + const deployedState = await configIO.readDeployedState(); + for (const [, target] of Object.entries(deployedState.targets ?? {})) { + const runtimes = target.resources?.runtimes ?? 
{}; + const firstRuntime = Object.values(runtimes)[0]; + if (firstRuntime?.runtimeId) { + // runtimeId is "{projectName}_{agentName}-{randomSuffix}", strip the suffix + serviceNamePrefix = firstRuntime.runtimeId.replace(/-[^-]+$/, ''); + break; + } + } + } catch { + // Fall back to abTestArn-only filtering if deployed state isn't readable + } + + try { + const baseFilter = serviceNamePrefix ? `"${serviceNamePrefix}"` : '"gen_ai_agent"'; + const [allRuntimeSpans, ...variantSpanResults] = await Promise.all([ + logsClient.send( + new FilterLogEventsCommand({ + logGroupName: 'aws/spans', + startTime: twoHoursAgo, + filterPattern: baseFilter, + limit: 1, + }) + ), + ...variantNames.map(name => + logsClient.send( + new FilterLogEventsCommand({ + logGroupName: 'aws/spans', + startTime: twoHoursAgo, + filterPattern: `"${test.abTestArn}" "${name}"`, + limit: 50, + }) + ) + ), + ]); + + const hasRuntimeSpans = (allRuntimeSpans.events?.length ?? 0) > 0; + const totalExperimentSpans = variantSpanResults.reduce((sum, r) => sum + (r.events?.length ?? 0), 0); + + for (let i = 0; i < variantNames.length; i++) { + const name = variantNames[i]; + const count = variantSpanResults[i]?.events?.length ?? 0; + const label = `Runtime Experiment Spans — ${name} (2h)`; + + if (count > 0) { + results.push({ label, status: 'pass', detail: `${count} spans with experiment metadata` }); + } else if (hasRuntimeSpans) { + results.push({ + label, + status: 'warn', + detail: + totalExperimentSpans > 0 + ? `No spans for ${name} — traffic may not be reaching this variant` + : 'Runtime spans found but no experiment metadata — update bedrock-agentcore SDK to the latest version', + }); + } else { + results.push({ label, status: 'warn', detail: 'No runtime spans found — send traffic to the gateway first' }); + } + } + } catch (err) { + results.push({ label: 'Runtime Experiment Spans', status: 'fail', detail: getErrorMessage(err) }); + } + + // 6. 
Eval Results — check each eval config's log group + const thirtyMinAgo = Date.now() - 30 * 60 * 1000; + for (const { name: variantName, arn: evalArn } of evalConfigArns) { + const configId = extractId(evalArn); + const labelSuffix = variantName ? ` (${variantName})` : ''; + try { + const evalLogGroup = `/aws/bedrock-agentcore/evaluations/results/${configId}`; + + const [allEvents, taggedEvents] = await Promise.all([ + logsClient.send(new FilterLogEventsCommand({ logGroupName: evalLogGroup, startTime: thirtyMinAgo, limit: 1 })), + logsClient.send( + new FilterLogEventsCommand({ + logGroupName: evalLogGroup, + startTime: thirtyMinAgo, + filterPattern: `"${test.abTestArn}"`, + limit: 100, + }) + ), + ]); + + const hasResults = (allEvents.events?.length ?? 0) > 0; + const taggedCount = taggedEvents.events?.length ?? 0; + + if (!hasResults) { + results.push({ + label: `Eval Results${labelSuffix}`, + status: 'warn', + detail: 'No eval results yet — wait ~5m after session timeout for evaluator to process', + }); + } else { + results.push({ + label: `Eval Results${labelSuffix}`, + status: taggedCount > 0 ? 'pass' : 'warn', + detail: + taggedCount > 0 + ? `${taggedCount} results tagged with AB test` + : 'Results exist but none tagged with variant — check gateway trace delivery', + }); + } + } catch (err) { + const msg = getErrorMessage(err); + results.push({ + label: `Eval Results${labelSuffix}`, + status: msg.includes('ResourceNotFoundException') ? 'warn' : 'fail', + detail: msg.includes('ResourceNotFoundException') ? 'Log group not found — evaluator has not run yet' : msg, + }); + } + } + + // 6. Aggregation Results + const metrics = test.results?.evaluatorMetrics ?? []; + const reporting = metrics.filter(m => m.controlStats?.sampleSize > 0); + results.push({ + label: 'Aggregation Results', + status: reporting.length > 0 ? 'pass' : 'warn', + detail: + reporting.length > 0 + ? 
`${reporting.length} evaluator(s) reporting` + : 'No aggregation data yet — wait ~12-15m after traffic', + }); + + return results; +} + +export function ABTestDetailScreen({ abTestId, region, onExit }: ABTestDetailScreenProps) { + const [test, setTest] = useState(null); + const [error, setError] = useState(null); + const [actionMessage, setActionMessage] = useState(null); + const [confirmingStop, setConfirmingStop] = useState(false); + const [confirmingPromote, setConfirmingPromote] = useState(false); + const [debugResults, setDebugResults] = useState(null); + const [debugLoading, setDebugLoading] = useState(false); + const [targetName, setTargetName] = useState(''); + + const hasFetched = useRef(false); + useEffect(() => { + if (hasFetched.current) return; + hasFetched.current = true; + const load = async () => { + try { + const result = await getABTest({ region, abTestId }); + setTest(result); + + // Fetch gateway target name for invocation URL + const gwId = extractId(result.gatewayArn); + try { + const targets = await listHttpGatewayTargets({ region, gatewayId: gwId, maxResults: 1 }); + const firstTarget = targets.targets[0]; + if (firstTarget) setTargetName(firstTarget.name); + } catch { + // Best-effort — URL will show without target path + } + } catch (err) { + setError(getErrorMessage(err)); + } + }; + void load(); + }, [region, abTestId]); + + const performAction = useCallback( + async (targetStatus: 'PAUSED' | 'RUNNING' | 'STOPPED', label: string) => { + setActionMessage(`${label}...`); + try { + await updateABTest({ region, abTestId, executionStatus: targetStatus }); + // Poll until status updates or max attempts reached + for (let i = 0; i < 5; i++) { + await new Promise(resolve => setTimeout(resolve, 1000)); + const result = await getABTest({ region, abTestId }); + setTest(result); + if (result.executionStatus === targetStatus) { + setActionMessage(label.replace('...', 'd').replace('ing', 'ed')); + return; + } + } + // Final fetch even if status didn't 
converge + setActionMessage(label.replace('ing', 'ed')); + } catch (err: unknown) { + setActionMessage(`Error: ${getErrorMessage(err)}`); + } + }, + [region, abTestId] + ); + + useInput((input, _key) => { + if (!test) return; + + if (confirmingStop) { + if (input === 'y' || input === 'Y') { + setConfirmingStop(false); + void performAction('STOPPED', 'Stopping'); + } else { + setConfirmingStop(false); + } + return; + } + + if (confirmingPromote) { + if (input === 'y' || input === 'Y') { + setConfirmingPromote(false); + setActionMessage('Promoting...'); + void (async () => { + try { + // Stop the AB test + await updateABTest({ region, abTestId, executionStatus: 'STOPPED' }); + for (let i = 0; i < 5; i++) { + await new Promise(resolve => setTimeout(resolve, 1000)); + const result = await getABTest({ region, abTestId }); + setTest(result); + if (result.executionStatus === 'STOPPED') break; + } + + // Apply promotion to agentcore.json + let promotionDetail = ''; + try { + const { promoteABTestConfig } = await import('../../../operations/ab-test/promote'); + const promoResult = await promoteABTestConfig(abTestId, test.name); + promotionDetail = promoResult.promoted + ? `${promoResult.promotionDetail} Run \`agentcore deploy\` to apply.` + : promoResult.promotionDetail; + } catch { + // Config update failed — still report the stop + } + + setActionMessage(promotionDetail || 'AB test stopped. 
Run `agentcore deploy` to apply.'); + } catch (err) { + setActionMessage(`Error: ${getErrorMessage(err)}`); + } + })(); + } else { + setConfirmingPromote(false); + } + return; + } + + if (input === 'p' || input === 'P') { + void performAction('PAUSED', 'Pausing'); + } + + if (input === 'r' || input === 'R') { + void performAction('RUNNING', 'Resuming'); + } + + if (input === 's' || input === 'S') { + setConfirmingStop(true); + setActionMessage(null); + } + + if (input === 'w' || input === 'W') { + setConfirmingPromote(true); + setActionMessage(null); + } + + if (input === 'd' || input === 'D') { + setDebugLoading(true); + setDebugResults(null); + void runDebugChecks(test, region) + .then(results => { + setDebugResults(results); + setDebugLoading(false); + }) + .catch(() => { + setDebugResults([{ label: 'Debug', status: 'fail' as const, detail: 'Diagnostics failed to run' }]); + setDebugLoading(false); + }); + } + }); + + if (error) { + return ( + + {`Error: ${error}`} + + ); + } + + if (!test) { + return ( + + Loading... + + ); + } + + const controlVariant = test.variants.find(v => v.name === 'C'); + const treatmentVariant = test.variants.find(v => v.name === 'T1'); + + const executionColor = + test.executionStatus === 'RUNNING' ? 'green' : test.executionStatus === 'PAUSED' ? 'yellow' : 'red'; + + const helpParts: string[] = []; + if (test.executionStatus === 'RUNNING') { + helpParts.push('P pause', 'S stop', 'W promote'); + } else if (test.executionStatus === 'PAUSED') { + helpParts.push('R resume', 'S stop', 'W promote'); + } + helpParts.push('D debug', 'Esc exit'); + const helpKeys = helpParts.join(' · '); + + // Build status text: only show provisioning status if not ACTIVE + const statusPrefix = test.status !== 'ACTIVE' ? `${test.status} ` : ''; + + // TODO(post-preview): Re-enable duration display once configurable duration is launched. 
+ const durationText = ''; + + // Column width for side-by-side variants + const colW = 28; + + return ( + + + {/* ── Header: Line 1 — status ─────────────────────────── */} + + + {statusPrefix && {statusPrefix}} + {`● ${test.executionStatus}`} + + {durationText && {durationText}} + + + {/* ── Header: Line 2 — invocation URL ────────────────────── */} + {targetName ? ( + + {`Invocation URL: ${gatewayUrlFromArn(test.gatewayArn)}/${targetName}/invocations`} + + ) : ( + + Invocation URL: loading... + + )} + + {/* ── Header: Line 3 — online eval (only for single-config mode) ── */} + {'onlineEvaluationConfigArn' in test.evaluationConfig && ( + + {`Online Eval: ${extractId(test.evaluationConfig.onlineEvaluationConfigArn)}`} + + )} + + {/* ── Description (if present) ────────────────────────── */} + {test.description && ( + + {`Description: ${test.description}`} + + )} + + {/* ── Variants: side-by-side ──────────────────────────── */} + + + {'CONTROL (C)'} + {`${String(controlVariant?.weight ?? 'N/A')}% traffic`} + + {controlVariant?.variantConfiguration.target + ? `target: ${controlVariant.variantConfiguration.target.name}` + : `${extractId(controlVariant?.variantConfiguration.configurationBundle?.bundleArn ?? '')} @ ${shortVersion(controlVariant?.variantConfiguration.configurationBundle?.bundleVersion ?? '')}`} + + + + {'TREATMENT (T1)'} + {`${String(treatmentVariant?.weight ?? 'N/A')}% traffic`} + + {treatmentVariant?.variantConfiguration.target + ? `target: ${treatmentVariant.variantConfiguration.target.name}` + : `${extractId(treatmentVariant?.variantConfiguration.configurationBundle?.bundleArn ?? '')} @ ${shortVersion(treatmentVariant?.variantConfiguration.configurationBundle?.bundleVersion ?? '')}`} + + + + + {/* ── Evaluation Results ───────────────────────────────── */} + + {test.results ? ( + <> + + {rule( + 'Results', + test.results.analysisTimestamp ? 
formatTimestamp(test.results.analysisTimestamp) : undefined + )} + + + + {''} + + + {'Control'} + + + {'Treatment'} + + {'Δ'} + + {test.results.evaluatorMetrics.map((metric, i) => ( + 0 ? 1 : 0}> + + + {extractId(metric.evaluatorArn)} + + + {metric.controlStats.mean.toFixed(4)} + + + {metric.variantResults[0]?.mean.toFixed(4) ?? ''} + + {metric.variantResults[0]?.isSignificant ? ( + {`+${(metric.variantResults[0]?.percentChange ?? 0).toFixed(2)}% ✓`} + ) : ( + {`${(metric.variantResults[0]?.percentChange ?? 0).toFixed(2)}% ✗`} + )} + + + + {''} + + + {`n=${metric.controlStats.sampleSize}`} + + + {`n=${metric.variantResults[0]?.sampleSize ?? ''}`} + + {`p=${metric.variantResults[0]?.pValue?.toFixed(3) ?? 'N/A'}`} + + + ))} + + ) : ( + <> + {rule('Results')} + + No evaluation results yet. + + + )} + + + {/* ── Debug Panel ─────────────────────────────────────── */} + {debugLoading && ( + + + + )} + {debugResults && ( + + {rule('Pipeline Debug')} + {debugResults.map((check, i) => { + const icon = check.status === 'pass' ? '✓' : check.status === 'fail' ? '✗' : '⚠'; + const color = check.status === 'pass' ? 'green' : check.status === 'fail' ? 'red' : 'yellow'; + return ( + + {` ${icon} `} + {check.label} + {` ${check.detail}`} + + ); + })} + + )} + + {/* ── Stop confirmation ────────────────────────────────── */} + {confirmingStop && ( + + + { + 'Stop this AB test permanently? All traffic will shift to the control variant. This cannot be undone. (Y/n)' + } + + + )} + + {/* ── Promote confirmation ─────────────────────────────── */} + {confirmingPromote && ( + + + { + 'Promote treatment as winner? This will stop the AB test and update the control endpoint to the treatment version. Run `agentcore deploy` after to apply. 
(Y/n)' + } + + + )} + + {/* ── Action feedback ──────────────────────────────────── */} + {actionMessage && !confirmingStop && ( + + {actionMessage} + + )} + + + ); +} diff --git a/src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx b/src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx new file mode 100644 index 000000000..9d47e4441 --- /dev/null +++ b/src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx @@ -0,0 +1,90 @@ +import { ConfigIO } from '../../../../lib'; +import type { SelectableItem } from '../../components'; +import { Screen, SelectScreen } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { ABTestDetailScreen } from './ABTestDetailScreen'; +import { Text } from 'ink'; +import React, { useEffect, useRef, useState } from 'react'; + +interface ABTestPickerScreenProps { + onExit: () => void; +} + +interface DeployedABTest { + name: string; + abTestId: string; +} + +export function ABTestPickerScreen({ onExit }: ABTestPickerScreenProps) { + const [tests, setTests] = useState(null); + const [selectedTest, setSelectedTest] = useState(null); + const [region, setRegion] = useState('us-east-1'); + + const hasFetched = useRef(false); + useEffect(() => { + if (hasFetched.current) return; + hasFetched.current = true; + const load = async () => { + try { + const configIO = new ConfigIO(); + const [deployedState, targets] = await Promise.all([ + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + const found: DeployedABTest[] = []; + for (const target of Object.values(deployedState.targets ?? 
{})) { + const abTests = target.resources?.abTests; + if (abTests) { + for (const [name, state] of Object.entries(abTests)) { + found.push({ name, abTestId: state.abTestId }); + } + } + } + setTests(found); + if (targets.length > 0) setRegion(targets[0]!.region); + } catch { + setTests([]); + } + }; + void load(); + }, []); + + if (selectedTest) { + return setSelectedTest(null)} />; + } + + if (tests === null) { + return ( + + Loading AB tests... + + ); + } + + if (tests.length === 0) { + return ( + + No deployed AB tests found. + Add one with `agentcore add ab-test` and deploy. + + ); + } + + const items: SelectableItem[] = tests.map(t => ({ + id: t.name, + title: t.name, + description: `ID: ${t.abTestId}`, + })); + + return ( + { + const test = tests.find(t => t.name === item.id); + if (test) setSelectedTest(test); + }} + onExit={onExit} + /> + ); +} diff --git a/src/cli/tui/screens/ab-test/AddABTestFlow.tsx b/src/cli/tui/screens/ab-test/AddABTestFlow.tsx new file mode 100644 index 000000000..b8313075d --- /dev/null +++ b/src/cli/tui/screens/ab-test/AddABTestFlow.tsx @@ -0,0 +1,281 @@ +import { ConfigIO } from '../../../../lib'; +import { listConfigurationBundleVersions } from '../../../aws/agentcore-config-bundles'; +import { ErrorPrompt } from '../../components'; +import { useCreateABTest, useExistingABTestNames } from '../../hooks/useCreateABTest'; +import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import { AddConfigBundleFlow } from '../config-bundle/AddConfigBundleFlow'; +import { AddABTestScreen } from './AddABTestScreen'; +import type { HttpGatewayInfo, OnlineEvalConfigInfo, RuntimeInfo } from './AddABTestScreen'; +import { TargetBasedABTestScreen } from './TargetBasedABTestScreen'; +import type { AddABTestConfig } from './types'; +import React, { useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'create-wizard' } + | { name: 'target-wizard' } + | { name: 'create-bundle' } + | { name: 'create-success'; testName: 
string } + | { name: 'error'; message: string }; + +interface AddABTestFlowProps { + isInteractive?: boolean; + onExit: () => void; + onBack: () => void; + onDev?: () => void; + onDeploy?: () => void; +} + +export function AddABTestFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddABTestFlowProps) { + const { createABTest, createTargetBasedABTest, reset: resetCreate } = useCreateABTest(); + const { names: existingNames } = useExistingABTestNames(); + const [flow, setFlow] = useState({ name: 'create-wizard' }); + + // Load deployed state for bundle lists + const [agents, setAgents] = useState<{ name: string }[]>([]); + const [existingHttpGateways, setExistingHttpGateways] = useState([]); + const [deployedBundles, setDeployedBundles] = useState<{ name: string; bundleId: string }[]>([]); + const [onlineEvalConfigs, setOnlineEvalConfigs] = useState([]); + const [runtimesInfo, setRuntimesInfo] = useState([]); + const [httpGatewayDetails, setHttpGatewayDetails] = useState([]); + const [onlineEvalConfigDetails, setOnlineEvalConfigDetails] = useState([]); + const [region, setRegion] = useState('us-east-1'); + + const [loadEpoch, setLoadEpoch] = useState(0); + + useEffect(() => { + void (async () => { + try { + const configIO = new ConfigIO(); + const deployedState = await configIO.readDeployedState(); + const projectSpec = await configIO.readProjectSpec(); + + // Get region from first target + for (const [, target] of Object.entries(deployedState.targets ?? {})) { + const resources = target.resources; + + // Deployed config bundles + const bundles = resources?.configBundles; + if (bundles) { + setDeployedBundles( + Object.entries(bundles).map(([name, state]) => ({ + name, + bundleId: state.bundleId, + })) + ); + } + break; + } + + // Agents from project spec runtimes + const runtimes = projectSpec.runtimes ?? 
[]; + setAgents(runtimes.map(r => ({ name: r.name }))); + + // Runtimes with endpoints for target-based mode + setRuntimesInfo( + runtimes.map(r => ({ + name: r.name, + endpoints: Object.entries(r.endpoints ?? {}).map(([epName, ep]) => ({ + name: epName, + version: ep.version, + })), + })) + ); + + // Existing HTTP gateways from project spec + const httpGws = projectSpec.httpGateways ?? []; + setExistingHttpGateways(httpGws.map(gw => gw.name)); + + // HTTP gateway details with targets for target-based mode + setHttpGatewayDetails( + httpGws.map(gw => ({ + name: gw.name, + runtimeRef: gw.runtimeRef, + targets: (gw.targets ?? []).map(t => ({ + name: t.name, + runtimeRef: t.runtimeRef, + qualifier: t.qualifier, + })), + })) + ); + + // Online eval configs from project spec + const evalConfigs = projectSpec.onlineEvalConfigs ?? []; + setOnlineEvalConfigs(evalConfigs.map(c => c.name)); + setOnlineEvalConfigDetails( + evalConfigs.map(c => ({ + name: c.name, + agent: c.agent, + endpoint: c.endpoint, + })) + ); + + // Region from aws-targets, falling back to env + const targets = await configIO.resolveAWSDeploymentTargets(); + if (targets.length > 0) { + setRegion(targets[0]!.region); + } else { + setRegion(process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 
'us-east-1'); + } + } catch { + // No deployed state — lists will be empty + } + })(); + }, [loadEpoch]); + + const fetchBundleVersions = useCallback( + async (bundleId: string) => { + try { + const result = await listConfigurationBundleVersions({ region, bundleId }); + return result.versions.map(v => ({ + versionId: v.versionId, + createdAt: v.versionCreatedAt, + })); + } catch { + return []; + } + }, + [region] + ); + + useEffect(() => { + if (!isInteractive && flow.name === 'create-success') { + onExit(); + } + }, [isInteractive, flow.name, onExit]); + + const handleCreateComplete = useCallback( + (config: AddABTestConfig) => { + if (config.mode === 'target-based') { + const gatewayName = + config.gatewayChoice.type === 'existing-http' + ? config.gatewayChoice.name + : config.gatewayChoice.type === 'create-new' + ? `${config.name.replace(/_/g, '-').slice(0, 44)}-gw` + : ''; + void createTargetBasedABTest({ + name: config.name, + description: config.description || undefined, + gateway: gatewayName, + runtime: config.runtime, + controlEndpoint: config.controlEndpoint, + treatmentEndpoint: config.treatmentEndpoint, + controlWeight: config.controlWeight, + treatmentWeight: config.treatmentWeight, + controlOnlineEval: config.controlOnlineEval, + treatmentOnlineEval: config.treatmentOnlineEval, + enableOnCreate: config.enableOnCreate, + }).then(result => { + if (result.ok) { + setFlow({ name: 'create-success', testName: result.testName }); + return; + } + setFlow({ name: 'error', message: result.error }); + }); + return; + } + + const controlWeight = 100 - config.treatmentWeight; + void createABTest({ + name: config.name, + description: config.description || undefined, + agent: config.agent, + gatewayChoice: config.gatewayChoice, + controlBundle: config.controlBundle, + controlVersion: config.controlVersion, + treatmentBundle: config.treatmentBundle, + treatmentVersion: config.treatmentVersion, + controlWeight, + treatmentWeight: config.treatmentWeight, + onlineEval: 
config.onlineEval, + maxDuration: config.maxDuration, + enableOnCreate: config.enableOnCreate, + }).then(result => { + if (result.ok) { + setFlow({ name: 'create-success', testName: result.testName }); + return; + } + setFlow({ name: 'error', message: result.error }); + }); + }, + [createABTest, createTargetBasedABTest] + ); + + const handleSwitchToTargetBased = useCallback(() => { + setFlow({ name: 'target-wizard' }); + }, []); + + const handleCreateBundle = useCallback(() => { + setFlow({ name: 'create-bundle' }); + }, []); + + const handleBundleFlowDone = useCallback(() => { + setLoadEpoch(e => e + 1); + setFlow({ name: 'create-wizard' }); + }, []); + + if (flow.name === 'create-bundle') { + return ( + + ); + } + + if (flow.name === 'target-wizard') { + return ( + + ); + } + + if (flow.name === 'create-wizard') { + return ( + + ); + } + + if (flow.name === 'create-success') { + return ( + + ); + } + + return ( + { + resetCreate(); + setFlow({ name: 'create-wizard' }); + }} + onExit={onExit} + /> + ); +} diff --git a/src/cli/tui/screens/ab-test/AddABTestScreen.tsx b/src/cli/tui/screens/ab-test/AddABTestScreen.tsx new file mode 100644 index 000000000..3306ce86c --- /dev/null +++ b/src/cli/tui/screens/ab-test/AddABTestScreen.tsx @@ -0,0 +1,914 @@ +import { ABTestNameSchema } from '../../../../schema/schemas/primitives/ab-test'; +import type { SelectableItem } from '../../components'; +import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import type { VersionLoadState } from './VariantConfigForm'; +import { VariantConfigForm } from './VariantConfigForm'; +import type { AddABTestConfig, TargetInfo } from './types'; +import { AB_TEST_STEP_LABELS } from './types'; +import { useAddABTestWizard } from './useAddABTestWizard'; +import { Box, Text } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 
'react'; + +function formatVersionDate(value: string): string { + const n = Number(value); + if (!isNaN(n) && n > 0) { + // Epoch seconds (< 1e12) vs milliseconds (>= 1e12) + const ms = n < 1e12 ? n * 1000 : n; + return new Date(ms).toLocaleString(); + } + return new Date(value).toLocaleString(); +} + +/** Runtime endpoint info passed from the parent flow. */ +export interface RuntimeEndpointInfo { + name: string; + version: number; +} + +/** Runtime info with endpoints, passed from the parent flow. */ +export interface RuntimeInfo { + name: string; + endpoints: RuntimeEndpointInfo[]; +} + +/** Gateway target info passed from the parent flow. */ +export interface GatewayTargetInfo { + name: string; + runtimeRef: string; + qualifier: string; +} + +/** HTTP gateway info with targets, passed from the parent flow. */ +export interface HttpGatewayInfo { + name: string; + runtimeRef: string; + targets: GatewayTargetInfo[]; +} + +/** Online eval config info with agent and endpoint for filtering. */ +export interface OnlineEvalConfigInfo { + name: string; + agent: string; + endpoint?: string; +} + +interface AddABTestScreenProps { + onComplete: (config: AddABTestConfig) => void; + onExit: () => void; + existingTestNames: string[]; + agents: { name: string }[]; + existingHttpGateways: string[]; + deployedBundles: { name: string; bundleId: string }[]; + onlineEvalConfigs: string[]; + fetchBundleVersions: (bundleId: string) => Promise<{ versionId: string; createdAt: string }[]>; + onCreateBundle?: () => void; + /** Full runtime info including endpoints (for target-based mode). */ + runtimes: RuntimeInfo[]; + /** Full HTTP gateway info including targets (for target-based mode). */ + httpGatewayDetails: HttpGatewayInfo[]; + /** Full online eval config objects for target-based eval filtering. */ + onlineEvalConfigDetails?: OnlineEvalConfigInfo[]; + /** Callback to switch to the dedicated target-based wizard screen. 
*/ + onSwitchToTargetBased?: () => void; +} + +export function AddABTestScreen({ + onComplete, + onExit, + existingTestNames, + agents, + existingHttpGateways, + deployedBundles, + onlineEvalConfigs, + fetchBundleVersions, + onCreateBundle, + runtimes, + httpGatewayDetails, + onlineEvalConfigDetails = [], + onSwitchToTargetBased, +}: AddABTestScreenProps) { + const wizard = useAddABTestWizard(); + + // Build select items + const agentItems: SelectableItem[] = useMemo( + () => agents.map(a => ({ id: a.name, title: a.name, description: 'Agent' })), + [agents] + ); + + const bundleItems: SelectableItem[] = useMemo( + () => deployedBundles.map(b => ({ id: b.name, title: b.name, description: `ID: ${b.bundleId}` })), + [deployedBundles] + ); + + const onlineEvalItems: SelectableItem[] = useMemo( + () => onlineEvalConfigs.map(name => ({ id: name, title: name, description: 'Online Eval Config' })), + [onlineEvalConfigs] + ); + + const gatewayItems: SelectableItem[] = useMemo(() => { + const items: SelectableItem[] = []; + for (const gwName of existingHttpGateways) { + items.push({ id: gwName, title: gwName, description: 'Existing HTTP gateway' }); + } + items.push({ + id: '__create__', + title: '+ Create new gateway', + description: 'Auto-create for this AB test', + spaceBefore: items.length > 0, + }); + return items; + }, [existingHttpGateways]); + + const enableItems: SelectableItem[] = useMemo( + () => [ + { id: 'yes', title: 'Yes', description: 'Start the AB test immediately after deploy' }, + { id: 'no', title: 'No', description: 'Create paused — start manually later' }, + ], + [] + ); + + // Version items — fetched dynamically per bundle + const [controlVersionItems, setControlVersionItems] = React.useState([]); + const [treatmentVersionItems, setTreatmentVersionItems] = React.useState([]); + const [controlVersionLoadState, setControlVersionLoadState] = React.useState('idle'); + const [treatmentVersionLoadState, setTreatmentVersionLoadState] = React.useState('idle'); 
+ + const handleFetchVersions = React.useCallback( + (bundleName: string) => { + const bundle = deployedBundles.find(b => b.name === bundleName); + if (!bundle) return; + + setControlVersionLoadState('loading'); + setTreatmentVersionLoadState('loading'); + + void fetchBundleVersions(bundle.bundleId) + .then(versions => { + const items = versions.map(v => ({ + id: v.versionId, + title: v.versionId.slice(0, 8), + description: `Created: ${formatVersionDate(v.createdAt)}`, + })); + setControlVersionItems(items); + setTreatmentVersionItems(items); + setControlVersionLoadState('loaded'); + setTreatmentVersionLoadState('loaded'); + }) + .catch(() => { + setControlVersionLoadState('error'); + setTreatmentVersionLoadState('error'); + }); + }, + [deployedBundles, fetchBundleVersions] + ); + + // ── Gateway sub-flow state (target-based: "create new" text input) ──────── + const [gatewayCreateMode, setGatewayCreateMode] = useState(false); + + // ── Target picker sub-flow state ────────────────────────────────────────── + // Sub-flow phases: 'pick' -> 'selectRuntime' -> 'selectQualifier' + type TargetSubFlowPhase = 'pick' | 'selectRuntime' | 'selectQualifier'; + const [controlSubFlow, setControlSubFlow] = useState('pick'); + const [controlNewRuntime, setControlNewRuntime] = useState(''); + + const [treatmentSubFlow, setTreatmentSubFlow] = useState('pick'); + const [treatmentNewRuntime, setTreatmentNewRuntime] = useState(''); + + /* eslint-disable react-hooks/set-state-in-effect -- intentional reset on step change */ + useEffect(() => { + if (wizard.step === 'controlTarget') { + setControlSubFlow('pick'); + setControlNewRuntime(''); + } + }, [wizard.step]); + + useEffect(() => { + if (wizard.step === 'treatmentTarget') { + setTreatmentSubFlow('pick'); + setTreatmentNewRuntime(''); + } + }, [wizard.step]); + /* eslint-enable react-hooks/set-state-in-effect */ + + // Step flags + const isModeStep = wizard.step === 'mode'; + const isNameStep = wizard.step === 'name'; + const 
isDescriptionStep = wizard.step === 'description'; + const isAgentStep = wizard.step === 'agent'; + const isGatewayStep = wizard.step === 'gateway'; + const isVariantsStep = wizard.step === 'variants'; + const isOnlineEvalStep = wizard.step === 'onlineEval'; + const isControlTargetStep = wizard.step === 'controlTarget'; + const isTreatmentTargetStep = wizard.step === 'treatmentTarget'; + const isWeightsStep = wizard.step === 'weights'; + const isEvalPathStep = wizard.step === 'evalPath'; + const isEvalSelectStep = wizard.step === 'evalSelect'; + const isEnableStep = wizard.step === 'enableOnCreate'; + const isConfirmStep = wizard.step === 'confirm'; + + const isTargetBased = wizard.config.mode === 'target-based'; + + // Tell the wizard which steps to skip (both forward and backward navigation). + const gatewayChoiceTypeRef = React.useRef(wizard.config.gatewayChoice.type); + + const shouldSkipStep = useCallback( + (s: string) => { + // Agent selection is only needed in config-bundle mode when auto-creating a gateway. 
+ if (s === 'agent' && (isTargetBased || gatewayChoiceTypeRef.current !== 'create-new')) return true; + // Config-bundle steps skipped in target-based mode + if (s === 'variants' && isTargetBased) return true; + if (s === 'onlineEval' && isTargetBased) return true; + // Target-based steps skipped in config-bundle mode + if (s === 'controlTarget' && !isTargetBased) return true; + if (s === 'treatmentTarget' && !isTargetBased) return true; + if (s === 'weights' && !isTargetBased) return true; + if (s === 'evalPath' && !isTargetBased) return true; + if (s === 'evalSelect' && !isTargetBased) return true; + if (s === 'evalCreate' && !isTargetBased) return true; + if (s === 'evalSamplingRate' && !isTargetBased) return true; + if (s === 'maxDuration') return true; + return false; + }, + [isTargetBased] + ); + + useEffect(() => { + wizard.setSkipCheck(shouldSkipStep); + }, [shouldSkipStep]); // wizard.setSkipCheck is stable (useCallback with no deps) + + // Mode selection items + const modeItems: SelectableItem[] = useMemo( + () => [ + { + id: 'config-bundle', + title: 'Config Bundle', + description: 'Split traffic between config bundle versions (same target, different config)', + }, + { + id: 'target-based', + title: 'Target-Based', + description: 'Split traffic between gateway targets (different targets, each self-contained)', + }, + ], + [] + ); + + // ── Target picker items builder ────────────────────────────────────────── + // Builds the three-section grouped picker items for target selection. + const buildTargetItems = useCallback( + (excludeTarget: TargetInfo | null): SelectableItem[] => { + const items: SelectableItem[] = []; + + // Section 1: Existing targets on the selected gateway + const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); + const existingTargets = selectedGw?.targets ?? 
[]; + if (existingTargets.length > 0) { + items.push({ + id: '__section_existing__', + title: '── Existing Targets ──', + description: '', + disabled: true, + }); + for (const t of existingTargets) { + if (excludeTarget?.name === t.name) continue; + items.push({ + id: `existing:${t.name}`, + title: t.name, + description: `endpoint=${t.qualifier} runtime=${t.runtimeRef}`, + }); + } + } + + // Section 2: Endpoints from project runtimes (quick-create targets) + const endpointItems: SelectableItem[] = []; + for (const rt of runtimes) { + for (const ep of rt.endpoints) { + const targetName = ep.name; + if (excludeTarget?.name === targetName) continue; + endpointItems.push({ + id: `endpoint:${rt.name}/${ep.name}`, + title: `${rt.name}/${ep.name}`, + description: `v${ep.version}`, + }); + } + } + if (endpointItems.length > 0) { + items.push({ + id: '__section_endpoints__', + title: '── Endpoints ──', + description: 'Select to auto-create target', + disabled: true, + spaceBefore: items.length > 0, + }); + items.push(...endpointItems); + } + + // Section 3: Create new target + items.push({ + id: '__create_target__', + title: '+ Create new target', + description: 'Configure runtime, name, and endpoint', + spaceBefore: true, + }); + + return items; + }, + [httpGatewayDetails, runtimes, wizard.config.gateway] + ); + + const controlTargetItems = useMemo(() => buildTargetItems(null), [buildTargetItems]); + const treatmentTargetItems = useMemo( + () => buildTargetItems(wizard.config.controlTargetInfo), + [buildTargetItems, wizard.config.controlTargetInfo] + ); + + // Runtime items for the "create new target" sub-flow + const runtimeItems: SelectableItem[] = useMemo( + () => runtimes.map(r => ({ id: r.name, title: r.name, description: `${r.endpoints.length} endpoint(s)` })), + [runtimes] + ); + + // Qualifier items for a given runtime (DEFAULT + all endpoints) + const buildQualifierItems = useCallback( + (runtimeName: string): SelectableItem[] => { + const rt = runtimes.find(r => 
r.name === runtimeName); + const items: SelectableItem[] = [{ id: 'DEFAULT', title: 'DEFAULT', description: 'Default endpoint' }]; + if (rt) { + for (const ep of rt.endpoints) { + items.push({ id: ep.name, title: ep.name, description: `v${ep.version}` }); + } + } + return items; + }, + [runtimes] + ); + + const controlEndpointItems = useMemo( + () => buildQualifierItems(controlNewRuntime), + [buildQualifierItems, controlNewRuntime] + ); + const treatmentEndpointItems = useMemo( + () => buildQualifierItems(treatmentNewRuntime), + [buildQualifierItems, treatmentNewRuntime] + ); + + // Navigation hooks for select steps + const modeNav = useListNavigation({ + items: modeItems, + onSelect: item => { + if (item.id === 'target-based' && onSwitchToTargetBased) { + onSwitchToTargetBased(); + return; + } + wizard.setMode(item.id as 'config-bundle' | 'target-based'); + }, + onExit: () => wizard.goBack(), + isActive: isModeStep, + }); + + const agentNav = useListNavigation({ + items: agentItems, + onSelect: item => wizard.setAgent(item.id), + onExit: () => wizard.goBack(), + isActive: isAgentStep, + }); + + const gatewayNav = useListNavigation({ + items: gatewayItems, + onSelect: item => { + if (item.id === '__create__') { + setGatewayCreateMode(true); + return; + } + const choice = { type: 'existing-http', name: item.id } as const; + gatewayChoiceTypeRef.current = choice.type; + wizard.setGatewayWithName(item.id, false); + }, + onExit: () => wizard.goBack(), + isActive: isGatewayStep && !gatewayCreateMode, + isDisabled: item => item.disabled === true, + }); + + const onlineEvalNav = useListNavigation({ + items: onlineEvalItems, + onSelect: item => wizard.setOnlineEval(item.id), + onExit: () => wizard.goBack(), + isActive: isOnlineEvalStep, + }); + + // ── Control target picker navigation ───────────────────────────────────── + const controlTargetNav = useListNavigation({ + items: controlTargetItems, + onSelect: item => { + if (item.id === '__create_target__') { + 
setControlSubFlow('selectRuntime'); + return; + } + if (item.id.startsWith('existing:')) { + const targetName = item.id.replace('existing:', ''); + const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); + const target = selectedGw?.targets.find(t => t.name === targetName); + if (target) { + wizard.setControlTarget( + { name: target.name, runtimeRef: target.runtimeRef, qualifier: target.qualifier }, + false + ); + } + return; + } + if (item.id.startsWith('endpoint:')) { + const path = item.id.replace('endpoint:', ''); + const [runtimeName, endpointName] = path.split('/'); + if (runtimeName && endpointName) { + const autoName = `${runtimeName}-${endpointName}`; + wizard.setControlTarget({ name: autoName, runtimeRef: runtimeName, qualifier: endpointName }, true); + } + } + }, + onExit: () => wizard.goBack(), + isActive: isControlTargetStep && controlSubFlow === 'pick', + isDisabled: item => item.disabled === true, + }); + + // Control sub-flow: select runtime + const controlRuntimeNav = useListNavigation({ + items: runtimeItems, + onSelect: item => { + setControlNewRuntime(item.id); + setControlSubFlow('selectQualifier'); + }, + onExit: () => setControlSubFlow('pick'), + isActive: isControlTargetStep && controlSubFlow === 'selectRuntime', + }); + + // Control sub-flow: select qualifier (auto-generates target name) + const controlEndpointNav = useListNavigation({ + items: controlEndpointItems, + onSelect: item => { + const autoName = `${controlNewRuntime}-${item.id}`; + wizard.setControlTarget({ name: autoName, runtimeRef: controlNewRuntime, qualifier: item.id }, true); + }, + onExit: () => setControlSubFlow('selectRuntime'), + isActive: isControlTargetStep && controlSubFlow === 'selectQualifier', + }); + + // ── Treatment target picker navigation ─────────────────────────────────── + const treatmentTargetNav = useListNavigation({ + items: treatmentTargetItems, + onSelect: item => { + if (item.id === '__create_target__') { + 
setTreatmentSubFlow('selectRuntime'); + return; + } + if (item.id.startsWith('existing:')) { + const targetName = item.id.replace('existing:', ''); + const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); + const target = selectedGw?.targets.find(t => t.name === targetName); + if (target) { + wizard.setTreatmentTarget( + { name: target.name, runtimeRef: target.runtimeRef, qualifier: target.qualifier }, + false + ); + } + return; + } + if (item.id.startsWith('endpoint:')) { + const path = item.id.replace('endpoint:', ''); + const [runtimeName, endpointName] = path.split('/'); + if (runtimeName && endpointName) { + const autoName = `${runtimeName}-${endpointName}`; + wizard.setTreatmentTarget({ name: autoName, runtimeRef: runtimeName, qualifier: endpointName }, true); + } + } + }, + onExit: () => wizard.goBack(), + isActive: isTreatmentTargetStep && treatmentSubFlow === 'pick', + isDisabled: item => item.disabled === true, + }); + + // Treatment sub-flow: select runtime + const treatmentRuntimeNav = useListNavigation({ + items: runtimeItems, + onSelect: item => { + setTreatmentNewRuntime(item.id); + setTreatmentSubFlow('selectQualifier'); + }, + onExit: () => setTreatmentSubFlow('pick'), + isActive: isTreatmentTargetStep && treatmentSubFlow === 'selectRuntime', + }); + + // Treatment sub-flow: select qualifier (auto-generates target name) + const treatmentEndpointNav = useListNavigation({ + items: treatmentEndpointItems, + onSelect: item => { + const autoName = `${treatmentNewRuntime}-${item.id}`; + wizard.setTreatmentTarget({ name: autoName, runtimeRef: treatmentNewRuntime, qualifier: item.id }, true); + }, + onExit: () => setTreatmentSubFlow('selectRuntime'), + isActive: isTreatmentTargetStep && treatmentSubFlow === 'selectQualifier', + }); + + const evalPathItems: SelectableItem[] = useMemo( + () => [ + { + id: 'select', + title: 'Select existing online eval configs', + description: 'Use configs already in your project', + }, + { id: 
'create', title: 'Create new', description: 'Pick evaluators + sampling rate, auto-create configs' }, + ], + [] + ); + + const evalPathNav = useListNavigation({ + items: evalPathItems, + onSelect: item => wizard.setEvalPath(item.id as 'select' | 'create'), + onExit: () => wizard.goBack(), + isActive: isEvalPathStep, + }); + + // ── Eval select sub-flow: pick control eval, then treatment eval ──────── + type EvalSelectPhase = 'controlEval' | 'treatmentEval'; + const [evalSelectPhase, setEvalSelectPhase] = useState('controlEval'); + const [selectedControlEval, setSelectedControlEval] = useState(''); + + // Reset eval select sub-flow when entering the step + /* eslint-disable react-hooks/set-state-in-effect -- intentional reset on step change */ + useEffect(() => { + if (wizard.step === 'evalSelect') { + setEvalSelectPhase('controlEval'); + setSelectedControlEval(''); + } + }, [wizard.step]); + /* eslint-enable react-hooks/set-state-in-effect */ + + // Filter online eval configs by runtime + endpoint (qualifier) + const controlRuntime = wizard.config.controlTargetInfo?.runtimeRef ?? ''; + const controlEndpoint = wizard.config.controlTargetInfo?.qualifier ?? ''; + const treatmentRuntime = wizard.config.treatmentTargetInfo?.runtimeRef ?? ''; + const treatmentEndpoint = wizard.config.treatmentTargetInfo?.qualifier ?? ''; + + const controlEvalItems: SelectableItem[] = useMemo(() => { + return onlineEvalConfigDetails + .filter(c => c.agent === controlRuntime && (c.endpoint ?? 'DEFAULT') === controlEndpoint) + .map(c => ({ id: c.name, title: c.name, description: `${c.agent}/${c.endpoint ?? 'DEFAULT'}` })); + }, [onlineEvalConfigDetails, controlRuntime, controlEndpoint]); + + const treatmentEvalItems: SelectableItem[] = useMemo(() => { + return onlineEvalConfigDetails + .filter(c => c.agent === treatmentRuntime && (c.endpoint ?? 'DEFAULT') === treatmentEndpoint) + .map(c => ({ id: c.name, title: c.name, description: `${c.agent}/${c.endpoint ?? 
'DEFAULT'}` })); + }, [onlineEvalConfigDetails, treatmentRuntime, treatmentEndpoint]); + + const controlEvalNoMatch = isEvalSelectStep && evalSelectPhase === 'controlEval' && controlEvalItems.length === 0; + const treatmentEvalNoMatch = + isEvalSelectStep && evalSelectPhase === 'treatmentEval' && treatmentEvalItems.length === 0; + + const controlEvalNav = useListNavigation({ + items: controlEvalItems, + onSelect: item => { + setSelectedControlEval(item.id); + setEvalSelectPhase('treatmentEval'); + }, + onExit: () => wizard.goBack(), + isActive: isEvalSelectStep && evalSelectPhase === 'controlEval' && !controlEvalNoMatch, + }); + + const treatmentEvalNav = useListNavigation({ + items: treatmentEvalItems, + onSelect: item => { + wizard.setEvalSelect(selectedControlEval, item.id); + }, + onExit: () => setEvalSelectPhase('controlEval'), + isActive: isEvalSelectStep && evalSelectPhase === 'treatmentEval' && !treatmentEvalNoMatch, + }); + + const enableNav = useListNavigation({ + items: enableItems, + onSelect: item => wizard.setEnableOnCreate(item.id === 'yes'), + onExit: () => wizard.goBack(), + isActive: isEnableStep, + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(wizard.config), + onExit: () => wizard.goBack(), + isActive: isConfirmStep, + }); + + // Help text + const isSelectStep = + isModeStep || + isAgentStep || + (isGatewayStep && !gatewayCreateMode) || + isOnlineEvalStep || + isEnableStep || + isControlTargetStep || + isTreatmentTargetStep || + isEvalPathStep || + isEvalSelectStep; + const helpText = isSelectStep + ? HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : isVariantsStep + ? 
HELP_TEXT.VARIANTS_FORM + : HELP_TEXT.TEXT_INPUT; + + const headerContent = ; + + const controlWeight = 100 - wizard.config.treatmentWeight; + + // Format target display for confirm review + const formatTargetDisplay = (info: TargetInfo | null, isNew: boolean): string => { + if (!info) return '(not set)'; + const newLabel = isNew ? ' (new)' : ''; + return `${info.name} endpoint=${info.qualifier} runtime=${info.runtimeRef}${newLabel}`; + }; + + return ( + + + {isModeStep && ( + + )} + + {isNameStep && ( + (existingTestNames.includes(value) ? `AB test "${value}" already exists` : true)} + /> + )} + + {isDescriptionStep && ( + wizard.goBack()} + /> + )} + + {isAgentStep && } + + {/* ── Step 4: Gateway selection ──────────────────────────── */} + {isGatewayStep && !gatewayCreateMode && ( + + )} + {isGatewayStep && gatewayCreateMode && ( + { + gatewayChoiceTypeRef.current = 'create-new'; + wizard.setGatewayWithName(name, true); + setGatewayCreateMode(false); + }} + onCancel={() => setGatewayCreateMode(false)} + /> + )} + + {isVariantsStep && ( + wizard.goBack()} + onCreateBundle={onCreateBundle} + /> + )} + + {/* ── Step 5: Control target selection ─────────────────── */} + {isControlTargetStep && controlSubFlow === 'pick' && ( + + )} + {isControlTargetStep && controlSubFlow === 'selectRuntime' && ( + + )} + {isControlTargetStep && controlSubFlow === 'selectQualifier' && ( + + )} + + {/* ── Step 6: Treatment target selection ───────────────── */} + {isTreatmentTargetStep && treatmentSubFlow === 'pick' && ( + + {wizard.config.controlTargetInfo && ( + + + {'\u2713'} Control: {wizard.config.controlTargetInfo.name} endpoint= + {wizard.config.controlTargetInfo.qualifier} + + + )} + + + )} + {isTreatmentTargetStep && treatmentSubFlow === 'selectRuntime' && ( + + )} + {isTreatmentTargetStep && treatmentSubFlow === 'selectQualifier' && ( + + )} + + {/* ── Target-based: Traffic weights ───────────────────── */} + {isWeightsStep && ( + { + const w = parseInt(value, 10); + if 
(!isNaN(w) && w >= 1 && w <= 99) { + wizard.setWeights(w, 100 - w); + } + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const w = parseInt(value, 10); + if (isNaN(w)) return 'Must be a number'; + if (w < 1 || w > 99) return 'Must be between 1 and 99'; + return true; + }} + /> + )} + + {/* ── Target-based: Eval path selection ───────────────── */} + {isEvalPathStep && ( + + )} + + {/* ── Target-based: Eval select (control) ───────────── */} + {isEvalSelectStep && evalSelectPhase === 'controlEval' && !controlEvalNoMatch && ( + + )} + {isEvalSelectStep && evalSelectPhase === 'controlEval' && controlEvalNoMatch && ( + + No online eval config found for {controlRuntime}/{controlEndpoint}. Create one first: agentcore add + online-eval --runtime {controlRuntime} --endpoint {controlEndpoint} + + )} + + {/* ── Target-based: Eval select (treatment) ─────────── */} + {isEvalSelectStep && evalSelectPhase === 'treatmentEval' && !treatmentEvalNoMatch && ( + + + + {'\u2713'} Control eval: {selectedControlEval} + + + + + )} + {isEvalSelectStep && evalSelectPhase === 'treatmentEval' && treatmentEvalNoMatch && ( + + No online eval config found for {treatmentRuntime}/{treatmentEndpoint}. Create one first: agentcore add + online-eval --runtime {treatmentRuntime} --endpoint {treatmentEndpoint} + + )} + + {/* ── Config-bundle: Online eval selection ────────────── */} + {isOnlineEvalStep && + (onlineEvalItems.length > 0 ? ( + + ) : ( + + No online eval configs found. An online eval is required for AB tests. Add one with `agentcore add + online-eval`, then retry. Press Esc to go back. + + ))} + + {/* TODO(post-preview): Re-enable maxDuration TextInput once configurable duration is launched. 
*/} + + {isEnableStep && ( + + )} + + {isConfirmStep && ( + + )} + + + ); +} diff --git a/src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx b/src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx new file mode 100644 index 000000000..48adc621f --- /dev/null +++ b/src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx @@ -0,0 +1,26 @@ +import type { RemovableResource } from '../../../primitives/types'; +import type { SelectableItem } from '../../components'; +import { SelectScreen } from '../../components'; +import React, { useMemo } from 'react'; + +interface RemoveABTestScreenProps { + abTests: RemovableResource[]; + onSelect: (testName: string) => void; + onExit: () => void; +} + +export function RemoveABTestScreen({ abTests, onSelect, onExit }: RemoveABTestScreenProps) { + const items: SelectableItem[] = useMemo( + () => + abTests.map(t => ({ + id: t.name, + title: t.name, + description: 'AB Test', + })), + [abTests] + ); + + return ( + onSelect(item.id)} onExit={onExit} /> + ); +} diff --git a/src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx b/src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx new file mode 100644 index 000000000..60b92dd45 --- /dev/null +++ b/src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx @@ -0,0 +1,712 @@ +import type { SelectableItem } from '../../components'; +import { + ConfirmReview, + Cursor, + Panel, + Screen, + StepIndicator, + TextInput, + TwoColumn, + WizardSelect, +} from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { usePanelNavigation } from '../../hooks/usePanelNavigation'; +import type { HttpGatewayInfo, OnlineEvalConfigInfo, RuntimeInfo } from './AddABTestScreen'; +import type { AddABTestConfig, TargetInfo } from './types'; +import { TARGET_BASED_STEP_LABELS, useTargetBasedWizard } from './useTargetBasedWizard'; +import { Box, Text, useInput } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; + +// 
───────────────────────────────────────────────────────────────────────────── +// Props +// ───────────────────────────────────────────────────────────────────────────── + +interface TargetBasedABTestScreenProps { + onComplete: (config: AddABTestConfig) => void; + onExit: () => void; + existingTestNames: string[]; + runtimes: RuntimeInfo[]; + httpGatewayDetails: HttpGatewayInfo[]; + existingHttpGateways: string[]; + onlineEvalConfigDetails: OnlineEvalConfigInfo[]; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Builder field indices +// ───────────────────────────────────────────────────────────────────────────── + +const FIELD_TARGET = 0; +const FIELD_WEIGHT = 1; +const FIELD_EVAL = 2; +const FIELD_COUNT = 3; + +// ───────────────────────────────────────────────────────────────────────────── +// VariantColumn sub-component +// ───────────────────────────────────────────────────────────────────────────── + +interface VariantColumnProps { + label: string; + color: string; + isActive: boolean; + focusedField: number | null; + activeField: number | null; + targetInfo: TargetInfo | null; + weight: number; + evalConfigName: string; + targetItems: SelectableItem[]; + targetNavIndex: number; + evalItems: SelectableItem[]; + evalNavIndex: number; + onWeightSubmit: (value: string) => void; + onWeightCancel: () => void; +} + +function VariantColumn({ + label, + color, + isActive, + focusedField, + activeField, + targetInfo, + weight, + evalConfigName, + targetItems, + targetNavIndex, + evalItems, + evalNavIndex, + onWeightSubmit, + onWeightCancel, +}: VariantColumnProps) { + const borderColor = isActive ? color : 'gray'; + + const fieldLabel = (idx: number, text: string, value: string) => { + const isFocused = focusedField === idx; + const isFieldActive = activeField === idx; + const prefix = isFocused || isFieldActive ? '>' : ' '; + const checkmark = value && value !== '(not set)' ? 
'\u2713 ' : ''; + + return ( + + + {prefix} {text}:{' '} + + + {checkmark} + {value} + + + ); + }; + + return ( + + + {label} + + + {/* Target field */} + {activeField === FIELD_TARGET ? ( + + ) : ( + fieldLabel( + FIELD_TARGET, + 'Target', + targetInfo ? `${targetInfo.name} (${targetInfo.runtimeRef}/${targetInfo.qualifier})` : '(not set)' + ) + )} + + {/* Weight field */} + {activeField === FIELD_WEIGHT ? ( + { + const w = parseInt(value, 10); + if (isNaN(w)) return 'Must be a number'; + if (w < 1 || w > 99) return 'Must be between 1 and 99'; + return true; + }} + /> + ) : ( + fieldLabel(FIELD_WEIGHT, 'Weight', `${weight}%`) + )} + + {/* Eval config field */} + {activeField === FIELD_EVAL ? ( + evalItems.length > 0 ? ( + + ) : ( + + No eval config found for this target. + Press Esc to go back. Create one with: agentcore add online-eval + + ) + ) : ( + fieldLabel(FIELD_EVAL, 'Eval', evalConfigName || '(optional)') + )} + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main Screen +// ───────────────────────────────────────────────────────────────────────────── + +export function TargetBasedABTestScreen({ + onComplete, + onExit, + existingTestNames, + runtimes, + httpGatewayDetails, + existingHttpGateways, + onlineEvalConfigDetails, +}: TargetBasedABTestScreenProps) { + const wizard = useTargetBasedWizard(); + + // ── Name/Description multi-field form ─────────────────────────────────── + type NameField = 'name' | 'description'; + const NAME_FIELDS: NameField[] = ['name', 'description']; + const [activeNameField, setActiveNameField] = useState('name'); + const [nameValue, setNameValue] = useState(''); + const [descriptionValue, setDescriptionValue] = useState(''); + const [nameError, setNameError] = useState(null); + const [gatewayCreateMode, setGatewayCreateMode] = useState(false); + + // Step flags + const isNameStep = wizard.step === 'nameDescription'; + const isGatewayStep = wizard.step === 'gateway'; + const 
isBuilderStep = wizard.step === 'builder'; + const isEnableStep = wizard.step === 'enableOnCreate'; + const isConfirmStep = wizard.step === 'confirm'; + + // ── Name/Description input handler ───────────────────────────────────── + useInput( + (input, key) => { + if (!isNameStep) return; + + if (key.escape) { + if (activeNameField === 'description') { + setActiveNameField('name'); + } else { + onExit(); + } + return; + } + + if (key.tab || key.upArrow || key.downArrow) { + const idx = NAME_FIELDS.indexOf(activeNameField); + if (key.shift || key.upArrow) { + setActiveNameField(NAME_FIELDS[(idx - 1 + NAME_FIELDS.length) % NAME_FIELDS.length]!); + } else { + setActiveNameField(NAME_FIELDS[(idx + 1) % NAME_FIELDS.length]!); + } + setNameError(null); + return; + } + + if (key.return) { + if (activeNameField === 'name') { + if (!nameValue.trim()) { + setNameError('Name is required'); + return; + } + if (!/^[a-zA-Z][a-zA-Z0-9_]{0,47}$/.test(nameValue.trim())) { + setNameError('Must begin with a letter, alphanumeric + underscores only (max 48 chars)'); + return; + } + if (existingTestNames.includes(nameValue.trim())) { + setNameError(`AB test "${nameValue.trim()}" already exists`); + return; + } + setActiveNameField('description'); + setNameError(null); + return; + } + // On description, submit both + if (!nameValue.trim()) { + setNameError('Name is required'); + setActiveNameField('name'); + return; + } + wizard.setName(nameValue.trim()); + wizard.setDescription(descriptionValue.trim()); + wizard.advanceFromNameDescription(); + return; + } + + // Text input + if (key.backspace || key.delete) { + if (activeNameField === 'name') setNameValue(v => v.slice(0, -1)); + else setDescriptionValue(v => v.slice(0, -1)); + setNameError(null); + return; + } + if (input && !key.ctrl && !key.meta) { + if (activeNameField === 'name') setNameValue(v => v + input); + else setDescriptionValue(v => v + input); + setNameError(null); + } + }, + { isActive: isNameStep } + ); + + // ── Gateway 
items ─────────────────────────────────────────────────────── + const gatewayItems: SelectableItem[] = useMemo(() => { + const items: SelectableItem[] = []; + for (const gwName of existingHttpGateways) { + items.push({ id: gwName, title: gwName, description: 'Existing HTTP gateway' }); + } + items.push({ + id: '__create__', + title: 'Create new gateway', + description: 'Auto-create for this AB test', + }); + return items; + }, [existingHttpGateways]); + + // ── Target items builder ──────────────────────────────────────────────── + const buildTargetItems = useCallback( + (excludeTarget: TargetInfo | null): SelectableItem[] => { + const items: SelectableItem[] = []; + + // Section 1: Existing targets on the selected gateway + const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); + const existingTargets = selectedGw?.targets ?? []; + if (existingTargets.length > 0) { + items.push({ + id: '__section_existing__', + title: '── Gateway Targets ──', + description: '', + disabled: true, + }); + for (const t of existingTargets) { + if (t.name === excludeTarget?.name) continue; + items.push({ + id: `existing:${t.name}`, + title: t.name, + description: `${t.runtimeRef}/${t.qualifier}`, + }); + } + } + + // Section 2: Runtime endpoints (auto-create targets) + const endpointItems: SelectableItem[] = []; + for (const rt of runtimes) { + for (const ep of rt.endpoints) { + const targetName = `${rt.name}-${ep.name}`; + if (targetName === excludeTarget?.name) continue; + endpointItems.push({ + id: `endpoint:${rt.name}/${ep.name}`, + title: `${rt.name}/${ep.name}`, + description: `v${ep.version}`, + }); + } + } + if (endpointItems.length > 0) { + items.push({ + id: '__section_endpoints__', + title: '── Runtime Endpoints ──\n Select to auto-create target', + description: '', + disabled: true, + spaceBefore: items.length > 0, + }); + items.push(...endpointItems); + } + + return items; + }, + [httpGatewayDetails, runtimes, wizard.config.gateway] + ); + + 
const controlTargetItems = useMemo(() => buildTargetItems(null), [buildTargetItems]); + const treatmentTargetItems = useMemo( + () => buildTargetItems(wizard.config.controlTargetInfo), + [buildTargetItems, wizard.config.controlTargetInfo] + ); + + // ── Eval items (auto-matched by runtime + endpoint) ───────────────────── + const buildEvalItems = useCallback( + (targetInfo: TargetInfo | null): SelectableItem[] => { + if (!targetInfo) return []; + return onlineEvalConfigDetails + .filter(c => c.agent === targetInfo.runtimeRef && (c.endpoint ?? 'DEFAULT') === targetInfo.qualifier) + .map(c => ({ id: c.name, title: c.name, description: `${c.agent}/${c.endpoint ?? 'DEFAULT'}` })); + }, + [onlineEvalConfigDetails] + ); + + const controlEvalItems = useMemo( + () => buildEvalItems(wizard.config.controlTargetInfo), + [buildEvalItems, wizard.config.controlTargetInfo] + ); + const treatmentEvalItems = useMemo( + () => buildEvalItems(wizard.config.treatmentTargetInfo), + [buildEvalItems, wizard.config.treatmentTargetInfo] + ); + + // Auto-match eval when target is selected and exactly one match exists + useEffect(() => { + if (wizard.config.controlTargetInfo && controlEvalItems.length === 1 && !wizard.config.controlOnlineEval) { + wizard.setControlEval(controlEvalItems[0]!.id); + } + }, [wizard.config.controlTargetInfo, controlEvalItems, wizard.config.controlOnlineEval, wizard.setControlEval]); + + useEffect(() => { + if (wizard.config.treatmentTargetInfo && treatmentEvalItems.length === 1 && !wizard.config.treatmentOnlineEval) { + wizard.setTreatmentEval(treatmentEvalItems[0]!.id); + } + }, [ + wizard.config.treatmentTargetInfo, + treatmentEvalItems, + wizard.config.treatmentOnlineEval, + wizard.setTreatmentEval, + ]); + + // ── Enable items ──────────────────────────────────────────────────────── + const enableItems: SelectableItem[] = useMemo( + () => [ + { id: 'yes', title: 'Yes', description: 'Start the AB test immediately after deploy' }, + { id: 'no', title: 'No', 
description: 'Create paused — start manually later' }, + ], + [] + ); + + // ── Panel navigation for the builder step ─────────────────────────────── + const panel = usePanelNavigation({ + isActive: isBuilderStep, + fieldCount: FIELD_COUNT, + onExit: () => wizard.goBack(), + onComplete: () => wizard.advance(), + }); + + // ── Target selection handler ──────────────────────────────────────────── + const handleTargetSelect = useCallback( + (column: number, item: SelectableItem) => { + const setter = column === 0 ? wizard.setControlTarget : wizard.setTreatmentTarget; + + if (item.id.startsWith('existing:')) { + const targetName = item.id.replace('existing:', ''); + const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); + const target = selectedGw?.targets.find(t => t.name === targetName); + if (target) { + setter({ name: target.name, runtimeRef: target.runtimeRef, qualifier: target.qualifier }, false); + } + } else if (item.id.startsWith('endpoint:')) { + const path = item.id.replace('endpoint:', ''); + const [runtimeName, endpointName] = path.split('/'); + if (runtimeName && endpointName) { + const autoName = `${runtimeName}-${endpointName}`; + setter({ name: autoName, runtimeRef: runtimeName, qualifier: endpointName }, true); + } + } + panel.deactivate(); + }, + [httpGatewayDetails, wizard.config.gateway, wizard.setControlTarget, wizard.setTreatmentTarget, panel] + ); + + // ── List navigations for builder pickers ──────────────────────────────── + + // Control target picker + const controlTargetNav = useListNavigation({ + items: controlTargetItems, + onSelect: item => handleTargetSelect(0, item), + onExit: () => panel.deactivate(), + isActive: panel.isFieldActive(0, FIELD_TARGET), + isDisabled: item => item.disabled === true, + }); + + // Treatment target picker + const treatmentTargetNav = useListNavigation({ + items: treatmentTargetItems, + onSelect: item => handleTargetSelect(1, item), + onExit: () => panel.deactivate(), + isActive: 
panel.isFieldActive(1, FIELD_TARGET), + isDisabled: item => item.disabled === true, + }); + + // Control eval picker + const controlEvalNav = useListNavigation({ + items: controlEvalItems, + onSelect: item => { + wizard.setControlEval(item.id); + panel.deactivate(); + }, + onExit: () => panel.deactivate(), + isActive: panel.isFieldActive(0, FIELD_EVAL), + }); + + // Treatment eval picker + const treatmentEvalNav = useListNavigation({ + items: treatmentEvalItems, + onSelect: item => { + wizard.setTreatmentEval(item.id); + panel.deactivate(); + }, + onExit: () => panel.deactivate(), + isActive: panel.isFieldActive(1, FIELD_EVAL), + }); + + // ── Non-builder navigation hooks ──────────────────────────────────────── + + const gatewayNav = useListNavigation({ + items: gatewayItems, + onSelect: item => { + if (item.id === '__create__') { + setGatewayCreateMode(true); + return; + } + wizard.setGateway(item.id, false); + }, + onExit: () => wizard.goBack(), + isActive: isGatewayStep && !gatewayCreateMode, + isDisabled: item => item.disabled === true, + }); + + const enableNav = useListNavigation({ + items: enableItems, + onSelect: item => wizard.setEnableOnCreate(item.id === 'yes'), + onExit: () => wizard.goBack(), + isActive: isEnableStep, + }); + + // Builder "Continue" navigation — when all fields filled, Enter on confirm row advances + const builderContinueItems: SelectableItem[] = useMemo( + () => (wizard.isBuilderComplete ? 
[{ id: 'continue', title: 'Continue' }] : []), + [wizard.isBuilderComplete] + ); + + const _builderContinueNav = useListNavigation({ + items: builderContinueItems, + onSelect: () => wizard.advance(), + onExit: () => wizard.goBack(), + isActive: false, // Controlled programmatically below + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(wizard.toAddABTestConfig()), + onExit: () => wizard.goBack(), + isActive: isConfirmStep, + }); + + // ── Help text ─────────────────────────────────────────────────────────── + const isSelectStep = (isGatewayStep && !gatewayCreateMode) || isEnableStep; + const helpText = isSelectStep + ? HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : isBuilderStep + ? 'Tab switch column \u00B7 \u2191\u2193 navigate \u00B7 Enter select \u00B7 Esc back' + : HELP_TEXT.TEXT_INPUT; + + const headerContent = ( + + ); + + // ── Format display helpers ────────────────────────────────────────────── + const formatTargetDisplay = (info: TargetInfo | null, isNew: boolean): string => { + if (!info) return '(not set)'; + const newLabel = isNew ? 
' (new)' : ''; + return `${info.name} endpoint=${info.qualifier} runtime=${info.runtimeRef}${newLabel}`; + }; + + // ── Weight submit handlers ────────────────────────────────────────────── + const handleControlWeightSubmit = useCallback( + (value: string) => { + const w = parseInt(value, 10); + if (!isNaN(w) && w >= 1 && w <= 99) { + wizard.setControlWeight(w); + } + panel.deactivate(); + }, + [wizard, panel] + ); + + const handleTreatmentWeightSubmit = useCallback( + (value: string) => { + const w = parseInt(value, 10); + if (!isNaN(w) && w >= 1 && w <= 99) { + // Treatment weight setter: set control to 100 - treatment + wizard.setControlWeight(100 - w); + } + panel.deactivate(); + }, + [wizard, panel] + ); + + const handleWeightCancel = useCallback(() => { + panel.deactivate(); + }, [panel]); + + return ( + + + {/* ── Step 1: Name + Description ─────────────────────── */} + {isNameStep && ( + + + {'Name: '} + {activeNameField === 'name' && !nameValue && } + + {nameValue || {'e.g., my-ab-test'}} + + {activeNameField === 'name' && nameValue ? : null} + + + {'Description: '} + {activeNameField === 'description' && !descriptionValue && } + + {descriptionValue || {'(optional)'}} + + {activeNameField === 'description' && descriptionValue ? : null} + + {nameError && ( + + {nameError} + + )} + + )} + + {/* ── Step 2: Gateway ────────────────────────────────── */} + {isGatewayStep && !gatewayCreateMode && ( + + )} + {isGatewayStep && gatewayCreateMode && ( + { + wizard.setGateway(name, true); + setGatewayCreateMode(false); + }} + onCancel={() => setGatewayCreateMode(false)} + /> + )} + + {/* ── Step 3: Side-by-Side Builder ───────────────────── */} + {isBuilderStep && ( + + + } + right={ + + } + /> + {wizard.isBuilderComplete && ( + + + {'\u2713'} All fields configured. Press Enter to continue, or adjust values above. + + + )} + {!wizard.isBuilderComplete && ( + + Configure both columns, then press Enter to continue. 
+ + )} + + )} + + {/* ── Step 4: Enable on Create ───────────────────────── */} + {isEnableStep && ( + + )} + + {/* ── Step 5: Confirm ────────────────────────────────── */} + {isConfirmStep && ( + + )} + + + ); +} diff --git a/src/cli/tui/screens/ab-test/VariantConfigForm.tsx b/src/cli/tui/screens/ab-test/VariantConfigForm.tsx new file mode 100644 index 000000000..61f465323 --- /dev/null +++ b/src/cli/tui/screens/ab-test/VariantConfigForm.tsx @@ -0,0 +1,268 @@ +import type { SelectableItem } from '../../components'; +import { TextInput, WizardSelect } from '../../components'; +import { useListNavigation } from '../../hooks'; +import { Box, Text } from 'ink'; +import React, { useCallback, useMemo, useState } from 'react'; + +type VariantSubField = 'controlBundle' | 'controlVersion' | 'treatmentBundle' | 'treatmentVersion' | 'treatmentWeight'; + +const SUB_FIELDS: VariantSubField[] = [ + 'controlBundle', + 'controlVersion', + 'treatmentBundle', + 'treatmentVersion', + 'treatmentWeight', +]; + +export interface VariantConfig { + controlBundle: string; + controlVersion: string; + treatmentBundle: string; + treatmentVersion: string; + treatmentWeight: number; +} + +export type VersionLoadState = 'idle' | 'loading' | 'loaded' | 'error'; + +interface VariantConfigFormProps { + bundleItems: SelectableItem[]; + fetchVersionItems: (bundleName: string) => void; + controlVersionItems: SelectableItem[]; + treatmentVersionItems: SelectableItem[]; + controlVersionLoadState: VersionLoadState; + treatmentVersionLoadState: VersionLoadState; + onComplete: (config: VariantConfig) => void; + onCancel: () => void; + onCreateBundle?: () => void; +} + +export function VariantConfigForm({ + bundleItems, + fetchVersionItems, + controlVersionItems, + treatmentVersionItems, + controlVersionLoadState, + treatmentVersionLoadState, + onComplete, + onCancel, + onCreateBundle, +}: VariantConfigFormProps) { + const [activeField, setActiveField] = useState('controlBundle'); + const [controlBundle, 
setControlBundle] = useState(''); + const [controlVersion, setControlVersion] = useState(''); + const [treatmentBundle, setTreatmentBundle] = useState(''); + const [treatmentVersion, setTreatmentVersion] = useState(''); + const [treatmentWeight, setTreatmentWeight] = useState('20'); + + const augmentedBundleItems: SelectableItem[] = useMemo(() => { + const items: SelectableItem[] = []; + if (onCreateBundle) { + items.push({ id: '__create_bundle__', title: 'Create new config bundle', description: 'Add a new bundle first' }); + } + items.push(...bundleItems); + return items; + }, [bundleItems, onCreateBundle]); + + const advanceField = useCallback(() => { + const idx = SUB_FIELDS.indexOf(activeField); + const next = SUB_FIELDS[idx + 1]; + if (next) setActiveField(next); + }, [activeField]); + + // Navigation for each select sub-field + const controlBundleNav = useListNavigation({ + items: augmentedBundleItems, + onSelect: item => { + if (item.id === '__create_bundle__') { + onCreateBundle?.(); + return; + } + setControlBundle(item.id); + fetchVersionItems(item.id); + advanceField(); + }, + onExit: onCancel, + isActive: activeField === 'controlBundle', + }); + + const controlVersionNav = useListNavigation({ + items: controlVersionItems, + onSelect: item => { + setControlVersion(item.id); + advanceField(); + }, + onExit: () => setActiveField('controlBundle'), + isActive: activeField === 'controlVersion' && controlVersionLoadState === 'loaded', + }); + + const treatmentBundleNav = useListNavigation({ + items: augmentedBundleItems, + onSelect: item => { + if (item.id === '__create_bundle__') { + onCreateBundle?.(); + return; + } + setTreatmentBundle(item.id); + fetchVersionItems(item.id); + advanceField(); + }, + onExit: () => setActiveField('controlVersion'), + isActive: activeField === 'treatmentBundle', + }); + + const treatmentVersionNav = useListNavigation({ + items: treatmentVersionItems, + onSelect: item => { + setTreatmentVersion(item.id); + advanceField(); + }, 
+ onExit: () => setActiveField('treatmentBundle'), + isActive: activeField === 'treatmentVersion' && treatmentVersionLoadState === 'loaded', + }); + + const controlWeight = 100 - parseInt(treatmentWeight || '0', 10); + + const completedValue = (value: string, label: string) => ( + + {label}: + {value || '(pending)'} + {value && } + + ); + + const pendingValue = (label: string) => ( + + {label}: + (pending) + + ); + + const renderVersionField = ( + isActive: boolean, + loadState: VersionLoadState, + items: SelectableItem[], + nav: { selectedIndex: number }, + title: string, + completedVersion: string, + label: string + ) => { + if (!isActive) { + return completedVersion ? completedValue(completedVersion.slice(0, 8), label) : pendingValue(label); + } + + switch (loadState) { + case 'loading': + return {label}: Loading versions...; + case 'error': + return {label}: Failed to load versions. Press Esc to go back and retry.; + case 'loaded': + if (items.length === 0) { + return {label}: No versions found. Deploy the config bundle first.; + } + return ; + default: + return {label}: Waiting...; + } + }; + + return ( + + Configure Variants + + {/* Control section */} + + + Control (C): + + + {activeField === 'controlBundle' ? ( + augmentedBundleItems.length > 0 ? ( + + ) : ( + No deployed config bundles found. + ) + ) : ( + completedValue(controlBundle, ' Bundle') + )} + + {renderVersionField( + activeField === 'controlVersion', + controlVersionLoadState, + controlVersionItems, + controlVersionNav, + ' Select control version', + controlVersion, + ' Version' + )} + + + {/* Treatment section */} + + + Treatment (T1): + + + {activeField === 'treatmentBundle' ? ( + + ) : treatmentBundle ? 
( + completedValue(treatmentBundle, ' Bundle') + ) : ( + pendingValue(' Bundle') + )} + + {renderVersionField( + activeField === 'treatmentVersion', + treatmentVersionLoadState, + treatmentVersionItems, + treatmentVersionNav, + ' Select treatment version', + treatmentVersion, + ' Version' + )} + + {activeField === 'treatmentWeight' ? ( + + setTreatmentWeight(value)} + onSubmit={value => { + const n = parseInt(value, 10); + if (!isNaN(n) && n >= 1 && n <= 99) { + setTreatmentWeight(value); + onComplete({ + controlBundle, + controlVersion, + treatmentBundle, + treatmentVersion, + treatmentWeight: n, + }); + } + }} + onCancel={() => setActiveField('treatmentVersion')} + customValidation={(value: string) => { + const n = parseInt(value, 10); + if (isNaN(n)) return 'Must be a number'; + if (n < 1 || n > 99) return 'Must be between 1 and 99'; + return true; + }} + /> + + ) : treatmentWeight && treatmentVersion ? ( + completedValue(`${treatmentWeight}% (control: ${controlWeight}%)`, ' Weight') + ) : ( + pendingValue(' Weight') + )} + + + ); +} diff --git a/src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx b/src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx new file mode 100644 index 000000000..082d7662b --- /dev/null +++ b/src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx @@ -0,0 +1,286 @@ +import type { VariantConfig } from '../VariantConfigForm'; +import type { GatewayChoice } from '../types'; +import type { StepSkipCheck } from '../useAddABTestWizard'; +import { useAddABTestWizard } from '../useAddABTestWizard'; +import { Text } from 'ink'; +import { render } from 'ink-testing-library'; +import React, { act, useImperativeHandle } from 'react'; +import { describe, expect, it } from 'vitest'; + +// ── Simple harness ───────────────────────────────────────────────────────── + +function Harness() { + const wizard = useAddABTestWizard(); + return ( + + step:{wizard.step} + name:{wizard.config.name} + 
treatmentWeight:{wizard.config.treatmentWeight} + enableOnCreate:{String(wizard.config.enableOnCreate)} + steps:{wizard.steps.join(',')} + + ); +} + +// ── Imperative harness ───────────────────────────────────────────────────── + +interface HarnessHandle { + setName: (name: string) => void; + setDescription: (desc: string) => void; + setAgent: (agent: string) => void; + setGateway: (choice: GatewayChoice) => void; + setVariants: (vc: VariantConfig) => void; + setOnlineEval: (eval_: string) => void; + setMaxDuration: (days: number | undefined) => void; + setEnableOnCreate: (enable: boolean) => void; + setSkipCheck: (check: StepSkipCheck) => void; + goBack: () => void; + reset: () => void; +} + +const ImperativeHarness = React.forwardRef((_, ref) => { + const wizard = useAddABTestWizard(); + useImperativeHandle(ref, () => ({ + setName: wizard.setName, + setDescription: wizard.setDescription, + setAgent: wizard.setAgent, + setGateway: wizard.setGateway, + setVariants: wizard.setVariants, + setOnlineEval: wizard.setOnlineEval, + setMaxDuration: wizard.setMaxDuration, + setEnableOnCreate: wizard.setEnableOnCreate, + setSkipCheck: wizard.setSkipCheck, + goBack: wizard.goBack, + reset: wizard.reset, + })); + return ( + + step:{wizard.step} + name:{wizard.config.name} + description:{wizard.config.description} + agent:{wizard.config.agent} + controlBundle:{wizard.config.controlBundle} + treatmentWeight:{wizard.config.treatmentWeight} + onlineEval:{wizard.config.onlineEval} + maxDuration:{String(wizard.config.maxDuration ?? 
'undefined')} + enableOnCreate:{String(wizard.config.enableOnCreate)} + + ); +}); +ImperativeHarness.displayName = 'ImperativeHarness'; + +// ── Tests ────────────────────────────────────────────────────────────────── + +describe('useAddABTestWizard', () => { + describe('defaults', () => { + it('default step is mode', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('step:mode'); + }); + + it('default treatment weight is 20', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('treatmentWeight:20'); + }); + + it('default enableOnCreate is true', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('enableOnCreate:true'); + }); + + it('has all 10 steps', () => { + const { lastFrame } = render(); + const frame = lastFrame()!.replace(/\n/g, ''); + expect(frame).toContain( + 'steps:mode,name,description,gateway,agent,variants,onlineEval,maxDuration,enableOnCreate,confirm' + ); + }); + }); + + describe('step navigation', () => { + it('setName advances to description', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('Test1')); + + expect(lastFrame()).toContain('step:description'); + expect(lastFrame()).toContain('name:Test1'); + }); + + it('setDescription advances to gateway', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('Test1')); + act(() => ref.current!.setDescription('desc')); + + expect(lastFrame()).toContain('step:gateway'); + expect(lastFrame()).toContain('description:desc'); + }); + + it('setGateway advances to agent', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setGateway({ type: 'create-new' })); + + expect(lastFrame()).toContain('step:agent'); + }); + + it('setAgent advances to variants', () => { + const ref = 
React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setGateway({ type: 'create-new' })); + act(() => ref.current!.setAgent('my-agent')); + + expect(lastFrame()).toContain('step:variants'); + expect(lastFrame()).toContain('agent:my-agent'); + }); + + it('setVariants advances to onlineEval', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setGateway({ type: 'create-new' })); + act(() => ref.current!.setAgent('my-agent')); + act(() => + ref.current!.setVariants({ + controlBundle: 'cb', + controlVersion: 'v1', + treatmentBundle: 'tb', + treatmentVersion: 'v2', + treatmentWeight: 30, + }) + ); + + expect(lastFrame()).toContain('step:onlineEval'); + expect(lastFrame()).toContain('controlBundle:cb'); + expect(lastFrame()).toContain('treatmentWeight:30'); + }); + + it('full wizard reaches confirm step', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setGateway({ type: 'create-new' })); + act(() => ref.current!.setAgent('my-agent')); + act(() => + ref.current!.setVariants({ + controlBundle: 'cb', + controlVersion: 'v1', + treatmentBundle: 'tb', + treatmentVersion: 'v2', + treatmentWeight: 25, + }) + ); + act(() => ref.current!.setOnlineEval('eval-arn')); + act(() => ref.current!.setMaxDuration(30)); + act(() => ref.current!.setEnableOnCreate(false)); + + const frame = lastFrame()!.replace(/\n/g, ''); + expect(frame).toContain('step:confirm'); + expect(frame).toContain('enableOnCreate:false'); + expect(frame).toContain('maxDuration:30'); + }); + }); + + describe('goBack', () => { + it('goes back from description to name', () => { + const ref = React.createRef(); + const { lastFrame } = 
render(); + + act(() => ref.current!.setName('T')); + expect(lastFrame()).toContain('step:description'); + + act(() => ref.current!.goBack()); + expect(lastFrame()).toContain('step:name'); + }); + + it('does not go back from first step', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.goBack()); + expect(lastFrame()).toContain('step:mode'); + }); + }); + + describe('reset', () => { + it('resets to initial state', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('Test1')); + act(() => ref.current!.setDescription('desc')); + expect(lastFrame()).toContain('step:gateway'); + + act(() => ref.current!.reset()); + + expect(lastFrame()).toContain('step:mode'); + expect(lastFrame()).toContain('name:'); + expect(lastFrame()).toContain('treatmentWeight:20'); + }); + }); + + describe('skip check', () => { + it('advance skips over steps marked as skippable', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setSkipCheck(s => s === 'gateway')); + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setAgent('my-agent')); + + expect(lastFrame()).toContain('step:variants'); + }); + + it('goBack skips over steps marked as skippable', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setGateway({ type: 'create-new' })); + act(() => ref.current!.setAgent('my-agent')); + expect(lastFrame()).toContain('step:variants'); + + act(() => ref.current!.setSkipCheck(s => s === 'agent')); + act(() => ref.current!.goBack()); + + expect(lastFrame()).toContain('step:gateway'); + }); + + it('advance skips multiple consecutive skippable steps', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => 
ref.current!.setSkipCheck(s => s === 'agent' || s === 'variants')); + act(() => ref.current!.setName('T')); + act(() => ref.current!.setDescription('')); + act(() => ref.current!.setGateway({ type: 'create-new' })); + + expect(lastFrame()).toContain('step:onlineEval'); + }); + + it('skip check does not affect non-skippable steps', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setSkipCheck(() => false)); + act(() => ref.current!.setName('T')); + + expect(lastFrame()).toContain('step:description'); + }); + }); +}); diff --git a/src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx b/src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx new file mode 100644 index 000000000..4ea0a40d5 --- /dev/null +++ b/src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx @@ -0,0 +1,319 @@ +import type { TargetInfo } from '../types'; +import { useTargetBasedWizard } from '../useTargetBasedWizard'; +import { Text } from 'ink'; +import { render } from 'ink-testing-library'; +import React, { act, useImperativeHandle } from 'react'; +import { describe, expect, it } from 'vitest'; + +// ── Simple harness ───────────────────────────────────────────────────────── + +function Harness() { + const wizard = useTargetBasedWizard(); + return ( + + step:{wizard.step} + name:{wizard.config.name} + description:{wizard.config.description} + gateway:{wizard.config.gateway} + controlWeight:{wizard.config.controlWeight} + treatmentWeight:{wizard.config.treatmentWeight} + enableOnCreate:{String(wizard.config.enableOnCreate)} + + ); +} + +// ── Imperative harness ───────────────────────────────────────────────────── + +interface HarnessHandle { + setName: (name: string) => void; + setDescription: (desc: string) => void; + advanceFromNameDescription: () => void; + setGateway: (name: string, isNew: boolean) => void; + advance: () => void; + goBack: () => void; + setControlTarget: (target: TargetInfo, isNew: 
boolean) => void; + setTreatmentTarget: (target: TargetInfo, isNew: boolean) => void; + setControlWeight: (w: number) => void; + setControlEval: (name: string) => void; + setTreatmentEval: (name: string) => void; + setEnableOnCreate: (enable: boolean) => void; + toAddABTestConfig: ReturnType['toAddABTestConfig']; +} + +const ImperativeHarness = React.forwardRef((_, ref) => { + const wizard = useTargetBasedWizard(); + useImperativeHandle(ref, () => ({ + setName: wizard.setName, + setDescription: wizard.setDescription, + advanceFromNameDescription: wizard.advanceFromNameDescription, + setGateway: wizard.setGateway, + advance: wizard.advance, + goBack: wizard.goBack, + setControlTarget: wizard.setControlTarget, + setTreatmentTarget: wizard.setTreatmentTarget, + setControlWeight: wizard.setControlWeight, + setControlEval: wizard.setControlEval, + setTreatmentEval: wizard.setTreatmentEval, + setEnableOnCreate: wizard.setEnableOnCreate, + toAddABTestConfig: wizard.toAddABTestConfig, + })); + const ctrlName = wizard.config.controlTargetInfo ? wizard.config.controlTargetInfo.name : 'null'; + const treatName = wizard.config.treatmentTargetInfo ? 
wizard.config.treatmentTargetInfo.name : 'null'; + return ( + + {[ + `step:${wizard.step}`, + `name:${wizard.config.name}`, + `description:${wizard.config.description}`, + `gateway:${wizard.config.gateway}`, + `gatewayIsNew:${String(wizard.config.gatewayIsNew)}`, + `controlWeight:${wizard.config.controlWeight}`, + `treatmentWeight:${wizard.config.treatmentWeight}`, + `controlOnlineEval:${wizard.config.controlOnlineEval}`, + `treatmentOnlineEval:${wizard.config.treatmentOnlineEval}`, + `enableOnCreate:${String(wizard.config.enableOnCreate)}`, + `controlTargetInfo:${ctrlName}`, + `treatmentTargetInfo:${treatName}`, + ].join('|')} + + ); +}); +ImperativeHarness.displayName = 'ImperativeHarness'; + +// ── Tests ────────────────────────────────────────────────────────────────── + +describe('useTargetBasedWizard', () => { + describe('defaults', () => { + it('initial step is nameDescription', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('step:nameDescription'); + }); + + it('default weights are 90/10', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('controlWeight:90'); + expect(lastFrame()).toContain('treatmentWeight:10'); + }); + + it('default enableOnCreate is true', () => { + const { lastFrame } = render(); + expect(lastFrame()).toContain('enableOnCreate:true'); + }); + }); + + describe('step navigation', () => { + it('advanceFromNameDescription moves to gateway step', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + + expect(lastFrame()).toContain('step:gateway'); + }); + + it('advance from gateway moves to builder', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + // setGateway auto-advances to builder + act(() => ref.current!.setGateway('my-gw', false)); + + expect(lastFrame()).toContain('step:builder'); + }); + + it('advance from builder 
moves to enableOnCreate', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + act(() => ref.current!.setGateway('my-gw', false)); + // Now at builder, advance to enableOnCreate + act(() => ref.current!.advance()); + + expect(lastFrame()).toContain('step:enableOnCreate'); + }); + + it('advance from enableOnCreate moves to confirm', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + act(() => ref.current!.setGateway('my-gw', false)); + act(() => ref.current!.advance()); // builder → enableOnCreate + act(() => ref.current!.setEnableOnCreate(true)); // enableOnCreate → confirm + + expect(lastFrame()).toContain('step:confirm'); + }); + }); + + describe('goBack', () => { + it('goBack from gateway goes to nameDescription', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + expect(lastFrame()).toContain('step:gateway'); + + act(() => ref.current!.goBack()); + expect(lastFrame()).toContain('step:nameDescription'); + }); + + it('goBack from builder goes to gateway', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + act(() => ref.current!.setGateway('my-gw', false)); + expect(lastFrame()).toContain('step:builder'); + + act(() => ref.current!.goBack()); + expect(lastFrame()).toContain('step:gateway'); + }); + }); + + describe('config updates', () => { + it('setName updates config', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setName('MyTest')); + + expect(lastFrame()).toContain('name:MyTest'); + }); + + it('setDescription updates config', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setDescription('desc1')); + + 
expect(lastFrame()).toContain('description:desc1'); + }); + + it('setGateway updates config', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.advanceFromNameDescription()); + act(() => ref.current!.setGateway('gw-123', true)); + + expect(lastFrame()).toContain('gateway:gw-123'); + expect(lastFrame()).toContain('gatewayIsNew:true'); + }); + + it('setControlTarget updates config with targetInfo', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + const target: TargetInfo = { name: 'ctrl-target', runtimeRef: 'arn:runtime:1', qualifier: 'DEFAULT' }; + act(() => ref.current!.setControlTarget(target, false)); + + expect(lastFrame()).toContain('controlTargetInfo:ctrl-target'); + }); + + it('setTreatmentTarget updates config with targetInfo', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + const target: TargetInfo = { name: 'tt1', runtimeRef: 'arn:runtime:2', qualifier: 'v2' }; + act(() => ref.current!.setTreatmentTarget(target, true)); + + const frame = lastFrame()!.replace(/\n/g, ''); + expect(frame).toContain('treatmentTargetInfo:tt1'); + }); + + it('setControlWeight updates config (sum to 100)', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setControlWeight(70)); + + const frame = lastFrame()!.replace(/\n/g, ''); + expect(frame).toContain('controlWeight:70'); + expect(frame).toContain('treatmentWeight:30'); + }); + + it('setControlEval updates config', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setControlEval('eval-arn-1')); + + expect(lastFrame()).toContain('controlOnlineEval:eval-arn-1'); + }); + + it('setTreatmentEval updates config', () => { + const ref = React.createRef(); + const { lastFrame } = render(); + + act(() => ref.current!.setTreatmentEval('eval-arn-2')); + + expect(lastFrame()).toContain('treatmentOnlineEval:eval-arn-2'); + }); + 
}); + + describe('toAddABTestConfig', () => { + it('returns correct AddABTestConfig shape', () => { + const ref = React.createRef(); + render(); + + const controlTarget: TargetInfo = { name: 'ctrl', runtimeRef: 'arn:runtime:1', qualifier: 'DEFAULT' }; + const treatmentTarget: TargetInfo = { name: 'treat', runtimeRef: 'arn:runtime:2', qualifier: 'v2' }; + + act(() => ref.current!.setName('TestAB')); + act(() => ref.current!.setDescription('A/B test')); + act(() => ref.current!.advanceFromNameDescription()); + act(() => ref.current!.setGateway('my-gateway', false)); + act(() => ref.current!.setControlTarget(controlTarget, false)); + act(() => ref.current!.setTreatmentTarget(treatmentTarget, true)); + act(() => ref.current!.setControlWeight(80)); + act(() => ref.current!.setControlEval('eval-1')); + act(() => ref.current!.setTreatmentEval('eval-2')); + + let config: ReturnType | undefined; + act(() => { + config = ref.current!.toAddABTestConfig(); + }); + + expect(config).toBeDefined(); + expect(config!.mode).toBe('target-based'); + expect(config!.name).toBe('TestAB'); + expect(config!.description).toBe('A/B test'); + expect(config!.gateway).toBe('my-gateway'); + expect(config!.gatewayIsNew).toBe(false); + expect(config!.gatewayChoice).toEqual({ type: 'existing-http', name: 'my-gateway' }); + expect(config!.controlTargetInfo).toEqual(controlTarget); + expect(config!.controlTargetIsNew).toBe(false); + expect(config!.treatmentTargetInfo).toEqual(treatmentTarget); + expect(config!.treatmentTargetIsNew).toBe(true); + expect(config!.controlWeight).toBe(80); + expect(config!.treatmentWeight).toBe(20); + expect(config!.controlOnlineEval).toBe('eval-1'); + expect(config!.treatmentOnlineEval).toBe('eval-2'); + expect(config!.runtime).toBe('arn:runtime:1'); + expect(config!.controlTarget).toBe('ctrl'); + expect(config!.controlEndpoint).toBe('DEFAULT'); + expect(config!.treatmentTarget).toBe('treat'); + expect(config!.treatmentEndpoint).toBe('v2'); + 
expect(config!.enableOnCreate).toBe(true); + expect(config!.evaluators).toEqual([]); + expect(config!.samplingRate).toBe(10); + }); + + it('returns create-new gatewayChoice when gatewayIsNew is true', () => { + const ref = React.createRef(); + render(); + + act(() => ref.current!.advanceFromNameDescription()); + act(() => ref.current!.setGateway('new-gw', true)); + + let config: ReturnType | undefined; + act(() => { + config = ref.current!.toAddABTestConfig(); + }); + + expect(config!.gatewayChoice).toEqual({ type: 'create-new' }); + }); + }); +}); diff --git a/src/cli/tui/screens/ab-test/index.ts b/src/cli/tui/screens/ab-test/index.ts new file mode 100644 index 000000000..162b24eb9 --- /dev/null +++ b/src/cli/tui/screens/ab-test/index.ts @@ -0,0 +1,4 @@ +export { AddABTestFlow } from './AddABTestFlow'; +export { ABTestDetailScreen } from './ABTestDetailScreen'; +export { ABTestPickerScreen } from './ABTestPickerScreen'; +export { RemoveABTestScreen } from './RemoveABTestScreen'; diff --git a/src/cli/tui/screens/ab-test/types.ts b/src/cli/tui/screens/ab-test/types.ts new file mode 100644 index 000000000..977a2ca07 --- /dev/null +++ b/src/cli/tui/screens/ab-test/types.ts @@ -0,0 +1,89 @@ +// ───────────────────────────────────────────────────────────────────────────── +// AB Test Wizard Types +// ───────────────────────────────────────────────────────────────────────────── + +export type ABTestMode = 'config-bundle' | 'target-based'; + +export type AddABTestStep = + | 'mode' + | 'name' + | 'description' + | 'agent' + | 'gateway' + | 'variants' + | 'controlTarget' + | 'treatmentTarget' + | 'weights' + | 'evalPath' + | 'evalSelect' + | 'evalCreate' + | 'evalSamplingRate' + | 'onlineEval' + | 'maxDuration' + | 'enableOnCreate' + | 'confirm'; + +export type GatewayChoice = { type: 'create-new' } | { type: 'existing-http'; name: string }; + +/** Rich target info for target-based AB testing. 
*/
+export interface TargetInfo {
+  /** Target name; the wizards copy it into the legacy `controlTarget` / `treatmentTarget` fields. */
+  name: string;
+  /** Runtime reference; the wizards copy it into the legacy `runtime` field. */
+  runtimeRef: string;
+  /** Qualifier; the wizards copy it into the legacy `controlEndpoint` / `treatmentEndpoint` fields. */
+  qualifier: string;
+}
+
+/**
+ * Flat configuration produced by the add-AB-test wizards.
+ *
+ * One shape serves both modes: fields that do not apply to the selected
+ * `mode` simply keep their default values.
+ */
+export interface AddABTestConfig {
+  mode: ABTestMode;
+  name: string;
+  description: string;
+  agent: string;
+  gatewayChoice: GatewayChoice;
+  // Config-bundle mode
+  controlBundle: string;
+  controlVersion: string;
+  treatmentBundle: string;
+  treatmentVersion: string;
+  treatmentWeight: number;
+  onlineEval: string;
+  // Target-based mode fields
+  gateway: string;
+  gatewayIsNew: boolean;
+  controlTargetInfo: TargetInfo | null;
+  controlTargetIsNew: boolean;
+  treatmentTargetInfo: TargetInfo | null;
+  treatmentTargetIsNew: boolean;
+  // Legacy target-based fields (populated from TargetInfo for downstream compatibility)
+  runtime: string;
+  controlTarget: string;
+  controlEndpoint: string;
+  treatmentTarget: string;
+  treatmentEndpoint: string;
+  controlWeight: number;
+  controlOnlineEval: string;
+  treatmentOnlineEval: string;
+  evaluators: string[];
+  samplingRate: number;
+  // Shared
+  maxDuration: number | undefined;
+  enableOnCreate: boolean;
+}
+
+/**
+ * Short display label for every wizard step.
+ *
+ * Typed as a total record over `AddABTestStep` so that adding a step to the
+ * union without adding a label is a compile error.
+ */
+export const AB_TEST_STEP_LABELS: Record<AddABTestStep, string> = {
+  mode: 'Mode',
+  name: 'Name',
+  description: 'Description',
+  agent: 'Agent',
+  gateway: 'Gateway',
+  variants: 'Variants',
+  controlTarget: 'Control',
+  treatmentTarget: 'Treatment',
+  weights: 'Weights',
+  evalPath: 'Eval',
+  evalSelect: 'Eval',
+  evalCreate: 'Eval',
+  evalSamplingRate: 'Eval',
+  onlineEval: 'Eval',
+  maxDuration: 'Duration',
+  enableOnCreate: 'Enable',
+  confirm: 'Confirm',
+};
diff --git a/src/cli/tui/screens/ab-test/useAddABTestWizard.ts b/src/cli/tui/screens/ab-test/useAddABTestWizard.ts
new file mode 100644
index 000000000..bb4fef0ad
--- /dev/null
+++ b/src/cli/tui/screens/ab-test/useAddABTestWizard.ts
@@ -0,0 +1,324 @@
+import type { VariantConfig } from './VariantConfigForm';
+import type { ABTestMode, AddABTestConfig, AddABTestStep, GatewayChoice, TargetInfo } from './types';
+import { useCallback, useRef, useState } from 'react';
+
+const
CONFIG_BUNDLE_STEPS: AddABTestStep[] = [
+  'mode',
+  'name',
+  'description',
+  'gateway',
+  'agent',
+  'variants',
+  'onlineEval',
+  'maxDuration',
+  'enableOnCreate',
+  'confirm',
+];
+
+/** Ordered step sequence used when `config.mode` is `'target-based'`. */
+const TARGET_BASED_STEPS: AddABTestStep[] = [
+  'mode',
+  'name',
+  'description',
+  'gateway',
+  'controlTarget',
+  'treatmentTarget',
+  'weights',
+  'evalSelect',
+  'enableOnCreate',
+  'confirm',
+];
+
+/** Builds a fresh default config; called for the initial state and by `reset`. */
+function getDefaultConfig(): AddABTestConfig {
+  return {
+    mode: 'config-bundle',
+    name: '',
+    description: '',
+    agent: '',
+    gatewayChoice: { type: 'create-new' },
+    controlBundle: '',
+    controlVersion: '',
+    treatmentBundle: '',
+    treatmentVersion: '',
+    treatmentWeight: 20,
+    onlineEval: '',
+    // Target-based mode fields
+    gateway: '',
+    gatewayIsNew: false,
+    controlTargetInfo: null,
+    controlTargetIsNew: false,
+    treatmentTargetInfo: null,
+    treatmentTargetIsNew: false,
+    // Legacy target-based fields
+    runtime: '',
+    controlTarget: '',
+    controlEndpoint: '',
+    treatmentTarget: '',
+    treatmentEndpoint: '',
+    controlWeight: 90,
+    controlOnlineEval: '',
+    treatmentOnlineEval: '',
+    evaluators: [],
+    samplingRate: 10,
+    maxDuration: undefined,
+    enableOnCreate: true,
+  };
+}
+
+/** Predicate deciding whether a step is skipped during forward/backward navigation. */
+export type StepSkipCheck = (step: AddABTestStep) => boolean;
+
+/**
+ * State machine for the add-AB-test wizard.
+ *
+ * Holds the accumulated `config` and the current `step`. Most setters store
+ * their value and then advance to the next non-skipped step of the sequence
+ * that belongs to the current `config.mode`.
+ *
+ * @returns the current config/step, the active step list, the index of the
+ *   current step in that list (-1 when the step is not part of it), and the
+ *   navigation and setter callbacks.
+ */
+export function useAddABTestWizard() {
+  // Explicit type arguments: without them `step` is inferred as `string` and the
+  // ref as `() => boolean`, which rejects one-argument skip predicates.
+  const [config, setConfig] = useState<AddABTestConfig>(getDefaultConfig);
+  const [step, setStep] = useState<AddABTestStep>('mode');
+  // Kept in a ref so replacing the predicate does not trigger a re-render.
+  const skipCheckRef = useRef<StepSkipCheck>(() => false);
+
+  const getSteps = useCallback((): AddABTestStep[] => {
+    return config.mode === 'target-based' ? TARGET_BASED_STEPS : CONFIG_BUNDLE_STEPS;
+  }, [config.mode]);
+
+  const currentIndex = getSteps().indexOf(step);
+
+  const setSkipCheck = useCallback((check: StepSkipCheck) => {
+    skipCheckRef.current = check;
+  }, []);
+
+  /** Moves to the closest earlier step that is not skipped; no-op on the first step. */
+  const goBack = useCallback(() => {
+    const steps = getSteps();
+    for (let i = currentIndex - 1; i >= 0; i--) {
+      const candidate = steps[i]!;
+      if (!skipCheckRef.current(candidate)) {
+        setStep(candidate);
+        return;
+      }
+    }
+  }, [currentIndex, getSteps]);
+
+  /**
+   * Returns the first non-skipped step after `currentStepName`, or `undefined`
+   * when there is none or when `currentStepName` is not part of the active
+   * sequence.
+   */
+  const nextStep = useCallback(
+    (currentStepName: AddABTestStep): AddABTestStep | undefined => {
+      const steps = getSteps();
+      const idx = steps.indexOf(currentStepName);
+      // A step outside the active sequence has no successor. Without this guard
+      // the scan would start at index 0 and send the wizard back to 'mode'.
+      if (idx === -1) return undefined;
+      for (let i = idx + 1; i < steps.length; i++) {
+        const candidate = steps[i]!;
+        if (!skipCheckRef.current(candidate)) {
+          return candidate;
+        }
+      }
+      return undefined;
+    },
+    [getSteps]
+  );
+
+  const advance = useCallback(
+    (from: AddABTestStep) => {
+      const next = nextStep(from);
+      if (next) setStep(next);
+    },
+    [nextStep]
+  );
+
+  const setMode = useCallback(
+    (mode: ABTestMode) => {
+      setConfig(c => ({ ...c, mode }));
+      // `advance` still sees the previous mode's sequence here; both sequences
+      // place 'name' directly after 'mode', so the result is the same.
+      advance('mode');
+    },
+    [advance]
+  );
+
+  const setName = useCallback(
+    (name: string) => {
+      setConfig(c => ({ ...c, name }));
+      advance('name');
+    },
+    [advance]
+  );
+
+  const setDescription = useCallback(
+    (description: string) => {
+      setConfig(c => ({ ...c, description }));
+      advance('description');
+    },
+    [advance]
+  );
+
+  const setAgent = useCallback(
+    (agent: string) => {
+      setConfig(c => ({ ...c, agent }));
+      advance('agent');
+    },
+    [advance]
+  );
+
+  const setGateway = useCallback(
+    (gatewayChoice: GatewayChoice) => {
+      setConfig(c => ({
+        ...c,
+        gatewayChoice,
+        gateway: gatewayChoice.type === 'existing-http' ? gatewayChoice.name : '',
+        gatewayIsNew: gatewayChoice.type === 'create-new',
+      }));
+      advance('gateway');
+    },
+    [advance]
+  );
+
+  /** Like `setGateway`, but keeps the gateway name even when the gateway is new. */
+  const setGatewayWithName = useCallback(
+    (gatewayName: string, isNew: boolean) => {
+      const gatewayChoice: GatewayChoice = isNew
+        ? { type: 'create-new' }
+        : { type: 'existing-http', name: gatewayName };
+      setConfig(c => ({
+        ...c,
+        gatewayChoice,
+        gateway: gatewayName,
+        gatewayIsNew: isNew,
+      }));
+      advance('gateway');
+    },
+    [advance]
+  );
+
+  const setVariants = useCallback(
+    (variantConfig: VariantConfig) => {
+      setConfig(c => ({
+        ...c,
+        controlBundle: variantConfig.controlBundle,
+        controlVersion: variantConfig.controlVersion,
+        treatmentBundle: variantConfig.treatmentBundle,
+        treatmentVersion: variantConfig.treatmentVersion,
+        treatmentWeight: variantConfig.treatmentWeight,
+      }));
+      advance('variants');
+    },
+    [advance]
+  );
+
+  const setOnlineEval = useCallback(
+    (onlineEval: string) => {
+      setConfig(c => ({ ...c, onlineEval }));
+      advance('onlineEval');
+    },
+    [advance]
+  );
+
+  // Target-based mode setters
+
+  const setControlTarget = useCallback(
+    (target: TargetInfo, isNew: boolean) => {
+      setConfig(c => ({
+        ...c,
+        controlTargetInfo: target,
+        controlTargetIsNew: isNew,
+        controlTarget: target.name,
+        controlEndpoint: target.qualifier,
+        runtime: target.runtimeRef,
+      }));
+      advance('controlTarget');
+    },
+    [advance]
+  );
+
+  const setTreatmentTarget = useCallback(
+    (target: TargetInfo, isNew: boolean) => {
+      setConfig(c => ({
+        ...c,
+        treatmentTargetInfo: target,
+        treatmentTargetIsNew: isNew,
+        treatmentTarget: target.name,
+        treatmentEndpoint: target.qualifier,
+        // Keep runtime from control if already set, otherwise use treatment's
+        runtime: c.runtime || target.runtimeRef,
+      }));
+      advance('treatmentTarget');
+    },
+    [advance]
+  );
+
+  const setWeights = useCallback(
+    (controlWeight: number, treatmentWeight: number) => {
+      setConfig(c => ({ ...c, controlWeight, treatmentWeight }));
+      advance('weights');
+    },
+    [advance]
+  );
+
+  const setEvalPath = useCallback(
+    (path: 'select' | 'create') => {
+      if (path === 'select') {
+        // 'evalPath' is in neither step sequence, so fall back to 'evalSelect'
+        // explicitly, mirroring the 'create' branch below.
+        setStep(nextStep('evalPath') ?? 'evalSelect');
+      } else {
+        // Skip evalSelect, go to evalCreate
+        setStep('evalCreate');
+      }
+    },
+    [nextStep]
+  );
+
+  const setEvalSelect = useCallback(
+    (controlEval: string, treatmentEval: string) => {
+      setConfig(c => ({ ...c, controlOnlineEval: controlEval, treatmentOnlineEval: treatmentEval }));
+      advance('evalSelect');
+    },
+    [advance]
+  );
+
+  // NOTE(review): 'evalCreate' and 'evalSamplingRate' are in neither step
+  // sequence, so the two setters below store their value and leave `step`
+  // unchanged. The intended follow-up step is not visible here; confirm with
+  // the screen that drives them.
+  const setEvaluators = useCallback(
+    (evaluators: string[]) => {
+      setConfig(c => ({ ...c, evaluators }));
+      advance('evalCreate');
+    },
+    [advance]
+  );
+
+  const setSamplingRate = useCallback(
+    (samplingRate: number) => {
+      setConfig(c => ({ ...c, samplingRate }));
+      advance('evalSamplingRate');
+    },
+    [advance]
+  );
+
+  const setMaxDuration = useCallback(
+    (maxDuration: number | undefined) => {
+      setConfig(c => ({ ...c, maxDuration }));
+      advance('maxDuration');
+    },
+    [advance]
+  );
+
+  const setEnableOnCreate = useCallback(
+    (enableOnCreate: boolean) => {
+      setConfig(c => ({ ...c, enableOnCreate }));
+      advance('enableOnCreate');
+    },
+    [advance]
+  );
+
+  /** Restores the default config and returns to the first step. */
+  const reset = useCallback(() => {
+    setConfig(getDefaultConfig());
+    setStep('mode');
+  }, []);
+
+  return {
+    config,
+    step,
+    steps: getSteps(),
+    currentIndex,
+    goBack,
+    setSkipCheck,
+    setMode,
+    setName,
+    setDescription,
+    setAgent,
+    setGateway,
+    setGatewayWithName,
+    setVariants,
+    setOnlineEval,
+    setControlTarget,
+    setTreatmentTarget,
+    setWeights,
+    setEvalPath,
+    setEvalSelect,
+    setEvaluators,
+    setSamplingRate,
+    setMaxDuration,
+    setEnableOnCreate,
+    reset,
+  };
+}
diff --git a/src/cli/tui/screens/ab-test/useTargetBasedWizard.ts b/src/cli/tui/screens/ab-test/useTargetBasedWizard.ts
new file mode 100644
index 000000000..7c26474d8
--- /dev/null
+++ b/src/cli/tui/screens/ab-test/useTargetBasedWizard.ts
@@ -0,0 +1,188 @@
+import type { AddABTestConfig, GatewayChoice, TargetInfo } from './types';
+import { useCallback, useState } from 'react';
+
+export type TargetBasedStep = 'nameDescription' | 'gateway' | 'builder' | 'enableOnCreate' | 'confirm';
+
+export const TARGET_BASED_STEP_LABELS: Record<TargetBasedStep, string> = {
+  nameDescription: 'Name',
+  gateway: 'Gateway',
+  builder:
'Configure',
+  enableOnCreate: 'Enable',
+  confirm: 'Confirm',
+};
+
+/** Fixed, linear step order of the target-based wizard. */
+const STEPS: TargetBasedStep[] = ['nameDescription', 'gateway', 'builder', 'enableOnCreate', 'confirm'];
+
+/** Wizard-local state; converted to `AddABTestConfig` by `toAddABTestConfig`. */
+interface TargetBasedConfig {
+  name: string;
+  description: string;
+  gateway: string;
+  gatewayIsNew: boolean;
+  controlTargetInfo: TargetInfo | null;
+  controlTargetIsNew: boolean;
+  controlWeight: number;
+  controlOnlineEval: string;
+  treatmentTargetInfo: TargetInfo | null;
+  treatmentTargetIsNew: boolean;
+  treatmentWeight: number;
+  treatmentOnlineEval: string;
+  enableOnCreate: boolean;
+}
+
+/** Builds the initial wizard state (90/10 split, enabled on create). */
+function getDefaultConfig(): TargetBasedConfig {
+  return {
+    name: '',
+    description: '',
+    gateway: '',
+    gatewayIsNew: false,
+    controlTargetInfo: null,
+    controlTargetIsNew: false,
+    controlWeight: 90,
+    controlOnlineEval: '',
+    treatmentTargetInfo: null,
+    treatmentTargetIsNew: false,
+    treatmentWeight: 10,
+    treatmentOnlineEval: '',
+    enableOnCreate: true,
+  };
+}
+
+/**
+ * State machine for the target-based AB-test wizard.
+ *
+ * Only `setGateway`, `setEnableOnCreate`, `advanceFromNameDescription`,
+ * `advance` and `goBack` change the step; every other setter only updates
+ * `config`.
+ *
+ * @returns the current config/step, the step list, navigation and setter
+ *   callbacks, the `isBuilderComplete` flag, and `toAddABTestConfig`.
+ */
+export function useTargetBasedWizard() {
+  // Explicit type arguments: without one `step` would be inferred as `string`.
+  const [config, setConfig] = useState<TargetBasedConfig>(getDefaultConfig);
+  const [step, setStep] = useState<TargetBasedStep>('nameDescription');
+
+  const currentIndex = STEPS.indexOf(step);
+
+  // Functional updates read the latest step, so two calls in the same tick
+  // compose and the callbacks do not need to be re-created per step.
+  const goBack = useCallback(() => {
+    setStep(current => {
+      const idx = STEPS.indexOf(current);
+      return idx > 0 ? STEPS[idx - 1]! : current;
+    });
+  }, []);
+
+  const advance = useCallback(() => {
+    setStep(current => {
+      const idx = STEPS.indexOf(current);
+      return idx < STEPS.length - 1 ? STEPS[idx + 1]! : current;
+    });
+  }, []);
+
+  const setName = useCallback((name: string) => {
+    setConfig(c => ({ ...c, name }));
+  }, []);
+
+  const setDescription = useCallback((description: string) => {
+    setConfig(c => ({ ...c, description }));
+  }, []);
+
+  const advanceFromNameDescription = useCallback(() => {
+    setStep('gateway');
+  }, []);
+
+  const setGateway = useCallback((name: string, isNew: boolean) => {
+    setConfig(c => ({ ...c, gateway: name, gatewayIsNew: isNew }));
+    // Auto-advance to builder
+    setStep('builder');
+  }, []);
+
+  const setControlTarget = useCallback((target: TargetInfo, isNew: boolean) => {
+    setConfig(c => ({
+      ...c,
+      controlTargetInfo: target,
+      controlTargetIsNew: isNew,
+    }));
+  }, []);
+
+  const setTreatmentTarget = useCallback((target: TargetInfo, isNew: boolean) => {
+    setConfig(c => ({
+      ...c,
+      treatmentTargetInfo: target,
+      treatmentTargetIsNew: isNew,
+    }));
+  }, []);
+
+  // Treatment weight is always derived so the two weights sum to 100.
+  // NOTE(review): `w` is not range-checked here; values outside 0..100 are only
+  // caught indirectly by `isBuilderComplete`.
+  const setControlWeight = useCallback((w: number) => {
+    setConfig(c => ({ ...c, controlWeight: w, treatmentWeight: 100 - w }));
+  }, []);
+
+  const setControlEval = useCallback((name: string) => {
+    setConfig(c => ({ ...c, controlOnlineEval: name }));
+  }, []);
+
+  const setTreatmentEval = useCallback((name: string) => {
+    setConfig(c => ({ ...c, treatmentOnlineEval: name }));
+  }, []);
+
+  const setEnableOnCreate = useCallback((enableOnCreate: boolean) => {
+    setConfig(c => ({ ...c, enableOnCreate }));
+    setStep('confirm');
+  }, []);
+
+  // Both targets chosen and both weights strictly positive.
+  const isBuilderComplete =
+    config.controlTargetInfo !== null &&
+    config.treatmentTargetInfo !== null &&
+    config.controlWeight > 0 &&
+    config.treatmentWeight > 0;
+
+  /**
+   * Converts the wizard state into the shared `AddABTestConfig` shape, filling
+   * config-bundle fields with empty defaults and deriving the legacy
+   * target-based fields from the selected `TargetInfo` objects.
+   */
+  const toAddABTestConfig = useCallback((): AddABTestConfig => {
+    const gatewayChoice: GatewayChoice = config.gatewayIsNew
+      ? { type: 'create-new' }
+      : { type: 'existing-http', name: config.gateway };
+
+    return {
+      mode: 'target-based',
+      name: config.name,
+      description: config.description,
+      agent: '',
+      gatewayChoice,
+      // Config-bundle fields (safe defaults)
+      controlBundle: '',
+      controlVersion: '',
+      treatmentBundle: '',
+      treatmentVersion: '',
+      treatmentWeight: config.treatmentWeight,
+      onlineEval: '',
+      // Target-based fields
+      gateway: config.gateway,
+      gatewayIsNew: config.gatewayIsNew,
+      controlTargetInfo: config.controlTargetInfo,
+      controlTargetIsNew: config.controlTargetIsNew,
+      treatmentTargetInfo: config.treatmentTargetInfo,
+      treatmentTargetIsNew: config.treatmentTargetIsNew,
+      // Legacy target-based fields
+      runtime: config.controlTargetInfo?.runtimeRef ?? '',
+      controlTarget: config.controlTargetInfo?.name ?? '',
+      controlEndpoint: config.controlTargetInfo?.qualifier ?? '',
+      treatmentTarget: config.treatmentTargetInfo?.name ?? '',
+      treatmentEndpoint: config.treatmentTargetInfo?.qualifier ?? '',
+      controlWeight: config.controlWeight,
+      controlOnlineEval: config.controlOnlineEval,
+      treatmentOnlineEval: config.treatmentOnlineEval,
+      evaluators: [],
+      samplingRate: 10,
+      maxDuration: undefined,
+      enableOnCreate: config.enableOnCreate,
+    };
+  }, [config]);
+
+  return {
+    config,
+    step,
+    steps: STEPS,
+    currentIndex,
+    goBack,
+    advance,
+    setName,
+    setDescription,
+    advanceFromNameDescription,
+    setGateway,
+    setControlTarget,
+    setTreatmentTarget,
+    setControlWeight,
+    setControlEval,
+    setTreatmentEval,
+    setEnableOnCreate,
+    isBuilderComplete,
+    toAddABTestConfig,
+  };
+}
diff --git a/src/cli/tui/screens/add/AddFlow.tsx b/src/cli/tui/screens/add/AddFlow.tsx
index dc4c914f5..fdf64bff1 100644
--- a/src/cli/tui/screens/add/AddFlow.tsx
+++ b/src/cli/tui/screens/add/AddFlow.tsx
@@ -3,10 +3,12 @@ import { VPC_ENDPOINT_WARNING } from '../../../commands/shared/vpc-utils';
 import { computeDefaultCredentialEnvVarName } from '../../../primitives/credential-utils';
 import { ErrorPrompt } from '../../components';
 import { useAvailableAgents } from '../../hooks/useCreateMcp';
+import { AddABTestFlow } from '../ab-test';
 import { AddAgentFlow } from '../agent/AddAgentFlow';
 import type { AddAgentConfig } from '../agent/types';
 import { FRAMEWORK_OPTIONS } from '../agent/types';
 import { useAddAgent } from '../agent/useAddAgent';
+import { AddConfigBundleFlow } from '../config-bundle';
 import { AddEvaluatorFlow } from '../evaluator';
 import { AddHarnessFlow } from '../harness/AddHarnessFlow';
 import { AddIdentityFlow } from '../identity';
@@ -33,6 +35,8 @@ type FlowState =
   | { name: 'evaluator-wizard' }
   | { name: 'online-eval-wizard' }
   | { name: 'policy-wizard' }
+  | { name: 'config-bundle-wizard' }
+  | { name: 'ab-test-wizard'
} | { name: 'runtime-endpoint-wizard' } | { name: 'agent-create-success'; @@ -187,6 +191,10 @@ function getInitialFlowState(resource?: AddResourceType): FlowState { return { name: 'policy-wizard' }; case 'runtime-endpoint': return { name: 'runtime-endpoint-wizard' }; + case 'config-bundle': + return { name: 'config-bundle-wizard' }; + case 'ab-test': + return { name: 'ab-test-wizard' }; default: return { name: 'select' }; } @@ -237,6 +245,12 @@ export function AddFlow(props: AddFlowProps) { case 'policy': setFlow({ name: 'policy-wizard' }); break; + case 'config-bundle': + setFlow({ name: 'config-bundle-wizard' }); + break; + case 'ab-test': + setFlow({ name: 'ab-test-wizard' }); + break; case 'runtime-endpoint': setFlow({ name: 'runtime-endpoint-wizard' }); break; @@ -485,6 +499,32 @@ export function AddFlow(props: AddFlowProps) { ); } + // Configuration bundle wizard + if (flow.name === 'config-bundle-wizard') { + return ( + setFlow({ name: 'select' })} + onDev={props.onDev} + onDeploy={props.onDeploy} + /> + ); + } + + // AB test wizard + if (flow.name === 'ab-test-wizard') { + return ( + setFlow({ name: 'select' })} + onDev={props.onDev} + onDeploy={props.onDeploy} + /> + ); + } + if (flow.name === 'runtime-endpoint-wizard') { return ( ({ diff --git a/src/cli/tui/screens/agent/AddAgentScreen.tsx b/src/cli/tui/screens/agent/AddAgentScreen.tsx index e99386055..c8961c065 100644 --- a/src/cli/tui/screens/agent/AddAgentScreen.tsx +++ b/src/cli/tui/screens/agent/AddAgentScreen.tsx @@ -185,6 +185,7 @@ export function AddAgentScreen({ existingAgentNames, onComplete, onExit }: AddAg idleTimeout: '' as string, maxLifetime: '' as string, sessionStorageMountPath: '' as string, + withConfigBundle: undefined as boolean | undefined, }); const [byoAdvancedSettings, setByoAdvancedSettings] = useState>(new Set()); const [byoAuthorizerType, setByoAuthorizerType] = useState('AWS_IAM'); @@ -311,6 +312,7 @@ export function AddAgentScreen({ existingAgentNames, onComplete, onExit }: 
AddAg idleRuntimeSessionTimeout: generateWizard.config.idleRuntimeSessionTimeout, maxLifetime: generateWizard.config.maxLifetime, sessionStorageMountPath: generateWizard.config.sessionStorageMountPath, + withConfigBundle: generateWizard.config.withConfigBundle, pythonVersion: DEFAULT_PYTHON_VERSION, memory: generateWizard.config.memory, }; @@ -433,6 +435,7 @@ export function AddAgentScreen({ existingAgentNames, onComplete, onExit }: AddAg ...(byoConfig.idleTimeout && { idleRuntimeSessionTimeout: Number(byoConfig.idleTimeout) }), ...(byoConfig.maxLifetime && { maxLifetime: Number(byoConfig.maxLifetime) }), ...(byoConfig.sessionStorageMountPath && { sessionStorageMountPath: byoConfig.sessionStorageMountPath }), + ...(byoConfig.withConfigBundle && { withConfigBundle: true }), pythonVersion: DEFAULT_PYTHON_VERSION, memory: 'none', }; @@ -494,11 +497,14 @@ export function AddAgentScreen({ existingAgentNames, onComplete, onExit }: AddAg idleTimeout: '', maxLifetime: '', sessionStorageMountPath: '', + withConfigBundle: undefined, })); setByoAuthorizerType('AWS_IAM'); setByoJwtConfig(undefined); setByoStep('confirm'); } else { + // Config bundle has no sub-steps — set flag immediately + setByoConfig(c => ({ ...c, withConfigBundle: selected.has('configBundle') || undefined })); // Navigate to first advanced sub-step (steps memo hasn't updated yet) setTimeout(() => { if (selected.has('dockerfile') && byoConfig.buildType === 'Container') { @@ -1348,6 +1354,9 @@ export function AddAgentScreen({ existingAgentNames, onComplete, onExit }: AddAg ...(byoConfig.sessionStorageMountPath ? [{ label: 'Session Storage', value: byoConfig.sessionStorageMountPath }] : []), + ...(byoConfig.withConfigBundle + ? 
[{ label: 'Config Bundle', value: 'Yes (auto-created on deploy)' }] + : []), ]} /> )} diff --git a/src/cli/tui/screens/agent/types.ts b/src/cli/tui/screens/agent/types.ts index bc1553352..c708bcac0 100644 --- a/src/cli/tui/screens/agent/types.ts +++ b/src/cli/tui/screens/agent/types.ts @@ -97,6 +97,8 @@ export interface AddAgentConfig { maxLifetime?: number; /** Mount path for session filesystem storage (e.g. /mnt/session-storage) */ sessionStorageMountPath?: string; + /** When true, create a config bundle wired into the agent template */ + withConfigBundle?: boolean; /** Python version (only for Python agents) */ pythonVersion: PythonRuntime; /** Memory option (create path only) */ diff --git a/src/cli/tui/screens/agent/useAddAgent.ts b/src/cli/tui/screens/agent/useAddAgent.ts index 3160e8712..e2fabe140 100644 --- a/src/cli/tui/screens/agent/useAddAgent.ts +++ b/src/cli/tui/screens/agent/useAddAgent.ts @@ -1,7 +1,7 @@ import { APP_DIR, ConfigIO, NoProjectError, findConfigRoot, setEnvVar } from '../../../../lib'; import type { AgentEnvSpec, DirectoryPath, FilePath } from '../../../../schema'; -import { getErrorMessage } from '../../../errors'; import { type PythonSetupResult, setupPythonProject } from '../../../operations'; +import { createConfigBundleForAgent } from '../../../operations/agent/config-bundle-defaults'; import { mapGenerateConfigToRenderConfig, mapModelProviderToCredentials, @@ -12,6 +12,19 @@ import { executeImportAgent } from '../../../operations/agent/import'; import { buildAuthorizerConfigFromJwtConfig, createManagedOAuthCredential } from '../../../primitives/auth-utils'; import { computeDefaultCredentialEnvVarName } from '../../../primitives/credential-utils'; import { credentialPrimitive } from '../../../primitives/registry'; +import { withAddTelemetry } from '../../../telemetry/cli-command-run.js'; +import { + AgentType as AgentTypeEnum, + AuthorizerType as AuthorizerTypeEnum, + Build, + Framework, + Language, + Memory as MemoryEnum, + 
ModelProvider, + NetworkMode, + Protocol, + standardize, +} from '../../../telemetry/schemas/common-shapes.js'; import { createRenderer } from '../../../templates'; import type { GenerateConfig } from '../generate/types'; import type { AddAgentConfig } from './types'; @@ -118,6 +131,7 @@ function mapAddAgentConfigToGenerateConfig(config: AddAgentConfig): GenerateConf idleRuntimeSessionTimeout: config.idleRuntimeSessionTimeout, maxLifetime: config.maxLifetime, sessionStorageMountPath: config.sessionStorageMountPath, + withConfigBundle: config.withConfigBundle, }; } @@ -135,34 +149,25 @@ export function useAddAgent() { const addAgent = useCallback(async (config: AddAgentConfig): Promise => { setIsLoading(true); try { - const configBaseDir = findConfigRoot(); - if (!configBaseDir) { - return { ok: false, error: new NoProjectError().message }; - } - - const configIO = new ConfigIO({ baseDir: configBaseDir }); - - if (!configIO.configExists('project')) { - return { ok: false, error: new NoProjectError().message }; - } - - // Check for duplicate agent name - const project = await configIO.readProjectSpec(); - const existingAgent = project.runtimes.find(agent => agent.name === config.name); - if (existingAgent) { - return { ok: false, error: `Agent "${config.name}" already exists in this project.` }; - } - - // Branch based on agent type - if (config.agentType === 'import') { - return await handleImportPath(config, configBaseDir); - } else if (config.agentType === 'create') { - return await handleCreatePath(config, configBaseDir); - } else { - return await handleByoPath(config, configIO, configBaseDir); + const result = await withAddTelemetry( + 'add.agent', + { + language: standardize(Language, config.language), + framework: standardize(Framework, config.framework), + model_provider: standardize(ModelProvider, config.modelProvider), + agent_type: standardize(AgentTypeEnum, config.agentType), + build: standardize(Build, config.buildType), + protocol: standardize(Protocol, 
config.protocol ?? 'HTTP'), + network_mode: standardize(NetworkMode, config.networkMode ?? 'PUBLIC'), + authorizer_type: standardize(AuthorizerTypeEnum, config.authorizerType ?? 'NONE'), + memory: standardize(MemoryEnum, config.memory ?? 'none'), + }, + () => addAgentInner(config) + ); + if (!result.success) { + return { ok: false, error: result.error }; } - } catch (err) { - return { ok: false, error: getErrorMessage(err) }; + return result.outcome; } finally { setIsLoading(false); } @@ -175,6 +180,43 @@ export function useAddAgent() { return { addAgent, isLoading, reset }; } +type AddAgentInnerResult = + | { success: true; outcome: AddAgentCreateResult | AddAgentByoResult } + | { success: false; error: string }; + +async function addAgentInner(config: AddAgentConfig): Promise { + const configBaseDir = findConfigRoot(); + if (!configBaseDir) { + return { success: false, error: new NoProjectError().message }; + } + + const configIO = new ConfigIO({ baseDir: configBaseDir }); + + if (!configIO.configExists('project')) { + return { success: false, error: new NoProjectError().message }; + } + + const project = await configIO.readProjectSpec(); + const existingAgent = project.runtimes.find(agent => agent.name === config.name); + if (existingAgent) { + return { success: false, error: `Agent "${config.name}" already exists in this project.` }; + } + + let outcome: AddAgentCreateResult | AddAgentByoResult | AddAgentError; + if (config.agentType === 'import') { + outcome = await handleImportPath(config, configBaseDir); + } else if (config.agentType === 'create') { + outcome = await handleCreatePath(config, configBaseDir); + } else { + outcome = await handleByoPath(config, configIO, configBaseDir); + } + + if (!outcome.ok) { + return { success: false, error: outcome.error }; + } + return { success: true, outcome }; +} + /** * Handle the "create" path: generate agent from template and write to project. 
*/ @@ -259,6 +301,11 @@ async function handleCreatePath( pythonSetupResult = await setupPythonProject({ projectDir: agentPath }); } + // Auto-create config bundle when opted in + if (config.withConfigBundle) { + await createConfigBundleForAgent(config.name, configBaseDir); + } + return { ok: true, type: 'create', diff --git a/src/cli/tui/screens/config-bundle-hub/ConfigBundleFlow.tsx b/src/cli/tui/screens/config-bundle-hub/ConfigBundleFlow.tsx new file mode 100644 index 000000000..4c634ca37 --- /dev/null +++ b/src/cli/tui/screens/config-bundle-hub/ConfigBundleFlow.tsx @@ -0,0 +1,60 @@ +/** + * Config Bundle Flow — manages navigation between hub, version history, and diff screens. + */ +import { ConfigBundleHubScreen } from './ConfigBundleHubScreen'; +import { DiffScreen } from './DiffScreen'; +import { VersionHistoryScreen } from './VersionHistoryScreen'; +import type { BundleWithMeta } from './useConfigBundleHub'; +import React, { useState } from 'react'; + +type FlowState = + | { name: 'hub' } + | { name: 'versions'; bundle: BundleWithMeta; region: string } + | { name: 'diff'; bundle: BundleWithMeta; region: string; fromVersionId: string; toVersionId: string }; + +interface ConfigBundleFlowProps { + onExit: () => void; +} + +export function ConfigBundleFlow({ onExit }: ConfigBundleFlowProps) { + const [flow, setFlow] = useState({ name: 'hub' }); + + if (flow.name === 'hub') { + return ( + { + setFlow({ name: 'versions', bundle, region }); + }} + onExit={onExit} + /> + ); + } + + if (flow.name === 'versions') { + return ( + + setFlow({ name: 'diff', bundle: flow.bundle, region: flow.region, fromVersionId, toVersionId }) + } + onExit={() => setFlow({ name: 'hub' })} + /> + ); + } + + if (flow.name === 'diff') { + return ( + setFlow({ name: 'versions', bundle: flow.bundle, region: flow.region })} + /> + ); + } + + return null; +} diff --git a/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx 
b/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx new file mode 100644 index 000000000..476336dd0 --- /dev/null +++ b/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx @@ -0,0 +1,129 @@ +/** + * Top-level config bundle hub — lists all deployed bundles. + * Enter drills into version history. + */ +import { Panel, Screen } from '../../components'; +import type { BundleWithMeta } from './useConfigBundleHub'; +import { useConfigBundleHub } from './useConfigBundleHub'; +import { Box, Text, useInput } from 'ink'; +import React from 'react'; + +function formatRelativeTime(epochSeconds: string): string { + const ms = Number(epochSeconds) < 1e12 ? Number(epochSeconds) * 1000 : Number(epochSeconds); + const diff = Date.now() - ms; + const minutes = Math.floor(diff / 60000); + if (minutes < 1) return 'just now'; + if (minutes < 60) return `${minutes}m ago`; + const hours = Math.floor(minutes / 60); + if (hours < 24) return `${hours}h ago`; + const days = Math.floor(hours / 24); + return `${days}d ago`; +} + +interface ConfigBundleHubScreenProps { + onSelectBundle: (bundle: BundleWithMeta, region: string) => void; + onExit: () => void; +} + +export function ConfigBundleHubScreen({ onSelectBundle, onExit }: ConfigBundleHubScreenProps) { + const { bundles, isLoading, error, region } = useConfigBundleHub(); + const [selectedIndex, setSelectedIndex] = React.useState(0); + + useInput( + (input: string, key: { return: boolean; upArrow: boolean; downArrow: boolean }) => { + if (key.upArrow && bundles.length > 0) { + setSelectedIndex(i => (i - 1 + bundles.length) % bundles.length); + } + if (key.downArrow && bundles.length > 0) { + setSelectedIndex(i => (i + 1) % bundles.length); + } + if (key.return && bundles[selectedIndex]) { + onSelectBundle(bundles[selectedIndex], region); + } + }, + { isActive: !isLoading && bundles.length > 0 } + ); + + if (isLoading) { + return ( + + Loading configuration bundles... 
+ + ); + } + + if (error) { + return ( + + Error: {error} + + ); + } + + if (bundles.length === 0) { + return ( + + + No configuration bundles found. + Use `agentcore add config-bundle` to create one, then deploy. + + + ); + } + + const headerContent = ( + + Region: + {region} + · {bundles.length} bundle(s) + + ); + + return ( + + + {bundles.map((bundle, idx) => ( + + ))} + + + ); +} + +function BundleRow({ bundle, selected }: { bundle: BundleWithMeta; selected: boolean }) { + const branchSummary = bundle.branches.length > 0 ? bundle.branches.join(', ') : 'no branches'; + + return ( + + + {selected ? '❯' : ' '} + + {bundle.bundleName} + + + + {' '} + + Versions: {bundle.versionCount} ({branchSummary}) + + + {bundle.description && ( + + {' '} + Description: {bundle.description} + + )} + {bundle.lastUpdated && ( + + {' '} + Last update: {formatRelativeTime(bundle.lastUpdated)} + + )} + + ); +} diff --git a/src/cli/tui/screens/config-bundle-hub/DiffScreen.tsx b/src/cli/tui/screens/config-bundle-hub/DiffScreen.tsx new file mode 100644 index 000000000..f91b2b00e --- /dev/null +++ b/src/cli/tui/screens/config-bundle-hub/DiffScreen.tsx @@ -0,0 +1,149 @@ +/** + * Diff screen — shows component differences between two bundle versions. + */ +import { getConfigurationBundleVersion } from '../../../../cli/aws/agentcore-config-bundles'; +import type { GetConfigurationBundleVersionResult } from '../../../../cli/aws/agentcore-config-bundles'; +import { deepDiff } from '../../../../cli/operations/config-bundle/diff-versions'; +import type { DiffEntry } from '../../../../cli/operations/config-bundle/diff-versions'; +import { Panel, Screen } from '../../components'; +import { Box, Text, useInput, useStdout } from 'ink'; +import React, { useEffect, useMemo, useState } from 'react'; + +function formatTimestamp(epochSeconds: string): string { + const num = Number(epochSeconds); + const ms = num < 1e12 ? 
num * 1000 : num; + return new Date(ms) + .toISOString() + .replace('T', ' ') + .replace(/\.\d+Z$/, 'Z'); +} + +interface DiffScreenProps { + bundleId: string; + bundleName: string; + fromVersionId: string; + toVersionId: string; + region: string; + onExit: () => void; +} + +export function DiffScreen({ bundleId, bundleName, fromVersionId, toVersionId, region, onExit }: DiffScreenProps) { + const [fromVersion, setFromVersion] = useState(); + const [toVersion, setToVersion] = useState(); + const [diffs, setDiffs] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(); + const [scrollOffset, setScrollOffset] = useState(0); + const { stdout } = useStdout(); + + useEffect(() => { + async function load() { + try { + const [from, to] = await Promise.all([ + getConfigurationBundleVersion({ region, bundleId, versionId: fromVersionId }), + getConfigurationBundleVersion({ region, bundleId, versionId: toVersionId }), + ]); + setFromVersion(from); + setToVersion(to); + setDiffs(deepDiff(from.components, to.components)); + setIsLoading(false); + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + setIsLoading(false); + } + } + void load(); + }, [bundleId, fromVersionId, toVersionId, region]); + + // Build display lines + const lines = useMemo(() => { + if (!fromVersion || !toVersion) return []; + const result: { text: string; color?: string }[] = []; + + result.push({ + text: `Diff: ${fromVersion.versionId} → ${toVersion.versionId}`, + }); + result.push({ + text: `From: ${fromVersion.lineageMetadata?.commitMessage ?? '(no message)'} (${formatTimestamp(fromVersion.versionCreatedAt)})`, + color: 'gray', + }); + result.push({ + text: `To: ${toVersion.lineageMetadata?.commitMessage ?? 
'(no message)'} (${formatTimestamp(toVersion.versionCreatedAt)})`, + color: 'gray', + }); + result.push({ text: '' }); + + if (diffs.length === 0) { + result.push({ text: 'No differences found.', color: 'green' }); + } else { + result.push({ text: `${diffs.length} change(s):` }); + result.push({ text: '' }); + + for (const d of diffs) { + result.push({ text: d.path }); + if (d.type === 'added') { + result.push({ text: `+ ${JSON.stringify(d.newValue)}`, color: 'green' }); + } else if (d.type === 'removed') { + result.push({ text: `- ${JSON.stringify(d.oldValue)}`, color: 'red' }); + } else if (d.type === 'changed') { + result.push({ text: `- ${JSON.stringify(d.oldValue)}`, color: 'red' }); + result.push({ text: `+ ${JSON.stringify(d.newValue)}`, color: 'green' }); + } + result.push({ text: '' }); + } + } + + return result; + }, [fromVersion, toVersion, diffs]); + + const terminalHeight = stdout?.rows ?? 24; + const displayHeight = Math.max(5, terminalHeight - 10); + const maxScroll = Math.max(0, lines.length - displayHeight); + + useInput((_input, key) => { + if (key.upArrow) setScrollOffset(prev => Math.max(0, prev - 1)); + if (key.downArrow) setScrollOffset(prev => Math.min(maxScroll, prev + 1)); + }); + + if (isLoading) { + return ( + + Loading versions for diff... 
+ + ); + } + + if (error) { + return ( + + Error: {error} + + ); + } + + const visibleLines = lines.slice(scrollOffset, scrollOffset + displayHeight); + const needsScroll = lines.length > displayHeight; + + return ( + + + + {visibleLines.map((line, idx) => ( + + {line.text} + + ))} + + {needsScroll && ( + + [{scrollOffset + 1}-{Math.min(scrollOffset + displayHeight, lines.length)} of {lines.length}] + + )} + + + ); +} diff --git a/src/cli/tui/screens/config-bundle-hub/VersionHistoryScreen.tsx b/src/cli/tui/screens/config-bundle-hub/VersionHistoryScreen.tsx new file mode 100644 index 000000000..56e161c85 --- /dev/null +++ b/src/cli/tui/screens/config-bundle-hub/VersionHistoryScreen.tsx @@ -0,0 +1,245 @@ +/** + * Version history screen — shows versions grouped by branch for a single bundle. + * Enter views version details, D starts diff selection. + */ +import { getConfigurationBundleVersion } from '../../../../cli/aws/agentcore-config-bundles'; +import type { ConfigurationBundleVersionSummary } from '../../../../cli/aws/agentcore-config-bundles'; +import { Panel, Screen } from '../../components'; +import type { BundleWithMeta } from './useConfigBundleHub'; +import { useVersionHistory } from './useConfigBundleHub'; +import { Box, Text, useInput } from 'ink'; +import React, { useMemo, useState } from 'react'; + +function formatTimestamp(epochSeconds: string): string { + const num = Number(epochSeconds); + const ms = num < 1e12 ? 
num * 1000 : num; + return new Date(ms) + .toISOString() + .replace('T', ' ') + .replace(/\.\d+Z$/, 'Z'); +} + +interface VersionHistoryScreenProps { + bundle: BundleWithMeta; + region: string; + onViewDiff: (bundleId: string, fromVersionId: string, toVersionId: string) => void; + onExit: () => void; +} + +type Mode = 'browse' | 'diff-select-from' | 'diff-select-to' | 'version-detail'; + +export function VersionHistoryScreen({ bundle, region, onViewDiff, onExit }: VersionHistoryScreenProps) { + const { versions, isLoading, error } = useVersionHistory(bundle.bundleId, region); + const [selectedIndex, setSelectedIndex] = useState(0); + const [mode, setMode] = useState('browse'); + const [diffFromId, setDiffFromId] = useState(); + const [detailText, setDetailText] = useState(); + + // Flat list of all versions for navigation + const flatVersions = useMemo(() => versions, [versions]); + + // Group by branch for display + const byBranch = useMemo(() => { + const map = new Map(); + for (const v of versions) { + const branch = v.lineageMetadata?.branchName ?? 
'unknown'; + if (!map.has(branch)) map.set(branch, []); + map.get(branch)!.push(v); + } + return map; + }, [versions]); + + useInput( + (input, key) => { + if (isLoading || flatVersions.length === 0) return; + + if (mode === 'version-detail') { + if (key.escape) setMode('browse'); + return; + } + + // Navigation + if (key.upArrow) { + setSelectedIndex(i => (i - 1 + flatVersions.length) % flatVersions.length); + return; + } + if (key.downArrow) { + setSelectedIndex(i => (i + 1) % flatVersions.length); + return; + } + + if (mode === 'browse') { + // Enter — view version detail + if (key.return && flatVersions[selectedIndex]) { + setMode('version-detail'); + setDetailText(undefined); + void loadDetail(flatVersions[selectedIndex].versionId); + return; + } + // D — start diff + if (input === 'd' || input === 'D') { + setMode('diff-select-from'); + return; + } + } + + if (mode === 'diff-select-from') { + if (key.escape) { + setMode('browse'); + return; + } + if (key.return && flatVersions[selectedIndex]) { + setDiffFromId(flatVersions[selectedIndex].versionId); + setMode('diff-select-to'); + return; + } + } + + if (mode === 'diff-select-to') { + if (key.escape) { + setMode('diff-select-from'); + return; + } + if (key.return && flatVersions[selectedIndex] && diffFromId) { + onViewDiff(bundle.bundleId, diffFromId, flatVersions[selectedIndex].versionId); + return; + } + } + }, + { isActive: !isLoading } + ); + + async function loadDetail(versionId: string) { + try { + const detail = await getConfigurationBundleVersion({ + region, + bundleId: bundle.bundleId, + versionId, + }); + const lines: string[] = []; + lines.push(`Version: ${detail.versionId}`); + if (detail.description) lines.push(`Description: ${detail.description}`); + if (detail.lineageMetadata?.branchName) lines.push(`Branch: ${detail.lineageMetadata.branchName}`); + if (detail.lineageMetadata?.commitMessage) lines.push(`Message: ${detail.lineageMetadata.commitMessage}`); + if (detail.lineageMetadata?.createdBy) 
{ + const cb = detail.lineageMetadata.createdBy; + lines.push(`Created by: ${cb.name}${cb.arn ? ` (${cb.arn})` : ''}`); + } + if (detail.lineageMetadata?.parentVersionIds?.length) { + lines.push(`Parent: ${detail.lineageMetadata.parentVersionIds.map(id => id).join(', ')}`); + } + lines.push(`Created: ${formatTimestamp(detail.versionCreatedAt)}`); + lines.push(''); + lines.push('Components:'); + for (const [arn, comp] of Object.entries(detail.components)) { + lines.push(` ${arn}`); + lines.push(` ${JSON.stringify(comp.configuration, null, 2).split('\n').join('\n ')}`); + lines.push(''); + } + setDetailText(lines.join('\n')); + } catch (err) { + setDetailText(`Error loading version: ${err instanceof Error ? err.message : String(err)}`); + } + } + + if (isLoading) { + return ( + + Loading version history... + + ); + } + + if (error) { + return ( + + Error: {error} + + ); + } + + // Version detail overlay + if (mode === 'version-detail') { + return ( + setMode('browse')} helpText="Esc back"> + {detailText ? 
{detailText} : Loading...} + + ); + } + + // Mode-specific help text + let helpText = '↑↓ navigate · Enter view · D diff · Esc back · Ctrl+C quit'; + if (mode === 'diff-select-from') { + helpText = '↑↓ navigate · Enter select FROM version · Esc cancel'; + } else if (mode === 'diff-select-to') { + helpText = `↑↓ navigate · Enter select TO version · Esc back (from: ${diffFromId!})`; + } + + // Mode-specific header + let modeIndicator: React.ReactNode = null; + if (mode === 'diff-select-from') { + modeIndicator = ( + + Select the FROM version for diff: + + ); + } else if (mode === 'diff-select-to') { + modeIndicator = ( + + From: {diffFromId!} — Now select the TO version: + + ); + } + + // Build a flat index map so we can highlight the selected version + let flatIdx = 0; + + return ( + + + {modeIndicator} + {[...byBranch.entries()].map(([branch, branchVersions]) => ( + + + Branch: {branch} + + {branchVersions.map((v, i) => { + const currentFlatIdx = flatIdx++; + const isSelected = currentFlatIdx === selectedIndex; + const meta = v.lineageMetadata; + const message = meta?.commitMessage ?? ''; + const isLast = i === branchVersions.length - 1; + const connector = isLast ? '└' : '├'; + const isDiffFrom = v.versionId === diffFromId; + + return ( + + + {isSelected ? '❯' : ' '} + {connector} + + {v.versionId} + + {formatTimestamp(v.versionCreatedAt)} + {message ? "{message}" : null} + + {meta?.parentVersionIds?.length ? ( + + {' '} + {isLast ? 
' ' : '│'} parent: {meta.parentVersionIds.join(', ')} + + ) : null} + + ); + })} + + ))} + + + ); +} diff --git a/src/cli/tui/screens/config-bundle-hub/index.ts b/src/cli/tui/screens/config-bundle-hub/index.ts new file mode 100644 index 000000000..b7ceb3d02 --- /dev/null +++ b/src/cli/tui/screens/config-bundle-hub/index.ts @@ -0,0 +1,4 @@ +export { ConfigBundleFlow } from './ConfigBundleFlow'; +export { ConfigBundleHubScreen } from './ConfigBundleHubScreen'; +export { VersionHistoryScreen } from './VersionHistoryScreen'; +export { DiffScreen } from './DiffScreen'; diff --git a/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts b/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts new file mode 100644 index 000000000..c0276ae53 --- /dev/null +++ b/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts @@ -0,0 +1,220 @@ +/** + * Hook for the Config Bundle Hub — reads bundles from project config + * and enriches deployed ones with version metadata from the API. + */ +import type { ConfigurationBundleVersionSummary } from '../../../../cli/aws/agentcore-config-bundles'; +import { + listConfigurationBundleVersions, + listConfigurationBundles, +} from '../../../../cli/aws/agentcore-config-bundles'; +import { ConfigIO } from '../../../../lib'; +import { getBundleNameVariants } from '../../../operations/config-bundle/bundle-name-variants'; +import { useEffect, useRef, useState } from 'react'; + +export interface BundleWithMeta { + bundleId: string; + bundleArn: string; + bundleName: string; + description?: string; + versionCount: number; + branches: string[]; + lastUpdated?: string; +} + +export interface ConfigBundleHubState { + bundles: BundleWithMeta[]; + isLoading: boolean; + error?: string; + region: string; +} + +export function useConfigBundleHub(): ConfigBundleHubState { + const [bundles, setBundles] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(); + const [region, setRegion] = 
useState('us-east-1'); + const mountedRef = useRef(true); + + useEffect(() => { + mountedRef.current = true; + + async function load() { + setIsLoading(true); + setError(undefined); + try { + const configIO = new ConfigIO(); + const [projectSpec, deployedState, targets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + + if (targets.length === 0) { + if (mountedRef.current) { + setError('No AWS deployment targets configured.'); + setIsLoading(false); + } + return; + } + const resolvedRegion = targets[0]!.region; + if (mountedRef.current) setRegion(resolvedRegion); + + // Get config bundles from project config (agentcore.json) + const projectBundles = projectSpec.configBundles ?? []; + if (projectBundles.length === 0) { + if (mountedRef.current) { + setBundles([]); + setIsLoading(false); + } + return; + } + + // Get deployed state to look up bundleIds + const deployedBundles = + Object.values(deployedState.targets).find(t => t.resources?.configBundles)?.resources?.configBundles ?? {}; + + // Build bundle list from project config, enriching with deployed version info + const enriched = await Promise.all( + projectBundles.map(async (bundleSpec): Promise => { + const deployed = deployedBundles[bundleSpec.name]; + if (!deployed) { + // Not yet deployed — show from project config only + return { + bundleId: '', + bundleArn: '', + bundleName: bundleSpec.name, + description: bundleSpec.description, + versionCount: 0, + branches: bundleSpec.branchName ? 
[bundleSpec.branchName] : [], + }; + } + + // Deployed — fetch version metadata from API + // Use a helper that falls back to the list API if the deployed-state bundleId is stale + let effectiveBundleId = deployed.bundleId; + let effectiveBundleArn = deployed.bundleArn; + + try { + const versions = await listConfigurationBundleVersions({ + region: resolvedRegion, + bundleId: effectiveBundleId, + maxResults: 50, + }); + const branchSet = new Set(); + let latestTs = ''; + for (const v of versions.versions) { + if (v.lineageMetadata?.branchName) branchSet.add(v.lineageMetadata.branchName); + if (v.versionCreatedAt > latestTs) latestTs = v.versionCreatedAt; + } + return { + bundleId: effectiveBundleId, + bundleArn: effectiveBundleArn, + bundleName: bundleSpec.name, + description: bundleSpec.description, + versionCount: versions.versions.length, + branches: [...branchSet], + lastUpdated: latestTs || undefined, + }; + } catch { + // Stale deployed-state ID — try to resolve via list API + try { + const allBundles = await listConfigurationBundles({ region: resolvedRegion, maxResults: 100 }); + const nameVariants = getBundleNameVariants(bundleSpec.name, projectSpec.name); + const match = allBundles.bundles.find(b => nameVariants.includes(b.bundleName)); + if (match) { + effectiveBundleId = match.bundleId; + effectiveBundleArn = match.bundleArn; + const versions = await listConfigurationBundleVersions({ + region: resolvedRegion, + bundleId: effectiveBundleId, + maxResults: 50, + }); + const branchSet = new Set(); + let latestTs = ''; + for (const v of versions.versions) { + if (v.lineageMetadata?.branchName) branchSet.add(v.lineageMetadata.branchName); + if (v.versionCreatedAt > latestTs) latestTs = v.versionCreatedAt; + } + return { + bundleId: effectiveBundleId, + bundleArn: effectiveBundleArn, + bundleName: bundleSpec.name, + description: bundleSpec.description, + versionCount: versions.versions.length, + branches: [...branchSet], + lastUpdated: latestTs || undefined, + 
}; + } + } catch { + // Both paths failed + } + return { + bundleId: effectiveBundleId, + bundleArn: effectiveBundleArn, + bundleName: bundleSpec.name, + description: bundleSpec.description, + versionCount: 0, + branches: [], + }; + } + }) + ); + + if (mountedRef.current) { + setBundles(enriched); + setIsLoading(false); + } + } catch (err) { + if (mountedRef.current) { + setError(err instanceof Error ? err.message : String(err)); + setIsLoading(false); + } + } + } + + void load(); + return () => { + mountedRef.current = false; + }; + }, []); + + return { bundles, isLoading, error, region }; +} + +export function useVersionHistory(bundleId: string, region: string) { + const [versions, setVersions] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(); + + useEffect(() => { + async function load() { + setIsLoading(true); + setError(undefined); + try { + const allVersions: ConfigurationBundleVersionSummary[] = []; + let nextToken: string | undefined; + do { + const result = await listConfigurationBundleVersions({ + region, + bundleId, + maxResults: 50, + nextToken, + }); + allVersions.push(...result.versions); + nextToken = result.nextToken; + } while (nextToken); + + allVersions.sort((a, b) => Number(b.versionCreatedAt) - Number(a.versionCreatedAt)); + setVersions(allVersions); + setIsLoading(false); + } catch (err) { + setError(err instanceof Error ? 
err.message : String(err)); + setIsLoading(false); + } + } + + void load(); + }, [bundleId, region]); + + return { versions, isLoading, error }; +} diff --git a/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx b/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx new file mode 100644 index 000000000..2ccc9fab2 --- /dev/null +++ b/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx @@ -0,0 +1,177 @@ +import { ConfigIO } from '../../../../lib'; +import { ErrorPrompt, GradientText, Screen } from '../../components'; +import { useCreateConfigBundle, useExistingConfigBundleNames } from '../../hooks/useCreateConfigBundle'; +import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import { AddConfigBundleScreen } from './AddConfigBundleScreen'; +import type { AddConfigBundleConfig, DeployedComponent } from './types'; +import React, { useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'loading' } + | { name: 'create-wizard'; deployedComponents: DeployedComponent[] } + | { name: 'create-success'; bundleName: string } + | { name: 'error'; message: string }; + +interface AddConfigBundleFlowProps { + isInteractive?: boolean; + onExit: () => void; + onBack: () => void; + onDev?: () => void; + onDeploy?: () => void; +} + +export function AddConfigBundleFlow({ + isInteractive = true, + onExit, + onBack, + onDev, + onDeploy, +}: AddConfigBundleFlowProps) { + const { createConfigBundle, reset: resetCreate } = useCreateConfigBundle(); + const { names: existingNames } = useExistingConfigBundleNames(); + const [flow, setFlow] = useState({ name: 'loading' }); + + // Load deployed runtimes/gateways and fill in undeployed ones from project spec + useEffect(() => { + void (async () => { + try { + const configIO = new ConfigIO(); + const components: DeployedComponent[] = []; + const deployedArns = new Set(); + + // 1. 
Collect deployed components (real ARNs) + try { + const deployedState = await configIO.readDeployedState(); + for (const target of Object.values(deployedState.targets)) { + const runtimes = target.resources?.runtimes; + if (runtimes) { + for (const [name, state] of Object.entries(runtimes)) { + components.push({ name, arn: state.runtimeArn, type: 'runtime' }); + deployedArns.add(name); + } + } + const httpGateways = target.resources?.httpGateways; + if (httpGateways) { + for (const [name, state] of Object.entries(httpGateways)) { + components.push({ name, arn: state.gatewayArn, type: 'gateway' }); + deployedArns.add(name); + } + } + } + } catch { + // No deployed state yet — that's fine, we'll use project spec below + } + + // 2. Add undeployed runtimes/gateways from project spec as placeholders + try { + const projectSpec = await configIO.readProjectSpec(); + for (const rt of projectSpec.runtimes ?? []) { + if (!deployedArns.has(rt.name)) { + components.push({ + name: rt.name, + arn: `{{runtime:${rt.name}}}`, + type: 'runtime', + isPlaceholder: true, + }); + } + } + for (const gw of projectSpec.httpGateways ?? 
[]) { + if (!deployedArns.has(gw.name)) { + components.push({ + name: gw.name, + arn: `{{gateway:${gw.name}}}`, + type: 'gateway', + isPlaceholder: true, + }); + } + } + } catch { + // If we can't read project spec, continue with what we have + } + + setFlow({ name: 'create-wizard', deployedComponents: components }); + } catch { + setFlow({ name: 'create-wizard', deployedComponents: [] }); + } + })(); + }, []); + + useEffect(() => { + if (!isInteractive && flow.name === 'create-success') { + onExit(); + } + }, [isInteractive, flow.name, onExit]); + + const handleCreateComplete = useCallback( + (config: AddConfigBundleConfig) => { + void createConfigBundle({ + name: config.name, + description: config.description || undefined, + components: config.components, + branchName: config.branchName || 'mainline', + commitMessage: config.commitMessage || `Create ${config.name}`, + }).then(result => { + if (result.ok) { + setFlow(prev => { + if (prev.name === 'loading') return prev; + return { name: 'create-success', bundleName: result.bundleName }; + }); + return; + } + setFlow(prev => { + if (prev.name === 'loading') return prev; + return { name: 'error', message: result.error }; + }); + }); + }, + [createConfigBundle] + ); + + if (flow.name === 'loading') { + return ( + + + + ); + } + + if (flow.name === 'create-wizard') { + return ( + + ); + } + + if (flow.name === 'create-success') { + return ( + + ); + } + + return ( + { + resetCreate(); + setFlow(prev => { + if (prev.name === 'loading') return prev; + return { name: 'create-wizard', deployedComponents: [] }; + }); + }} + onExit={onExit} + /> + ); +} diff --git a/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx b/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx new file mode 100644 index 000000000..47d33ddf5 --- /dev/null +++ b/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx @@ -0,0 +1,275 @@ +import { ConfigBundleNameSchema } from '../../../../schema'; +import type { SelectableItem } 
from '../../components'; +import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { generateUniqueName } from '../../utils'; +import type { AddConfigBundleConfig, ComponentType, DeployedComponent } from './types'; +import { COMPONENT_TYPE_OPTIONS, CONFIG_BUNDLE_STEP_LABELS } from './types'; +import { useAddConfigBundleWizard } from './useAddConfigBundleWizard'; +import { Box, Text } from 'ink'; +import React, { useMemo } from 'react'; + +interface AddConfigBundleScreenProps { + onComplete: (config: AddConfigBundleConfig) => void; + onExit: () => void; + existingBundleNames: string[]; + deployedComponents: DeployedComponent[]; +} + +function validateConfigJson(value: string): string | true { + try { + const parsed: unknown = JSON.parse(value); + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + return 'Must be a JSON object with key-value pairs'; + } + return true; + } catch (err) { + if (err instanceof SyntaxError) { + return 'Invalid JSON syntax'; + } + return 'Must be a valid JSON object'; + } +} + +export function AddConfigBundleScreen({ + onComplete, + onExit, + existingBundleNames, + deployedComponents, +}: AddConfigBundleScreenProps) { + const wizard = useAddConfigBundleWizard(); + + const componentTypeItems: SelectableItem[] = useMemo( + () => COMPONENT_TYPE_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), + [] + ); + + // Filter deployed components by selected type + const availableComponents: SelectableItem[] = useMemo(() => { + const filtered = deployedComponents.filter(c => c.type === wizard.config.currentComponentType); + // Exclude already-added ARNs + const existingArns = new Set(Object.keys(wizard.config.components)); + return filtered + .filter(c => !existingArns.has(c.arn)) + .map(c => ({ + id: c.arn, + title: c.name, + description: 
c.isPlaceholder ? '(not yet deployed — ARN resolved on deploy)' : c.arn, + })); + }, [deployedComponents, wizard.config.currentComponentType, wizard.config.components]); + + const addAnotherItems: SelectableItem[] = useMemo( + () => [ + { id: 'no', title: 'Continue' }, + { id: 'yes', title: 'Add another component' }, + ], + [] + ); + + const isNameStep = wizard.step === 'name'; + const isDescriptionStep = wizard.step === 'description'; + const isComponentTypeStep = wizard.step === 'componentType'; + const isComponentSelectStep = wizard.step === 'componentSelect'; + const isConfigurationStep = wizard.step === 'configuration'; + const isAddAnotherStep = wizard.step === 'addAnother'; + const isBranchNameStep = wizard.step === 'branchName'; + const isCommitMessageStep = wizard.step === 'commitMessage'; + const isConfirmStep = wizard.step === 'confirm'; + + const componentTypeNav = useListNavigation({ + items: componentTypeItems, + onSelect: item => wizard.setComponentType(item.id as ComponentType), + onExit: () => wizard.goBack(), + isActive: isComponentTypeStep, + }); + + const componentSelectNav = useListNavigation({ + items: availableComponents, + onSelect: item => wizard.setSelectedComponent(item.id), + onExit: () => wizard.goBack(), + isActive: isComponentSelectStep, + }); + + const addAnotherNav = useListNavigation({ + items: addAnotherItems, + onSelect: item => { + if (item.id === 'yes') wizard.addAnotherComponent(); + else wizard.doneAddingComponents(); + }, + onExit: () => wizard.goBack(), + isActive: isAddAnotherStep, + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(wizard.config), + onExit: () => wizard.goBack(), + isActive: isConfirmStep, + }); + + const helpText = + isComponentTypeStep || isComponentSelectStep || isAddAnotherStep + ? HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? 
HELP_TEXT.CONFIRM_CANCEL + : HELP_TEXT.TEXT_INPUT; + + const headerContent = ( + + ); + + const componentCount = Object.keys(wizard.config.components).length; + + return ( + + + {isNameStep && ( + !existingBundleNames.includes(value) || 'Bundle name already exists'} + /> + )} + + {isDescriptionStep && ( + wizard.goBack()} + /> + )} + + {isComponentTypeStep && ( + 0 + ? `${componentCount} component(s) added. Select another type or go back to continue.` + : 'Select the type of resource to add to this bundle' + } + items={componentTypeItems} + selectedIndex={componentTypeNav.selectedIndex} + /> + )} + + {isComponentSelectStep && availableComponents.length > 0 && ( + + )} + + {isComponentSelectStep && availableComponents.length === 0 && ( + + + No deployed {wizard.config.currentComponentType === 'runtime' ? 'runtimes' : 'gateways'} found. + + Deploy your resources first with `agentcore deploy`, then try again. + Press Esc to go back. + + )} + + {isConfigurationStep && ( + <> + + + Component: {wizard.config.currentComponentArn} + + Enter the configuration as a JSON object (key-value pairs). + Example: {'{"systemPrompt": "You are a helpful assistant", "temperature": 0.7}'} + + { + const parsed = JSON.parse(value) as Record; + wizard.setConfiguration(parsed); + }} + onCancel={() => wizard.goBack()} + customValidation={validateConfigJson} + /> + + )} + + {isAddAnotherStep && ( + <> + + + {componentCount} component{componentCount !== 1 ? 's' : ''} configured: + + {Object.keys(wizard.config.components).map(arn => ( + + {' '}• {arn} + + ))} + + + + )} + + {isBranchNameStep && ( + wizard.goBack()} + /> + )} + + {isCommitMessageStep && ( + wizard.goBack()} + /> + )} + + {isConfirmStep && ( + ({ + label: ` ${arn.split('/').pop() ?? 
arn}`, + value: Object.keys(comp.configuration).join(', '), + })), + { label: 'Branch', value: wizard.config.branchName || 'mainline' }, + { label: 'Message', value: wizard.config.commitMessage || `Create ${wizard.config.name}` }, + ]} + /> + )} + + + ); +} diff --git a/src/cli/tui/screens/config-bundle/index.ts b/src/cli/tui/screens/config-bundle/index.ts new file mode 100644 index 000000000..831a3c94e --- /dev/null +++ b/src/cli/tui/screens/config-bundle/index.ts @@ -0,0 +1 @@ +export { AddConfigBundleFlow } from './AddConfigBundleFlow'; diff --git a/src/cli/tui/screens/config-bundle/types.ts b/src/cli/tui/screens/config-bundle/types.ts new file mode 100644 index 000000000..dba1ba4e7 --- /dev/null +++ b/src/cli/tui/screens/config-bundle/types.ts @@ -0,0 +1,57 @@ +import type { ComponentConfigurationMap } from '../../../../schema'; + +// ───────────────────────────────────────────────────────────────────────────── +// Config Bundle Wizard Types +// ───────────────────────────────────────────────────────────────────────────── + +export type AddConfigBundleStep = + | 'name' + | 'description' + | 'componentType' + | 'componentSelect' + | 'configuration' + | 'addAnother' + | 'branchName' + | 'commitMessage' + | 'confirm'; + +export type ComponentType = 'runtime' | 'gateway'; + +export interface DeployedComponent { + name: string; + arn: string; + type: ComponentType; + /** True when the resource is not yet deployed — ARN is a placeholder resolved at deploy time. */ + isPlaceholder?: boolean; +} + +export interface AddConfigBundleConfig { + name: string; + description: string; + components: ComponentConfigurationMap; + /** Raw text entered by user (JSON string or file path). */ + componentsRaw: string; + branchName: string; + commitMessage: string; + /** Currently selected component type in wizard. */ + currentComponentType?: ComponentType; + /** Currently selected component ARN in wizard. 
*/ + currentComponentArn?: string; +} + +export const CONFIG_BUNDLE_STEP_LABELS: Record = { + name: 'Name', + description: 'Description', + componentType: 'Type', + componentSelect: 'Component', + configuration: 'Config', + addAnother: 'More?', + branchName: 'Branch', + commitMessage: 'Message', + confirm: 'Confirm', +}; + +export const COMPONENT_TYPE_OPTIONS = [ + { id: 'runtime', title: 'Agent Runtime', description: 'Configure an agent runtime' }, + { id: 'gateway', title: 'HTTP Gateway', description: 'Configure an HTTP gateway' }, +] as const; diff --git a/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts b/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts new file mode 100644 index 000000000..daa79173b --- /dev/null +++ b/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts @@ -0,0 +1,113 @@ +import type { ComponentConfigurationMap } from '../../../../schema'; +import type { AddConfigBundleConfig, AddConfigBundleStep, ComponentType } from './types'; +import { useCallback, useState } from 'react'; + +const ALL_STEPS: AddConfigBundleStep[] = [ + 'name', + 'description', + 'componentType', + 'componentSelect', + 'configuration', + 'addAnother', + 'branchName', + 'commitMessage', + 'confirm', +]; + +function getDefaultConfig(): AddConfigBundleConfig { + return { + name: '', + description: '', + components: {}, + componentsRaw: '', + branchName: 'mainline', + commitMessage: '', + }; +} + +export function useAddConfigBundleWizard() { + const [config, setConfig] = useState(getDefaultConfig); + const [step, setStep] = useState('name'); + + const currentIndex = ALL_STEPS.indexOf(step); + + const goBack = useCallback(() => { + const prevStep = ALL_STEPS[currentIndex - 1]; + if (prevStep) setStep(prevStep); + }, [currentIndex]); + + const setName = useCallback((name: string) => { + setConfig(c => ({ ...c, name })); + setStep('description'); + }, []); + + const setDescription = useCallback((description: string) => { + setConfig(c => ({ 
...c, description })); + setStep('componentType'); + }, []); + + const setComponentType = useCallback((componentType: ComponentType) => { + setConfig(c => ({ ...c, currentComponentType: componentType, currentComponentArn: undefined })); + setStep('componentSelect'); + }, []); + + const setSelectedComponent = useCallback((arn: string) => { + setConfig(c => ({ ...c, currentComponentArn: arn })); + setStep('configuration'); + }, []); + + const setConfiguration = useCallback((configuration: Record) => { + setConfig(c => { + const arn = c.currentComponentArn; + if (!arn) return c; + const updatedComponents: ComponentConfigurationMap = { + ...c.components, + [arn]: { configuration }, + }; + return { ...c, components: updatedComponents }; + }); + setStep('addAnother'); + }, []); + + const addAnotherComponent = useCallback(() => { + setConfig(c => ({ ...c, currentComponentType: undefined, currentComponentArn: undefined })); + setStep('componentType'); + }, []); + + const doneAddingComponents = useCallback(() => { + setStep('branchName'); + }, []); + + const setBranchName = useCallback((branchName: string) => { + setConfig(c => ({ ...c, branchName })); + setStep('commitMessage'); + }, []); + + const setCommitMessage = useCallback((commitMessage: string) => { + setConfig(c => ({ ...c, commitMessage })); + setStep('confirm'); + }, []); + + const reset = useCallback(() => { + setConfig(getDefaultConfig()); + setStep('name'); + }, []); + + return { + config, + step, + steps: ALL_STEPS, + currentIndex, + goBack, + setName, + setDescription, + setComponentType, + setSelectedComponent, + setConfiguration, + addAnotherComponent, + doneAddingComponents, + setBranchName, + setCommitMessage, + reset, + }; +} diff --git a/src/cli/tui/screens/create/useCreateFlow.ts b/src/cli/tui/screens/create/useCreateFlow.ts index c9ec5076b..bc3131521 100644 --- a/src/cli/tui/screens/create/useCreateFlow.ts +++ b/src/cli/tui/screens/create/useCreateFlow.ts @@ -3,6 +3,7 @@ import type { DeployedState 
} from '../../../../schema'; import { getErrorMessage } from '../../../errors'; import { CreateLogger } from '../../../logging'; import { initGitRepo, setupPythonProject, writeEnvFile, writeGitignore } from '../../../operations'; +import { createConfigBundleForAgent } from '../../../operations/agent/config-bundle-defaults'; import { mapGenerateConfigToRenderConfig, mapModelProviderToCredentials, @@ -306,6 +307,7 @@ export function useCreateFlow(cwd: string): CreateFlowState { idleRuntimeSessionTimeout: addAgentConfig.idleRuntimeSessionTimeout, maxLifetime: addAgentConfig.maxLifetime, sessionStorageMountPath: addAgentConfig.sessionStorageMountPath, + withConfigBundle: addAgentConfig.withConfigBundle, }; logger.logSubStep(`Framework: ${generateConfig.sdk}`); @@ -368,6 +370,11 @@ export function useCreateFlow(cwd: string): CreateFlowState { () => configIO.readProjectSpec() ); } + // Auto-create config bundle when opted in + if (addAgentConfig.withConfigBundle) { + logger.logSubStep('Creating config bundle...'); + await createConfigBundleForAgent(addAgentConfig.name, configBaseDir); + } } else if (addAgentConfig.agentType === 'import') { // Import path: delegate to executeImportAgent logger.logSubStep(`Importing from Bedrock Agent: ${addAgentConfig.bedrockAgentId}`); diff --git a/src/cli/tui/screens/deploy/DeployScreen.tsx b/src/cli/tui/screens/deploy/DeployScreen.tsx index 00ab0d57e..729d14656 100644 --- a/src/cli/tui/screens/deploy/DeployScreen.tsx +++ b/src/cli/tui/screens/deploy/DeployScreen.tsx @@ -76,6 +76,8 @@ export function DeployScreen({ diffSummaries, numStacksWithChanges, deployNotes, + postDeployWarnings, + postDeployHasError, isDiffLoading, requestDiff, hasError, @@ -342,7 +344,13 @@ export function DeployScreen({ {/* Show deploy status when deploying or complete */} {showDeployStatus && ( - + )} @@ -375,6 +383,20 @@ export function DeployScreen({ )} + {allSuccess && postDeployWarnings.length > 0 && ( + + + Post-deploy warnings: + + 
{postDeployWarnings.map((w, i) => ( + + {' '} + {w} + + ))} + + )} + {allSuccess && deployNotes.length > 0 && ( {deployNotes.map((note, i) => ( diff --git a/src/cli/tui/screens/deploy/useDeployFlow.ts b/src/cli/tui/screens/deploy/useDeployFlow.ts index a1f8d460c..33531e521 100644 --- a/src/cli/tui/screens/deploy/useDeployFlow.ts +++ b/src/cli/tui/screens/deploy/useDeployFlow.ts @@ -17,6 +17,13 @@ import { ExecLogger } from '../../../logging'; import { performStackTeardown, setupTransactionSearch } from '../../../operations/deploy'; import { getGatewayTargetStatuses } from '../../../operations/deploy/gateway-status'; import { createDeploymentManager } from '../../../operations/deploy/imperative'; +import { deleteOrphanedABTests, setupABTests } from '../../../operations/deploy/post-deploy-ab-tests'; +import { + resolveConfigBundleComponentKeys, + setupConfigBundles, +} from '../../../operations/deploy/post-deploy-config-bundles'; +import { setupHttpGateways } from '../../../operations/deploy/post-deploy-http-gateways'; +import { enableOnlineEvalConfigs } from '../../../operations/deploy/post-deploy-online-evals'; import { type StackDiffSummary, type Step, @@ -85,6 +92,10 @@ interface DeployFlowState { numStacksWithChanges?: number; /** Notes to display after successful deploy (e.g., transaction search info) */ deployNotes: string[]; + /** Warnings from post-deploy steps (config bundles, AB tests) */ + postDeployWarnings: string[]; + /** True if any post-deploy sub-resource operation had errors */ + postDeployHasError: boolean; /** Whether an on-demand diff is currently running */ isDiffLoading: boolean; /** Request an on-demand diff (lazy: runs once, caches result) */ @@ -136,6 +147,8 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const [numStacksWithChanges, setNumStacksWithChanges] = useState(); const [isDiffLoading, setIsDiffLoading] = useState(false); const [deployNotes, setDeployNotes] = useState([]); + const 
[postDeployWarnings, setPostDeployWarnings] = useState([]); + const [postDeployHasError, setPostDeployHasError] = useState(false); const isDiffRunningRef = useRef(false); const [deployOutput, setDeployOutput] = useState(null); const [deployMessages, setDeployMessages] = useState([]); @@ -278,8 +291,14 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const evaluators = parseEvaluatorOutputs(outputs, evaluatorNames); // Parse online eval config outputs - const onlineEvalNames = (ctx.projectSpec.onlineEvalConfigs ?? []).map((c: { name: string }) => c.name); - const onlineEvalConfigs = parseOnlineEvalOutputs(outputs, onlineEvalNames); + const onlineEvalSpecs = (ctx.projectSpec.onlineEvalConfigs ?? []).map( + (c: { name: string; agent?: string; endpoint?: string }) => ({ + name: c.name, + agent: c.agent, + endpoint: c.endpoint, + }) + ); + const onlineEvalConfigs = parseOnlineEvalOutputs(outputs, onlineEvalSpecs); // Parse policy engine outputs const policyEngineSpecs = ctx.projectSpec.policyEngines ?? []; @@ -331,7 +350,7 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState // Persist state BEFORE updating React step status — React state updates can // interrupt this async callback by triggering re-renders that dispose resources. - const deployedState = buildDeployedState({ + let deployedState = buildDeployedState({ targetName: target.name, stackName: currentStackName, agents, @@ -361,6 +380,210 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState throw new Error(`Harness deployment failed: ${harnessDeployError}`); } + // Post-deploy: Enable online eval configs that have enableOnCreate (CFN deploys them as DISABLED). + // Only enable configs that are newly deployed — skip configs that already existed before this + // deploy run, so we don't re-enable configs a customer intentionally disabled. + const onlineEvalFullSpecs = ctx.projectSpec.onlineEvalConfigs ?? 
[]; + const deployedOnlineEvalConfigs = deployedState.targets?.[target.name]?.resources?.onlineEvalConfigs ?? {}; + const previouslyDeployedOnlineEvals = existingState?.targets?.[target.name]?.resources?.onlineEvalConfigs ?? {}; + const newOnlineEvalFullSpecs = onlineEvalFullSpecs.filter(c => !previouslyDeployedOnlineEvals[c.name]); + if (newOnlineEvalFullSpecs.length > 0 && Object.keys(deployedOnlineEvalConfigs).length > 0) { + try { + const enableResult = await enableOnlineEvalConfigs({ + region: target.region, + onlineEvalConfigs: newOnlineEvalFullSpecs, + deployedOnlineEvalConfigs, + }); + + if (enableResult.hasErrors) { + const errors = enableResult.results.filter(r => r.status === 'error'); + for (const err of errors) { + logger.log(`Online eval enable "${err.configName}" error: ${err.error}`, 'warn'); + } + setPostDeployHasError(true); + setPostDeployWarnings(prev => [ + ...prev, + ...errors.map(err => `Online eval "${err.configName}": ${err.error}`), + ]); + } + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + logger.log(`Online eval enable failed: ${message}`, 'warn'); + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, `Online eval enable failed: ${message}`]); + } + } + + // Post-deploy: Create/update configuration bundles + const configBundleSpecs = ctx.projectSpec.configBundles ?? 
[]; + if (configBundleSpecs.length > 0) { + try { + // Resolve component key placeholders (e.g., {{runtime:name}} → real ARN) + const resolvedProjectSpec = resolveConfigBundleComponentKeys(ctx.projectSpec, deployedState, target.name); + const existingConfigBundles = deployedState.targets?.[target.name]?.resources?.configBundles; + const configBundleResult = await setupConfigBundles({ + region: target.region, + projectSpec: resolvedProjectSpec, + existingBundles: existingConfigBundles, + }); + + // Merge config bundle state into deployed state + if (Object.keys(configBundleResult.configBundles).length > 0) { + const updatedState = await configIO.readDeployedState().catch(() => deployedState); + const targetResources = updatedState.targets[target.name]?.resources; + if (targetResources) { + targetResources.configBundles = configBundleResult.configBundles; + await configIO.writeDeployedState(updatedState); + } + } + + if (configBundleResult.hasErrors) { + const errors = configBundleResult.results.filter(r => r.status === 'error'); + for (const err of errors) { + logger.log(`Config bundle "${err.bundleName}" setup error: ${err.error}`, 'warn'); + } + setPostDeployHasError(true); + setPostDeployWarnings(prev => [ + ...prev, + ...errors.map(err => `Config bundle "${err.bundleName}": ${err.error}`), + ]); + } + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + logger.log(`Config bundle setup failed: ${message}`, 'warn'); + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, `Config bundle setup failed: ${message}`]); + } + } + + // Pre-gateway: Delete orphaned AB tests so their gateway rules are cleaned up + // before we attempt to delete orphaned HTTP gateways. 
+ const existingABTests = deployedState.targets?.[target.name]?.resources?.abTests; + if (existingABTests && Object.keys(existingABTests).length > 0) { + try { + const deleteResult = await deleteOrphanedABTests({ + region: target.region, + projectSpec: ctx.projectSpec, + existingABTests, + }); + + if (deleteResult.hasErrors) { + const errors = deleteResult.results.filter(r => r.status === 'error'); + for (const err of errors) { + logger.log(`AB test delete "${err.testName}" error: ${err.error}`, 'warn'); + } + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, ...errors.map(err => `AB test "${err.testName}": ${err.error}`)]); + } + + // Surface warnings (e.g., "AB test was stopped before deletion") + for (const r of deleteResult.results) { + if (r.warning) { + logger.log(r.warning, 'warn'); + setPostDeployWarnings(prev => [...prev, r.warning!]); + } + } + + // Update deployed state to remove deleted AB tests + if (deleteResult.results.some(r => r.status === 'deleted')) { + const updatedState = await configIO.readDeployedState().catch(() => deployedState); + const targetResources = updatedState.targets[target.name]?.resources; + if (targetResources?.abTests) { + for (const r of deleteResult.results) { + if (r.status === 'deleted') delete targetResources.abTests[r.testName]; + } + await configIO.writeDeployedState(updatedState); + deployedState = updatedState; + } + } + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + logger.log(`AB test orphan cleanup failed: ${message}`, 'warn'); + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, `AB test orphan cleanup failed: ${message}`]); + } + } + + // Post-deploy: Create/update HTTP gateways + const httpGatewaySpecs = ctx.projectSpec.httpGateways ?? []; + const existingHttpGateways = deployedState.targets?.[target.name]?.resources?.httpGateways; + if (httpGatewaySpecs.length > 0 || Object.keys(existingHttpGateways ?? 
{}).length > 0) { + try { + const deployedResources = deployedState.targets?.[target.name]?.resources; + const httpGatewayResult = await setupHttpGateways({ + region: target.region, + projectName: ctx.projectSpec.name, + projectSpec: ctx.projectSpec, + existingHttpGateways, + deployedResources, + }); + + // Always merge HTTP gateway state (even if empty, to clear deleted gateways) + const updatedState = await configIO.readDeployedState().catch(() => deployedState); + const targetResources = updatedState.targets[target.name]?.resources; + if (targetResources) { + targetResources.httpGateways = httpGatewayResult.httpGateways; + await configIO.writeDeployedState(updatedState); + deployedState = updatedState; + } + + if (httpGatewayResult.hasErrors) { + const errors = httpGatewayResult.results.filter(r => r.status === 'error'); + for (const err of errors) { + logger.log(`HTTP gateway "${err.gatewayName}" setup error: ${err.error}`, 'warn'); + } + setPostDeployHasError(true); + setPostDeployWarnings(prev => [ + ...prev, + ...errors.map(err => `HTTP gateway "${err.gatewayName}": ${err.error}`), + ]); + } + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + logger.log(`HTTP gateway setup failed: ${message}`, 'warn'); + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, `HTTP gateway setup failed: ${message}`]); + } + } + + // Post-deploy: Create/update AB tests + const abTestSpecs = ctx.projectSpec.abTests ?? 
[]; + if (abTestSpecs.length > 0) { + try { + const existingABTests = deployedState.targets?.[target.name]?.resources?.abTests; + const deployedResources = deployedState.targets?.[target.name]?.resources; + const abTestResult = await setupABTests({ + region: target.region, + projectSpec: ctx.projectSpec, + existingABTests, + deployedResources, + }); + + if (Object.keys(abTestResult.abTests).length > 0) { + const updatedState = await configIO.readDeployedState().catch(() => deployedState); + const targetResources = updatedState.targets[target.name]?.resources; + if (targetResources) { + targetResources.abTests = abTestResult.abTests; + await configIO.writeDeployedState(updatedState); + } + } + + if (abTestResult.hasErrors) { + const errors = abTestResult.results.filter(r => r.status === 'error'); + for (const err of errors) { + logger.log(`AB test "${err.testName}" setup error: ${err.error}`, 'warn'); + } + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, ...errors.map(err => `AB test "${err.testName}": ${err.error}`)]); + } + } catch (err: unknown) { + const message = err instanceof Error ? 
err.message : String(err); + logger.log(`AB test setup failed: ${message}`, 'warn'); + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, `AB test setup failed: ${message}`]); + } + } + // Query gateway target sync statuses (non-blocking) const allStatuses: { name: string; status: string }[] = []; for (const [, gateway] of Object.entries(gateways)) { @@ -746,6 +969,8 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState diffSummaries, numStacksWithChanges, deployNotes, + postDeployWarnings, + postDeployHasError, isDiffLoading, requestDiff, stackOutputs, diff --git a/src/cli/tui/screens/eval/EvalHubScreen.tsx b/src/cli/tui/screens/eval/EvalHubScreen.tsx index 67056413d..27cb2e66f 100644 --- a/src/cli/tui/screens/eval/EvalHubScreen.tsx +++ b/src/cli/tui/screens/eval/EvalHubScreen.tsx @@ -4,7 +4,7 @@ import { HELP_TEXT } from '../../constants'; import { useListNavigation } from '../../hooks'; import React, { useMemo } from 'react'; -type EvalHubView = 'run-eval' | 'runs' | 'online-dashboard'; +type EvalHubView = 'run-eval' | 'runs' | 'run-batch-eval' | 'batch-eval-history' | 'online-dashboard'; interface EvalHubScreenProps { onSelect: (view: EvalHubView) => void; @@ -19,7 +19,17 @@ export function EvalHubScreen({ onSelect, onExit }: EvalHubScreenProps) { title: 'Run On-demand Evaluation', description: 'Evaluate agent traces with selected evaluators', }, - { id: 'runs', title: 'Eval Runs', description: 'View past eval run results and scores' }, + { id: 'runs', title: 'Eval Runs', description: 'View past on-demand eval results and scores' }, + { + id: 'run-batch-eval', + title: 'Run Batch Evaluation', + description: 'Run a batch evaluation against agent sessions via CloudWatch', + }, + { + id: 'batch-eval-history', + title: 'Batch Eval History', + description: 'View past batch evaluation results (local)', + }, { id: 'online-dashboard', title: 'Online Eval Dashboard', diff --git 
a/src/cli/tui/screens/generate/GenerateWizardUI.tsx b/src/cli/tui/screens/generate/GenerateWizardUI.tsx index 4b61e4689..9c6c79599 100644 --- a/src/cli/tui/screens/generate/GenerateWizardUI.tsx +++ b/src/cli/tui/screens/generate/GenerateWizardUI.tsx @@ -577,6 +577,12 @@ function ConfirmView({ config, credentialProjectName }: { config: GenerateConfig {config.sessionStorageMountPath} )} + {config.withConfigBundle && ( + + Config Bundle: + Yes (auto-created on deploy) + + )} ); diff --git a/src/cli/tui/screens/generate/types.ts b/src/cli/tui/screens/generate/types.ts index 697b318bd..1b764ea80 100644 --- a/src/cli/tui/screens/generate/types.ts +++ b/src/cli/tui/screens/generate/types.ts @@ -64,6 +64,8 @@ export interface GenerateConfig { maxLifetime?: number; /** Mount path for session filesystem storage (e.g. /mnt/session-storage) */ sessionStorageMountPath?: string; + /** When true, create a config bundle wired into the agent template */ + withConfigBundle?: boolean; } /** Base steps - apiKey, memory, subnets, securityGroups are conditionally added based on selections */ @@ -158,7 +160,14 @@ export const NETWORK_MODE_OPTIONS = [ { id: 'VPC', title: 'VPC', description: 'Attach to your VPC' }, ] as const; -export type AdvancedSettingId = 'dockerfile' | 'network' | 'headers' | 'auth' | 'lifecycle' | 'filesystem'; +export type AdvancedSettingId = + | 'dockerfile' + | 'network' + | 'headers' + | 'auth' + | 'lifecycle' + | 'filesystem' + | 'configBundle'; export const ADVANCED_SETTING_OPTIONS = [ { id: 'dockerfile', title: 'Custom Dockerfile', description: 'Specify a custom Dockerfile path' }, @@ -167,6 +176,11 @@ export const ADVANCED_SETTING_OPTIONS = [ { id: 'auth', title: 'Custom auth (JWT)', description: 'OIDC-based token validation for inbound requests' }, { id: 'lifecycle', title: 'Lifecycle timeouts', description: 'Idle timeout & max instance lifetime' }, { id: 'filesystem', title: 'Session filesystem storage', description: 'Persist files across session 
stop/resume' }, + { + id: 'configBundle', + title: 'Config bundle [preview]', + description: 'Manage system prompt and tool config without redeploying', + }, ] as const; /** Dockerfile filename regex — must match the Zod schema in agent-env.ts */ diff --git a/src/cli/tui/screens/generate/useGenerateWizard.ts b/src/cli/tui/screens/generate/useGenerateWizard.ts index 16e016ad6..411cfb151 100644 --- a/src/cli/tui/screens/generate/useGenerateWizard.ts +++ b/src/cli/tui/screens/generate/useGenerateWizard.ts @@ -89,6 +89,7 @@ export function useGenerateWizard(options?: UseGenerateWizardOptions) { if (advancedSettings.has('filesystem')) { subSteps.push('sessionStorageMountPath'); } + // Config bundle — no sub-steps needed, uses smart defaults filtered = [...filtered.slice(0, afterAdvanced), ...subSteps, ...filtered.slice(afterAdvanced)]; } // Add jwtConfig step after authorizerType when CUSTOM_JWT is selected @@ -234,9 +235,12 @@ export function useGenerateWizard(options?: UseGenerateWizardOptions) { idleRuntimeSessionTimeout: undefined, maxLifetime: undefined, sessionStorageMountPath: undefined, + withConfigBundle: undefined, })); setStep('confirm'); } else { + // Config bundle has no sub-steps — set flag immediately + setConfig(c => ({ ...c, withConfigBundle: selected.has('configBundle') || undefined })); // Navigate to first advanced sub-step — determined by the steps memo on next render. // Use setTimeout so the steps memo recalculates with the new advancedSettings first. 
setTimeout(() => { diff --git a/src/cli/tui/screens/home/HelpScreen.tsx b/src/cli/tui/screens/home/HelpScreen.tsx index 485c263c7..71b8d56e8 100644 --- a/src/cli/tui/screens/home/HelpScreen.tsx +++ b/src/cli/tui/screens/home/HelpScreen.tsx @@ -218,11 +218,11 @@ export function HelpScreen(props: { const interactiveItems = useMemo((): DisplayItem[] => { return commands.filter(cmd => !cmd.cliOnly).flatMap(filterCommand); - }, [commands, query]); + }, [commands, filterCommand]); const cliOnlyItems = useMemo((): DisplayItem[] => { return commands.filter(cmd => cmd.cliOnly).flatMap(filterCommand); - }, [commands, query]); + }, [commands, filterCommand]); const visibleCliOnlyItems = query ? cliOnlyItems : showCliOnly ? cliOnlyItems : []; diff --git a/src/cli/tui/screens/identity/useCreateIdentity.ts b/src/cli/tui/screens/identity/useCreateIdentity.ts index 42aace21b..e214d1e67 100644 --- a/src/cli/tui/screens/identity/useCreateIdentity.ts +++ b/src/cli/tui/screens/identity/useCreateIdentity.ts @@ -2,6 +2,7 @@ import { ConfigIO } from '../../../../lib'; import type { Credential } from '../../../../schema'; import type { AddCredentialOptions } from '../../../primitives/CredentialPrimitive'; import { credentialPrimitive } from '../../../primitives/registry'; +import { withAddTelemetry } from '../../../telemetry/cli-command-run.js'; import { useCallback, useEffect, useState } from 'react'; interface CreateStatus { @@ -16,7 +17,13 @@ export function useCreateIdentity() { const create = useCallback(async (config: AddCredentialOptions) => { setStatus({ state: 'loading' }); try { - const result = await credentialPrimitive.add(config); + const result = await withAddTelemetry( + 'add.credential', + { + credential_type: config.authorizerType === 'OAuthCredentialProvider' ? 'oauth' : 'api-key', + }, + () => credentialPrimitive.add(config) + ); if (!result.success) { throw new Error(result.error ?? 
'Failed to create credential'); } diff --git a/src/cli/tui/screens/import/ArnInputScreen.tsx b/src/cli/tui/screens/import/ArnInputScreen.tsx index 11b921a90..9381ca7b8 100644 --- a/src/cli/tui/screens/import/ArnInputScreen.tsx +++ b/src/cli/tui/screens/import/ArnInputScreen.tsx @@ -4,8 +4,7 @@ import { Screen } from '../../components/Screen'; import { TextInput } from '../../components/TextInput'; import { HELP_TEXT } from '../../constants'; -const ARN_PATTERN = - /^arn:[^:]+:bedrock-agentcore:[^:]+:[^:]+:(runtime|memory|evaluator|online-evaluation-config|gateway)\/.+$/; +const ARN_PATTERN = /^arn:[^:]+:bedrock-agentcore:[^:]+:[^:]+:(runtime|memory|evaluator|online-evaluation-config)\/.+$/; function validateArn(value: string): true | string { if (!ARN_PATTERN.test(value)) { diff --git a/src/cli/tui/screens/invoke/useInvokeFlow.ts b/src/cli/tui/screens/invoke/useInvokeFlow.ts index 522a4b7a7..1f872370b 100644 --- a/src/cli/tui/screens/invoke/useInvokeFlow.ts +++ b/src/cli/tui/screens/invoke/useInvokeFlow.ts @@ -49,6 +49,7 @@ export interface InvokeConfig { networkMode?: NetworkMode; protocol?: ProtocolMode; authorizerType?: RuntimeAuthorizerType; + baggage?: string; }[]; harnesses: { name: string; @@ -150,9 +151,24 @@ export function useInvokeFlow(options: InvokeFlowOptions = {}): InvokeFlowState } const runtimes: InvokeConfig['runtimes'] = []; + const deployedBundles = targetState?.resources?.configBundles ?? {}; for (const agent of project.runtimes) { const state = targetState?.resources?.runtimes?.[agent.name]; if (!state) continue; + + // Build config bundle baggage if a bundle is associated with this agent + let baggage: string | undefined; + const bundleSpec = project.configBundles?.find(b => { + const keys = Object.keys(b.components ?? 
{}); + return keys.some(k => k === `{{runtime:${agent.name}}}`); + }); + if (bundleSpec) { + const bundleState = deployedBundles[bundleSpec.name]; + if (bundleState?.bundleArn && bundleState?.versionId) { + baggage = `aws.agentcore.configbundle_arn=${encodeURIComponent(bundleState.bundleArn)},aws.agentcore.configbundle_version=${encodeURIComponent(bundleState.versionId)}`; + } + } + runtimes.push({ name: agent.name, state, @@ -160,6 +176,7 @@ export function useInvokeFlow(options: InvokeFlowOptions = {}): InvokeFlowState networkMode: agent.networkMode, protocol: agent.protocol, authorizerType: agent.authorizerType, + baggage, }); } @@ -673,6 +690,7 @@ export function useInvokeFlow(options: InvokeFlowOptions = {}): InvokeFlowState logger, headers, bearerToken: bearerToken || undefined, + baggage: agent.baggage, }); if (result.sessionId) { diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx index 92c56d90e..243eba4e7 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx @@ -3,16 +3,17 @@ import { validateAwsCredentials } from '../../../aws/account'; import { listEvaluators } from '../../../aws/agentcore-control'; import { detectRegion } from '../../../aws/region'; import { getErrorMessage } from '../../../errors'; -import { ErrorPrompt } from '../../components'; +import { ErrorPrompt, GradientText } from '../../components'; import { useCreateOnlineEval, useExistingOnlineEvalNames } from '../../hooks/useCreateOnlineEval'; import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import type { RuntimeInfoForEval } from './AddOnlineEvalScreen'; import { AddOnlineEvalScreen } from './AddOnlineEvalScreen'; import type { AddOnlineEvalConfig, EvaluatorItem } from './types'; import React, { useCallback, useEffect, useState } from 'react'; type FlowState = | { name: 'loading' } - | { name: 'create-wizard'; evaluators: 
EvaluatorItem[]; agentNames: string[] } + | { name: 'create-wizard'; evaluators: EvaluatorItem[]; agentNames: string[]; runtimes: RuntimeInfoForEval[] } | { name: 'create-success'; configName: string } | { name: 'creds-error'; message: string } | { name: 'error'; message: string }; @@ -55,7 +56,8 @@ export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, description: e.description, })); - const agentNames = projectSpec.runtimes.map(a => a.name); + const runtimesList = projectSpec.runtimes ?? []; + const agentNames = runtimesList.map(a => a.name); if (agentNames.length === 0) { setFlow({ @@ -65,7 +67,16 @@ export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, return; } - setFlow({ name: 'create-wizard', evaluators: items, agentNames }); + // Build runtime info with endpoints for the endpoint picker + const runtimesInfo: RuntimeInfoForEval[] = runtimesList.map(r => ({ + name: r.name, + endpoints: Object.entries(r.endpoints ?? {}).map(([epName, ep]) => ({ + name: epName, + version: ep.version, + })), + })); + + setFlow({ name: 'create-wizard', evaluators: items, agentNames, runtimes: runtimesInfo }); } catch (err) { if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); } @@ -96,7 +107,7 @@ export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, ); if (flow.name === 'loading') { - return null; + return ; } if (flow.name === 'creds-error') { @@ -109,6 +120,7 @@ export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, existingConfigNames={existingConfigNames} evaluatorItems={flow.evaluators} agentNames={flow.agentNames} + runtimes={flow.runtimes} onComplete={handleCreateComplete} onExit={onBack} /> diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index fc863a2d1..fd5fafcf6 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ 
b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -12,11 +12,17 @@ import { import { HELP_TEXT } from '../../constants'; import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; -import type { AddOnlineEvalConfig, EvaluatorItem } from './types'; +import type { AddOnlineEvalConfig, EvaluatorItem, RuntimeEndpointEntry } from './types'; import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types'; import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard'; import { Box, Text } from 'ink'; -import React, { useMemo } from 'react'; +import React, { useCallback, useEffect, useMemo } from 'react'; + +/** Runtime info with endpoints, passed from the parent flow. */ +export interface RuntimeInfoForEval { + name: string; + endpoints: RuntimeEndpointEntry[]; +} interface AddOnlineEvalScreenProps { onComplete: (config: AddOnlineEvalConfig) => void; @@ -24,6 +30,8 @@ interface AddOnlineEvalScreenProps { existingConfigNames: string[]; evaluatorItems: EvaluatorItem[]; agentNames: string[]; + /** Runtime info including endpoints for the endpoint picker step. */ + runtimes?: RuntimeInfoForEval[]; } export function AddOnlineEvalScreen({ @@ -32,6 +40,7 @@ export function AddOnlineEvalScreen({ existingConfigNames, evaluatorItems: rawEvaluatorItems, agentNames, + runtimes = [], }: AddOnlineEvalScreenProps) { const wizard = useAddOnlineEvalWizard(agentNames.length); @@ -43,6 +52,36 @@ export function AddOnlineEvalScreen({ return wizard.config; }, [wizard.config, agentNames]); + // Determine endpoints for the currently selected agent + const agentEndpoints = useMemo(() => { + const agentName = effectiveConfig.agent; + if (!agentName) return []; + const rt = runtimes.find(r => r.name === agentName); + return rt?.endpoints ?? 
[]; + }, [effectiveConfig.agent, runtimes]); + + // Skip endpoint step when the selected agent has no endpoints + const shouldSkipStep = useCallback( + (s: string) => { + if (s === 'endpoint' && agentEndpoints.length === 0) return true; + return false; + }, + [agentEndpoints.length] + ); + + useEffect(() => { + wizard.setSkipCheck(shouldSkipStep); + }, [shouldSkipStep]); // wizard.setSkipCheck is stable (useCallback with no deps) + + // Build endpoint picker items: DEFAULT (plain) + each endpoint + const endpointItems: SelectableItem[] = useMemo(() => { + const items: SelectableItem[] = [{ id: 'DEFAULT', title: 'DEFAULT' }]; + for (const ep of agentEndpoints) { + items.push({ id: ep.name, title: ep.name, description: `v${ep.version}` }); + } + return items; + }, [agentEndpoints]); + const evaluatorItems: SelectableItem[] = useMemo(() => { return rawEvaluatorItems.map(e => ({ id: e.arn, @@ -57,6 +96,7 @@ export function AddOnlineEvalScreen({ const isNameStep = wizard.step === 'name'; const isAgentStep = wizard.step === 'agent'; + const isEndpointStep = wizard.step === 'endpoint'; const isEvaluatorsStep = wizard.step === 'evaluators'; const isSamplingRateStep = wizard.step === 'samplingRate'; const isEnableOnCreateStep = wizard.step === 'enableOnCreate'; @@ -77,6 +117,16 @@ export function AddOnlineEvalScreen({ isActive: isAgentStep, }); + const endpointNav = useListNavigation({ + items: endpointItems, + onSelect: item => { + // DEFAULT means no endpoint filter — store undefined + wizard.setEndpoint(item.id === 'DEFAULT' ? undefined : item.id); + }, + onExit: () => wizard.goBack(), + isActive: isEndpointStep, + }); + const evaluatorsNav = useMultiSelectNavigation({ items: evaluatorItems, getId: item => item.id, @@ -102,7 +152,7 @@ export function AddOnlineEvalScreen({ const helpText = isEvaluatorsStep ? 'Space toggle · Enter confirm · Esc back' - : isAgentStep || isEnableOnCreateStep + : isAgentStep || isEndpointStep || isEnableOnCreateStep ? 
/**
 * Ordered list of wizard steps for the add-online-eval flow.
 *
 * The `agent` picker is left out when `agentCount <= 1`. The `endpoint` step is
 * always listed here; it is skipped dynamically by the wizard when no endpoints exist.
 *
 * @param agentCount - Number of agents available to pick from.
 * @returns The step ids in the order the wizard walks them.
 */
function getAllSteps(agentCount: number): AddOnlineEvalStep[] {
  // Zero or one agent: nothing to choose, so the picker contributes no step.
  const agentPicker: AddOnlineEvalStep[] = agentCount <= 1 ? [] : ['agent'];
  return ['name', ...agentPicker, 'endpoint', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm'];
}
skipCheckRef.current = check; + }, []); + const goBack = useCallback(() => { - const prevStep = allSteps[currentIndex - 1]; - if (prevStep) setStep(prevStep); + for (let i = currentIndex - 1; i >= 0; i--) { + if (!skipCheckRef.current(allSteps[i]!)) { + setStep(allSteps[i]!); + return; + } + } }, [allSteps, currentIndex, setStep]); const nextStep = useCallback( (currentStep: AddOnlineEvalStep): AddOnlineEvalStep | undefined => { const idx = allSteps.indexOf(currentStep); - return allSteps[idx + 1]; + for (let i = idx + 1; i < allSteps.length; i++) { + if (!skipCheckRef.current(allSteps[i]!)) { + return allSteps[i]!; + } + } + return undefined; }, [allSteps] ); @@ -50,13 +68,22 @@ export function useAddOnlineEvalWizard(agentCount: number) { const setAgent = useCallback( (agent: string) => { - setConfig(c => ({ ...c, agent })); + setConfig(c => ({ ...c, agent, endpoint: undefined })); const next = nextStep('agent'); if (next) setStep(next); }, [nextStep, setConfig, setStep] ); + const setEndpoint = useCallback( + (endpoint: string | undefined) => { + setConfig(c => ({ ...c, endpoint })); + const next = nextStep('endpoint'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + const setEvaluators = useCallback( (evaluators: string[]) => { setConfig(c => ({ ...c, evaluators })); @@ -95,8 +122,10 @@ export function useAddOnlineEvalWizard(agentCount: number) { steps: allSteps, currentIndex, goBack, + setSkipCheck, setName, setAgent, + setEndpoint, setEvaluators, setSamplingRate, setEnableOnCreate, diff --git a/src/cli/tui/screens/policy/AddPolicyFlow.tsx b/src/cli/tui/screens/policy/AddPolicyFlow.tsx index 720984563..9b3542cb8 100644 --- a/src/cli/tui/screens/policy/AddPolicyFlow.tsx +++ b/src/cli/tui/screens/policy/AddPolicyFlow.tsx @@ -1,4 +1,6 @@ import { policyEnginePrimitive, policyPrimitive } from '../../../primitives/registry'; +import { withAddTelemetry } from '../../../telemetry/cli-command-run.js'; +import { AttachMode, ValidationMode, 
standardize } from '../../../telemetry/schemas/common-shapes.js'; import { ErrorPrompt, Panel, @@ -128,7 +130,14 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD }, []); const commitEngine = useCallback(async (engineName: string, gateways?: string[], mode?: 'LOG_ONLY' | 'ENFORCE') => { - const result = await policyEnginePrimitive.add({ name: engineName }); + const result = await withAddTelemetry( + 'add.policy-engine', + { + attach_gateway_count: gateways?.length ?? 0, + attach_mode: standardize(AttachMode, mode ?? 'log_only'), + }, + () => policyEnginePrimitive.add({ name: engineName }) + ); if (!result.success) { setFlow({ name: 'error', message: result.error }); return; @@ -155,13 +164,21 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD ); const handlePolicyComplete = useCallback(async (config: AddPolicyConfig) => { - const result = await policyPrimitive.add({ - name: config.name, - engine: config.engine, - statement: config.statement, - source: config.sourceFile || undefined, - validationMode: config.validationMode, - }); + const result = await withAddTelemetry( + 'add.policy', + { + source_type: config.sourceFile ? 'file' : config.sourceMethod === 'generate' ? 'generate' : 'statement', + validation_mode: standardize(ValidationMode, config.validationMode ?? 
'FAIL_ON_ANY_FINDINGS'), + }, + () => + policyPrimitive.add({ + name: config.name, + engine: config.engine, + statement: config.statement, + source: config.sourceFile || undefined, + validationMode: config.validationMode, + }) + ); if (result.success) { setPolicyNames(prev => [...prev, config.name]); diff --git a/src/cli/tui/screens/recommendation/RecommendationFlow.tsx b/src/cli/tui/screens/recommendation/RecommendationFlow.tsx new file mode 100644 index 000000000..b28344df3 --- /dev/null +++ b/src/cli/tui/screens/recommendation/RecommendationFlow.tsx @@ -0,0 +1,552 @@ +import { ConfigIO } from '../../../../lib'; +import type { DeployedState } from '../../../../schema'; +import { validateAwsCredentials } from '../../../aws/account'; +import { listEvaluators } from '../../../aws/agentcore-control'; +import { detectRegion } from '../../../aws/region'; +import { getErrorMessage } from '../../../errors'; +import { applyRecommendationToBundle, runRecommendationCommand } from '../../../operations/recommendation'; +import type { RunRecommendationCommandResult } from '../../../operations/recommendation'; +import { saveRecommendationRun } from '../../../operations/recommendation/recommendation-storage'; +import { ErrorPrompt, GradientText, Panel, Screen, StepProgress } from '../../components'; +import type { Step } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { RecommendationScreen } from './RecommendationScreen'; +import type { + AgentItem, + ConfigBundleField, + ConfigBundleItem, + EvaluatorItem, + RecommendationWizardConfig, +} from './types'; +import { Box, Text } from 'ink'; +import React, { useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'loading' } + | { name: 'wizard'; agents: AgentItem[]; evaluators: EvaluatorItem[]; configBundles: ConfigBundleItem[] } + | { + name: 'running'; + config: RecommendationWizardConfig; + steps: Step[]; + elapsed: number; 
+ recommendationId?: string; + region?: string; + } + | { name: 'results'; result: RunRecommendationCommandResult; config: RecommendationWizardConfig; filePath?: string } + | { name: 'creds-error'; message: string } + | { name: 'error'; message: string; logFilePath?: string }; + +interface RecommendationFlowProps { + onExit: () => void; +} + +export function RecommendationFlow({ onExit }: RecommendationFlowProps) { + const [flow, setFlow] = useState({ name: 'loading' }); + + // Load agents and evaluators + useEffect(() => { + if (flow.name !== 'loading') return; + let cancelled = false; + + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) }); + return; + } + + try { + const configIO = new ConfigIO(); + const [{ region }, deployedState] = await Promise.all([detectRegion(), configIO.readDeployedState()]); + + if (cancelled) return; + + const agents = buildAgentItems(deployedState); + if (agents.length === 0) { + setFlow({ + name: 'error', + message: 'No deployed agents found. Run `agentcore deploy` first.', + }); + return; + } + + const evalResult = await listEvaluators({ region }); + if (cancelled) return; + + const evaluators: EvaluatorItem[] = evalResult.evaluators.map(e => ({ + id: e.evaluatorArn || e.evaluatorName, + title: e.evaluatorName, + description: e.description ?? e.evaluatorType, + })); + + const projectSpec = await configIO.readProjectSpec(); + const configBundles = buildConfigBundleItems(deployedState, projectSpec.configBundles ?? 
[]); + + setFlow({ name: 'wizard', agents, evaluators, configBundles }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); + + const handleRunComplete = useCallback((config: RecommendationWizardConfig) => { + const willFetchSpans = config.traceSource === 'sessions'; + + const initialSteps: Step[] = [ + ...(willFetchSpans ? [{ label: 'Fetching session spans from CloudWatch...', status: 'pending' as const }] : []), + { label: 'Starting recommendation...', status: 'running' }, + { label: 'Polling for results', status: 'pending' }, + { label: 'Saving results', status: 'pending' }, + ]; + + // If auto-fetching, the first step is active + if (willFetchSpans) { + initialSteps[0] = { ...initialSteps[0]!, status: 'running' }; + initialSteps[1] = { ...initialSteps[1]!, status: 'pending' }; + } + + setFlow({ name: 'running', config, steps: initialSteps, elapsed: 0 }); + }, []); + + // Execute the recommendation when entering 'running' state + useEffect(() => { + if (flow.name !== 'running') return; + let cancelled = false; + + const { config } = flow; + const startTime = Date.now(); + + const timer = setInterval(() => { + if (!cancelled) { + setFlow(prev => { + if (prev.name !== 'running') return prev; + return { ...prev, elapsed: Math.floor((Date.now() - startTime) / 1000) }; + }); + } + }, 1000); + + void (async () => { + try { + const result = await runRecommendationCommand({ + type: config.type, + agent: config.agent, + evaluators: config.evaluators, + inputSource: config.inputSource, + inlineContent: config.inputSource === 'inline' ? config.content : undefined, + promptFile: config.inputSource === 'file' ? config.content : undefined, + bundleName: config.inputSource === 'config-bundle' ? config.bundleName : undefined, + bundleVersion: config.inputSource === 'config-bundle' ? 
config.bundleVersion : undefined, + systemPromptJsonPath: + config.inputSource === 'config-bundle' && config.systemPromptJsonPath + ? config.systemPromptJsonPath + : undefined, + toolDescJsonPaths: + config.inputSource === 'config-bundle' && config.toolDescJsonPaths.length > 0 + ? config.toolDescJsonPaths + : undefined, + tools: config.tools + ? config.tools + .split(/,(?=[a-zA-Z0-9_\-.]+:)/) + .map(t => t.trim()) + .filter(Boolean) + : undefined, + traceSource: config.traceSource, + lookbackDays: config.days, + sessionIds: config.sessionIds.length > 0 ? config.sessionIds : undefined, + onProgress: (status, _message) => { + if (cancelled) return; + const hasFetchStep = config.traceSource === 'sessions'; + const offset = hasFetchStep ? 1 : 0; + + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = [...prev.steps]; + if (status === 'fetching-spans') { + steps[0] = { ...steps[0]!, status: 'running' }; + } else if (status === 'starting') { + if (hasFetchStep) steps[0] = { ...steps[0]!, status: 'success' }; + steps[offset] = { ...steps[offset]!, status: 'running' }; + } else if (status === 'started' || status === 'polling') { + steps[offset] = { ...steps[offset]!, status: 'success' }; + steps[offset + 1] = { ...steps[offset + 1]!, status: 'running' }; + } + return { ...prev, steps }; + }); + }, + onStarted: info => { + setFlow(prev => { + if (prev.name !== 'running') return prev; + return { ...prev, recommendationId: info.recommendationId, region: info.region }; + }); + }, + }); + + clearInterval(timer); + if (cancelled) return; + + if (!result.success) { + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = prev.steps.map(s => + s.status === 'running' ? { ...s, status: 'error' as const, error: result.error } : s + ); + return { ...prev, steps }; + }); + await new Promise(resolve => setTimeout(resolve, 2000)); + if (cancelled) return; + setFlow({ name: 'error', message: result.error ?? 
'Recommendation failed', logFilePath: result.logFilePath }); + return; + } + + // Mark polling success, saving running + const hasFetchStep = config.traceSource === 'sessions'; + const offset = hasFetchStep ? 1 : 0; + + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = [...prev.steps]; + steps[offset + 1] = { ...steps[offset + 1]!, status: 'success' }; + steps[offset + 2] = { ...steps[offset + 2]!, status: 'running' }; + return { ...prev, steps }; + }); + + // Save results locally + let filePath: string | undefined; + try { + if (result.recommendationId) { + filePath = saveRecommendationRun( + result.recommendationId, + result, + config.type, + config.agent, + config.evaluators + ); + } + } catch { + // Non-fatal + } + + setFlow({ name: 'results', result, config, filePath }); + } catch (err) { + clearInterval(timer); + if (!cancelled) { + const errorMsg = getErrorMessage(err); + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = prev.steps.map(s => + s.status === 'running' ? { ...s, status: 'error' as const, error: errorMsg } : s + ); + return { ...prev, steps }; + }); + await new Promise(resolve => setTimeout(resolve, 2000)); + setFlow({ name: 'error', message: errorMsg }); + } + } + })(); + + return () => { + cancelled = true; + clearInterval(timer); + }; + }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps + + // ── Render states ───────────────────────────────────────────────────────── + + if (flow.name === 'loading') { + return ( + + + + ); + } + + if (flow.name === 'creds-error') { + return ; + } + + if (flow.name === 'wizard') { + return ( + + ); + } + + if (flow.name === 'running') { + const minutes = Math.floor(flow.elapsed / 60); + const seconds = flow.elapsed % 60; + const timeStr = minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`; + + return ( + + + + + Agent: {flow.config.agent} + {' '} + Evaluator(s):{' '} + {flow.config.evaluators.map(e => (e.includes('/') ? 
e.split('/').pop()! : e)).join(', ')} + {' '} + ({timeStr}) + + + + + + ); + } + + if (flow.name === 'results') { + return ( + setFlow({ name: 'loading' })} + onExit={onExit} + /> + ); + } + + return ( + setFlow({ name: 'loading' })} + onExit={onExit} + /> + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Results view +// ───────────────────────────────────────────────────────────────────────────── + +interface ResultsViewProps { + result: RunRecommendationCommandResult; + config: RecommendationWizardConfig; + filePath?: string; + onRunAnother: () => void; + onExit: () => void; +} + +function ResultsView({ result, config, filePath, onRunAnother, onExit }: ResultsViewProps) { + const [applyStatus, setApplyStatus] = useState<{ applied: boolean; message: string } | null>(null); + + const isConfigBundle = config.inputSource === 'config-bundle' && config.bundleName; + const hasNewVersion = + !!result.result?.systemPromptRecommendationResult?.configurationBundle || + !!result.result?.toolDescriptionRecommendationResult?.configurationBundle; + const canApply = isConfigBundle && hasNewVersion && result.region && !applyStatus; + + const actions = [ + ...(canApply ? [{ id: 'apply', title: 'Sync new bundle version to local config' }] : []), + { id: 'another', title: 'Run another recommendation' }, + { id: 'back', title: 'Back' }, + ]; + + const handleApply = useCallback(async () => { + if (!result.result || !result.region) return; + try { + const applyResult = await applyRecommendationToBundle({ + bundleArn: config.bundleName, // TUI stores ARN in bundleName + result: result.result, + region: result.region, + }); + if (applyResult.success) { + setApplyStatus({ + applied: true, + message: `New bundle version (${applyResult.newVersionId}) created with recommended changes. Local config updated.`, + }); + } else { + setApplyStatus({ applied: false, message: applyResult.error ?? 
'Unknown error' }); + } + } catch (err) { + setApplyStatus({ applied: false, message: getErrorMessage(err) }); + } + }, [result, config]); + + const nav = useListNavigation({ + items: actions, + onSelect: item => { + if (item.id === 'apply') void handleApply(); + else if (item.id === 'another') onRunAnother(); + else onExit(); + }, + onExit, + isActive: true, + }); + + const sysResult = result.result?.systemPromptRecommendationResult; + const toolResult = result.result?.toolDescriptionRecommendationResult; + + return ( + + + + ✓ Recommendation complete + + ID: {result.recommendationId} + {' '} + Agent: {config.agent} + + + {sysResult && ( + + {sysResult.recommendedSystemPrompt && ( + + + Recommended System Prompt: + + + {sysResult.recommendedSystemPrompt} + + + )} + + )} + + {toolResult?.tools && toolResult.tools.length > 0 && ( + + + Recommended Tool Descriptions: + + {toolResult.tools.map(tool => ( + + {tool.toolName} + {tool.recommendedToolDescription} + + ))} + + )} + + {!sysResult && !toolResult && ( + + No recommendation results returned. + + )} + + {filePath && ( + + Results saved to: {filePath} + + )} + + {applyStatus && ( + + {applyStatus.applied ? ( + ✓ {applyStatus.message} + ) : ( + Could not sync: {applyStatus.message} + )} + + )} + + + {actions.map((action, idx) => { + const selected = idx === nav.selectedIndex; + return ( + + {selected ? 
/**
 * Flatten the deployed runtimes of every target into a list of agents.
 *
 * Each runtime name appears once: when the same name is deployed under several
 * targets, the entry from the first target encountered is kept.
 *
 * @param deployedState - Deployed state whose `targets` map is scanned.
 * @returns One item per distinct runtime name, in first-seen order.
 */
function buildAgentItems(deployedState: DeployedState): AgentItem[] {
  const registered = new Set<string>();
  const items: AgentItem[] = [];

  Object.values(deployedState.targets).forEach(target => {
    const runtimes = target.resources?.runtimes;
    // Targets without any deployed runtimes contribute nothing.
    if (!runtimes) return;

    Object.entries(runtimes).forEach(([name, runtime]) => {
      if (registered.has(name)) return;
      registered.add(name);
      items.push({ name, runtimeId: runtime.runtimeId, runtimeArn: runtime.runtimeArn });
    });
  });

  return items;
}
const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

/**
 * Format a timestamp as a compact, local-time label such as `Jan 5 3:07 PM`.
 *
 * @param timestamp - A string parseable by the `Date` constructor.
 * @returns `Mon D h:mm AM|PM` in the local time zone, or `'unknown'` when the
 *   timestamp cannot be parsed.
 */
function formatShortDate(timestamp: string): string {
  const d = new Date(timestamp);
  // An unparseable string produces an Invalid Date whose getters all return NaN;
  // without this guard the label would read "undefined NaN NaN:NaN AM".
  // 'unknown' is the same placeholder the history list shows for a missing start time.
  if (Number.isNaN(d.getTime())) return 'unknown';

  const mon = MONTHS[d.getMonth()];
  const day = d.getDate();
  const h = d.getHours();
  const m = d.getMinutes().toString().padStart(2, '0');
  const ampm = h >= 12 ? 'PM' : 'AM';
  // 12-hour clock: 0 and 12 both display as 12.
  const h12 = h % 12 || 12;
  return `${mon} ${day} ${h12}:${m} ${ampm}`;
}
formatShortDate(rec.startedAt) : 'unknown'; + + return ( + + {selected ? '❯' : ' '} + {date.padEnd(16)} + {rec.status.padEnd(12)} + {shortTypeName(rec.type).padEnd(18)} + {rec.agent} + + ); + })} + {visible.startIdx + maxVisible < records.length && ( + ↓ {records.length - visible.startIdx - maxVisible} more + )} + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Detail view +// ───────────────────────────────────────────────────────────────────────────── + +function RecommendationDetailView({ record, onBack }: { record: RecommendationRunRecord; onBack: () => void }) { + useInput((input, key) => { + if (key.escape || input === 'b') { + onBack(); + } + }); + + const sysResult = record.result?.systemPromptRecommendationResult; + const toolResult = record.result?.toolDescriptionRecommendationResult; + + return ( + + + + ID: {record.recommendationId} + + + Type: {shortTypeName(record.type)} + {' '} + Agent: {record.agent} + {' '} + Status: {record.status} + + + Evaluators: {record.evaluators.join(', ')} + + {record.startedAt && ( + + Started: {new Date(record.startedAt).toLocaleString()} + + )} + {record.completedAt && ( + + Completed: {new Date(record.completedAt).toLocaleString()} + + )} + + {sysResult && ( + + {sysResult.recommendedSystemPrompt && ( + + + Recommended System Prompt: + + + {sysResult.recommendedSystemPrompt} + + + )} + + )} + + {toolResult?.tools && toolResult.tools.length > 0 && ( + + + Recommended Tool Descriptions: + + {toolResult.tools.map(tool => ( + + {tool.toolName} + {tool.recommendedToolDescription} + + ))} + + )} + + {!sysResult && !toolResult && ( + + No recommendation results available. 
+ + )} + + + Press Esc or B to go back + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main screen +// ───────────────────────────────────────────────────────────────────────────── + +interface RecommendationHistoryScreenProps { + onExit: () => void; +} + +export function RecommendationHistoryScreen({ onExit }: RecommendationHistoryScreenProps) { + const { stdout } = useStdout(); + const terminalHeight = stdout?.rows ?? 24; + const availableHeight = Math.max(6, terminalHeight - CHROME_LINES); + + const [selectedRecord, setSelectedRecord] = useState(null); + + const [records, loaded, error] = useMemo(() => { + try { + return [listAllRecommendations(), true, null] as const; + } catch (err) { + return [[] as RecommendationRunRecord[], true, err instanceof Error ? err.message : String(err)] as const; + } + }, []); + + if (!loaded) { + return ( + + Loading... + + ); + } + + if (error) { + return ( + + {error} + + ); + } + + if (records.length === 0) { + return ( + + + No recommendation runs found. + Run `agentcore run recommendation` to create one. + + + ); + } + + const helpText = selectedRecord ? 'Esc/B back to list' : HELP_TEXT.NAVIGATE_SELECT; + + return ( + + {selectedRecord ? 
( + setSelectedRecord(null)} /> + ) : ( + + )} + + ); +} diff --git a/src/cli/tui/screens/recommendation/RecommendationScreen.tsx b/src/cli/tui/screens/recommendation/RecommendationScreen.tsx new file mode 100644 index 000000000..adac72ef8 --- /dev/null +++ b/src/cli/tui/screens/recommendation/RecommendationScreen.tsx @@ -0,0 +1,599 @@ +import { detectRegion } from '../../../aws/region'; +import type { SessionInfo } from '../../../operations/eval'; +import { discoverSessions } from '../../../operations/eval'; +import { loadDeployedProjectConfig, resolveAgent } from '../../../operations/resolve-agent'; +import type { SelectableItem } from '../../components'; +import { + ConfirmReview, + GradientText, + Panel, + PathInput, + Screen, + StepIndicator, + TextInput, + WizardMultiSelect, + WizardSelect, +} from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; +import type { AgentItem, ConfigBundleItem, EvaluatorItem, RecommendationWizardConfig } from './types'; +import { DEFAULT_LOOKBACK_DAYS, RECOMMENDATION_STEP_LABELS } from './types'; +import { useRecommendationWizard } from './useRecommendationWizard'; +import { Box, Text } from 'ink'; +import React, { useEffect, useMemo, useRef, useState } from 'react'; + +interface RecommendationScreenProps { + agents: AgentItem[]; + evaluators: EvaluatorItem[]; + configBundles: ConfigBundleItem[]; + onComplete: (config: RecommendationWizardConfig) => void; + onExit: () => void; +} + +export function RecommendationScreen({ + agents, + evaluators, + configBundles, + onComplete, + onExit, +}: RecommendationScreenProps) { + const wizard = useRecommendationWizard(); + + // ── Selectable items ────────────────────────────────────────────────────── + + const typeItems: SelectableItem[] = useMemo( + () => [ + { + id: 'SYSTEM_PROMPT_RECOMMENDATION', + title: 'System Prompt', + description: "Optimize your agent's system prompt based on traces", + }, 
+ { + id: 'TOOL_DESCRIPTION_RECOMMENDATION', + title: 'Tool Description', + description: 'Optimize tool descriptions for better tool selection', + }, + ], + [] + ); + + const agentItems: SelectableItem[] = useMemo( + () => + agents.map(a => ({ + id: a.name, + title: a.name, + description: `Runtime: ${a.runtimeId}`, + })), + [agents] + ); + + const evaluatorItems: SelectableItem[] = useMemo( + () => + evaluators.map(e => ({ + id: e.id, + title: e.title, + description: e.description, + })), + [evaluators] + ); + + const isToolDesc = wizard.config.type === 'TOOL_DESCRIPTION_RECOMMENDATION'; + + const inputSourceItems: SelectableItem[] = useMemo( + () => + isToolDesc + ? [ + { id: 'inline', title: 'Enter inline', description: 'Type tool name:description pairs directly' }, + { + id: 'config-bundle', + title: 'Config bundle', + description: 'Read tool descriptions from a deployed config bundle', + }, + ] + : [ + { id: 'inline', title: 'Enter inline', description: 'Type or paste content directly' }, + { id: 'file', title: 'Load from file', description: 'Read content from a file path' }, + { + id: 'config-bundle', + title: 'Config bundle', + description: 'Use system prompt from a deployed config bundle', + }, + ], + [isToolDesc] + ); + + const traceSourceItems: SelectableItem[] = useMemo( + () => [ + { id: 'cloudwatch', title: 'CloudWatch Logs', description: 'Discover traces from agent runtime logs' }, + { id: 'sessions', title: 'Session IDs', description: 'Provide specific session IDs manually' }, + ], + [] + ); + + // ── Session discovery ────────────────────────────────────────────────────── + + type SessionResult = { phase: 'loaded'; sessions: SessionInfo[] } | { phase: 'error'; message: string }; + + const [sessionResult, setSessionResult] = useState(); + const fetchingRef = useRef(''); + + // ── Step flags ──────────────────────────────────────────────────────────── + + const isTypeStep = wizard.step === 'type'; + const isAgentStep = wizard.step === 'agent'; + const 
isEvaluatorStep = wizard.step === 'evaluator'; + const isInputSourceStep = wizard.step === 'inputSource'; + const isContentStep = wizard.step === 'content'; + const isBundleStep = wizard.step === 'bundle'; + const isBundleFieldStep = wizard.step === 'bundleField'; + const isToolsStep = wizard.step === 'tools'; + const isTraceSourceStep = wizard.step === 'traceSource'; + const isDaysStep = wizard.step === 'days'; + const isSessionsStep = wizard.step === 'sessions'; + const isConfirmStep = wizard.step === 'confirm'; + + const isSystemPrompt = wizard.config.type === 'SYSTEM_PROMPT_RECOMMENDATION'; + + // ── Session discovery effect ────────────────────────────────────────────── + + const fetchKey = `${wizard.config.agent}:${wizard.config.days}`; + const sessionPhase = !isSessionsStep ? 'idle' : sessionResult?.key === fetchKey ? sessionResult.phase : 'loading'; + + useEffect(() => { + if (!isSessionsStep) return; + if (sessionResult?.key === fetchKey) return; + if (fetchingRef.current === fetchKey) return; + fetchingRef.current = fetchKey; + let cancelled = false; + + void (async () => { + try { + const context = await loadDeployedProjectConfig(); + const { region } = await detectRegion(); + const agentResult = resolveAgent(context, { runtime: wizard.config.agent }); + if (!agentResult.success) { + if (!cancelled) setSessionResult({ key: fetchKey, phase: 'error', message: agentResult.error }); + return; + } + + const sessions = await discoverSessions({ + runtimeId: agentResult.agent.runtimeId, + region, + lookbackDays: wizard.config.days, + }); + + if (cancelled) return; + + if (sessions.length === 0) { + setSessionResult({ + key: fetchKey, + phase: 'error', + message: 'No sessions found in the lookback window. Try increasing the lookback days.', + }); + } else { + setSessionResult({ key: fetchKey, phase: 'loaded', sessions }); + } + } catch (err) { + if (!cancelled) { + setSessionResult({ + key: fetchKey, + phase: 'error', + message: err instanceof Error ? 
err.message : 'Failed to discover sessions', + }); + } + } + })(); + + return () => { + cancelled = true; + }; + }, [isSessionsStep, fetchKey]); // eslint-disable-line react-hooks/exhaustive-deps + + const sessionItems: SelectableItem[] = useMemo(() => { + const sessions = sessionResult?.phase === 'loaded' ? sessionResult.sessions : []; + return sessions.map(s => { + const date = s.firstSeen + ? new Date(s.firstSeen).toLocaleString([], { + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + }) + : ''; + const shortId = s.sessionId.length > 36 ? s.sessionId.slice(0, 36) + '…' : s.sessionId; + return { + id: s.sessionId, + title: shortId, + description: `${s.spanCount} spans · ${date}`, + }; + }); + }, [sessionResult]); + + // ── Navigation hooks ────────────────────────────────────────────────────── + + const typeNav = useListNavigation({ + items: typeItems, + onSelect: item => wizard.setType(item.id as 'SYSTEM_PROMPT_RECOMMENDATION' | 'TOOL_DESCRIPTION_RECOMMENDATION'), + onExit, + isActive: isTypeStep, + }); + + const agentNav = useListNavigation({ + items: agentItems, + onSelect: item => wizard.setAgent(item.id), + onExit: () => wizard.goBack(), + isActive: isAgentStep, + }); + + const evaluatorNav = useListNavigation({ + items: evaluatorItems, + onSelect: item => wizard.setEvaluators([item.id]), + onExit: () => wizard.goBack(), + isActive: isEvaluatorStep, + }); + + const inputSourceNav = useListNavigation({ + items: inputSourceItems, + onSelect: item => wizard.setInputSource(item.id as 'inline' | 'file' | 'config-bundle'), + onExit: () => wizard.goBack(), + isActive: isInputSourceStep, + }); + + const bundleItems: SelectableItem[] = useMemo( + () => + configBundles.map(cb => ({ + id: cb.bundleArn, + title: cb.name, + description: `Version: ${cb.versionId.slice(0, 8)}`, + })), + [configBundles] + ); + + const bundleNav = useListNavigation({ + items: bundleItems, + onSelect: item => { + const cb = configBundles.find(b => b.bundleArn === 
item.id); + if (cb) wizard.setBundle(cb.bundleArn, cb.versionId); + }, + onExit: () => wizard.goBack(), + isActive: isBundleStep, + }); + + // Build selectable items from recursively-discovered fields in the selected config bundle + const selectedBundle = useMemo( + () => configBundles.find(cb => cb.bundleArn === wizard.config.bundleName), + [configBundles, wizard.config.bundleName] + ); + + const bundleFieldItems: SelectableItem[] = useMemo(() => { + if (!selectedBundle) return []; + return selectedBundle.fields.map(field => { + // Shorten display: strip the long component ARN key, keep the meaningful tail. + // "components.arn:aws:...:runtime/name.configuration.systemPrompt" → "configuration.systemPrompt" + const segments = field.path.split('.'); + const configIdx = segments.indexOf('configuration'); + const displayPath = configIdx >= 0 ? segments.slice(configIdx).join('.') : segments.slice(-2).join('.'); + return { + id: field.path, + title: displayPath, + description: field.value.length > 80 ? 
field.value.slice(0, 80) + '…' : field.value, + }; + }); + }, [selectedBundle]); + + // Single-select for: system prompt (always), or tool desc with only 1 field (just press Enter) + const useFieldSingleSelect = !isToolDesc || bundleFieldItems.length <= 1; + const bundleFieldNav = useListNavigation({ + items: bundleFieldItems, + onSelect: item => { + const field = selectedBundle?.fields.find(f => f.path === item.id); + if (!field) return; + wizard.setBundleFields([item.id], { systemPromptJsonPath: field.jsonPath }); + }, + onExit: () => wizard.goBack(), + isActive: isBundleFieldStep && useFieldSingleSelect, + }); + + // Tool description multi-select: only when there are 2+ fields to choose from + const bundleFieldMultiNav = useMultiSelectNavigation({ + items: bundleFieldItems, + getId: item => item.id, + onConfirm: ids => { + const toolDescJsonPaths = ids + .map(id => { + const field = selectedBundle?.fields.find(f => f.path === id); + if (!field) return undefined; + // Use the last segment of the path as the tool name + const toolName = field.path.split('.').pop()!; + return { toolName, toolDescriptionJsonPath: field.jsonPath }; + }) + .filter((p): p is { toolName: string; toolDescriptionJsonPath: string } => p !== undefined); + wizard.setBundleFields(ids, { toolDescJsonPaths }); + }, + onExit: () => wizard.goBack(), + isActive: isBundleFieldStep && !useFieldSingleSelect, + requireSelection: true, + }); + + const traceSourceNav = useListNavigation({ + items: traceSourceItems, + onSelect: item => wizard.setTraceSource(item.id as 'cloudwatch' | 'sessions'), + onExit: () => wizard.goBack(), + isActive: isTraceSourceStep, + }); + + // Handle Esc during session loading/error (when multi-select is not yet active) + useListNavigation({ + items: [{ id: 'back', title: 'Back' }], + onSelect: () => wizard.goBack(), + onExit: () => wizard.goBack(), + isActive: isSessionsStep && sessionPhase !== 'loaded', + }); + + const sessionsNav = useMultiSelectNavigation({ + items: 
sessionItems, + getId: item => item.id, + onConfirm: ids => wizard.setSessions(ids), + onExit: () => wizard.goBack(), + isActive: isSessionsStep && sessionPhase === 'loaded', + requireSelection: true, + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(wizard.config), + onExit: () => wizard.goBack(), + isActive: isConfirmStep, + }); + + // ── Help text ───────────────────────────────────────────────────────────── + + const helpText = isEvaluatorStep + ? HELP_TEXT.NAVIGATE_SELECT + : isSessionsStep + ? sessionPhase === 'loading' + ? '' + : sessionPhase === 'error' + ? HELP_TEXT.CONFIRM_CANCEL + : 'Space toggle · Enter confirm · Esc back' + : isBundleFieldStep && !useFieldSingleSelect + ? 'Space to select · Enter confirm · Esc back' + : isTypeStep || isAgentStep || isInputSourceStep || isTraceSourceStep || isBundleStep || isBundleFieldStep + ? HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : HELP_TEXT.TEXT_INPUT; + + const headerContent = ( + + ); + + // ── Confirm fields ──────────────────────────────────────────────────────── + + const confirmFields = [ + { label: 'Type', value: isSystemPrompt ? 'System Prompt' : 'Tool Description' }, + { label: 'Agent', value: wizard.config.agent }, + ...(isSystemPrompt + ? [ + { + label: 'Evaluator', + value: + wizard.config.evaluators.map(e => (e.includes('/') ? e.split('/').pop()! : e)).join(', ') || '(none)', + }, + ] + : []), + { + label: 'Input', + value: + wizard.config.inputSource === 'file' + ? `File: ${wizard.config.content}` + : wizard.config.inputSource === 'config-bundle' + ? `Bundle: ${configBundles.find(b => b.bundleArn === wizard.config.bundleName)?.name ?? wizard.config.bundleName} (${wizard.config.bundleFields.length === 1 ? `field: ${wizard.config.bundleFields[0]}` : `fields: ${wizard.config.bundleFields.join(', ')}`})` + : 'Inline', + }, + { + label: 'Traces', + value: + wizard.config.traceSource === 'sessions' + ? 
`${wizard.config.sessionIds.length} session${wizard.config.sessionIds.length !== 1 ? 's' : ''} selected (auto-fetch)` + : `CloudWatch (${wizard.config.days}d)`, + }, + ]; + + if (!isSystemPrompt && wizard.config.inputSource !== 'config-bundle') { + confirmFields.push({ label: 'Tools', value: wizard.config.tools || '(none)' }); + } + + // ── Render ──────────────────────────────────────────────────────────────── + + return ( + + + {isTypeStep && ( + + )} + + {isAgentStep && ( + + )} + + {isEvaluatorStep && ( + + )} + + {isInputSourceStep && ( + + )} + + {isContentStep && wizard.config.inputSource === 'inline' && ( + wizard.goBack()} + expandable + /> + )} + + {isContentStep && wizard.config.inputSource === 'file' && ( + wizard.goBack()} + placeholder="/path/to/prompt.txt" + pathType="file" + /> + )} + + {isBundleStep && configBundles.length === 0 && ( + + Select config bundle + + No deployed config bundles found. Run `agentcore add config-bundle` and `agentcore deploy` first. + + Press Esc to go back and choose a different input source. + + )} + + {isBundleStep && configBundles.length > 0 && ( + + )} + + {isBundleFieldStep && bundleFieldItems.length === 0 && ( + + Select field + No text fields found in this config bundle's configuration. + Press Esc to go back and choose a different bundle. + + )} + + {isBundleFieldStep && bundleFieldItems.length > 0 && useFieldSingleSelect && ( + + )} + + {isBundleFieldStep && bundleFieldItems.length > 0 && !useFieldSingleSelect && ( + + )} + + {isToolsStep && ( + + Enter tool names and descriptions as comma-separated toolName:description pairs. + wizard.goBack()} + expandable + /> + + )} + + {isTraceSourceStep && ( + + )} + + {isDaysStep && ( + + Note: Traces may take 5-10 min to appear after agent invocations. 
+ { + const days = parseInt(value, 10); + if (!isNaN(days) && days >= 1 && days <= 90) { + wizard.setDays(days); + } + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const days = parseInt(value, 10); + if (isNaN(days)) return 'Must be a number'; + if (days < 1 || days > 90) return 'Must be between 1 and 90'; + return true; + }} + /> + + )} + + {isSessionsStep && sessionPhase === 'loading' && } + + {isSessionsStep && sessionResult?.phase === 'error' && {sessionResult.message}} + + {isSessionsStep && sessionPhase === 'loaded' && ( + + )} + + {isConfirmStep && } + + + ); +} diff --git a/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx b/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx new file mode 100644 index 000000000..2e53e0ebd --- /dev/null +++ b/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx @@ -0,0 +1,43 @@ +import { Screen, WizardSelect } from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import React, { useMemo } from 'react'; + +export type RecommendationsHubView = 'run-recommendation' | 'recommendation-history'; + +interface RecommendationsHubScreenProps { + onSelect: (view: RecommendationsHubView) => void; + onExit: () => void; +} + +export function RecommendationsHubScreen({ onSelect, onExit }: RecommendationsHubScreenProps) { + const items: SelectableItem[] = useMemo( + () => [ + { + id: 'run-recommendation', + title: 'Run Recommendation', + description: 'Optimize system prompts and tool descriptions using agent traces', + }, + { + id: 'recommendation-history', + title: 'Recommendation History', + description: 'View past recommendation results (local)', + }, + ], + [] + ); + + const nav = useListNavigation({ + items, + onSelect: item => onSelect(item.id as RecommendationsHubView), + onExit, + isActive: true, + }); + + return ( + + + + ); +} diff --git 
a/src/cli/tui/screens/recommendation/index.ts b/src/cli/tui/screens/recommendation/index.ts new file mode 100644 index 000000000..3c2e16fe7 --- /dev/null +++ b/src/cli/tui/screens/recommendation/index.ts @@ -0,0 +1,3 @@ +export { RecommendationFlow } from './RecommendationFlow'; +export { RecommendationHistoryScreen } from './RecommendationHistoryScreen'; +export { RecommendationsHubScreen } from './RecommendationsHubScreen'; diff --git a/src/cli/tui/screens/recommendation/types.ts b/src/cli/tui/screens/recommendation/types.ts new file mode 100644 index 000000000..587ea4a20 --- /dev/null +++ b/src/cli/tui/screens/recommendation/types.ts @@ -0,0 +1,86 @@ +import type { + RecommendationInputSourceKind, + RecommendationType, + TraceSourceKind, +} from '../../../operations/recommendation'; + +export type RecommendationStep = + | 'type' + | 'agent' + | 'evaluator' + | 'inputSource' + | 'content' + | 'bundle' + | 'bundleField' + | 'tools' + | 'traceSource' + | 'days' + | 'sessions' + | 'confirm'; + +export interface RecommendationWizardConfig { + type: RecommendationType; + agent: string; + evaluators: string[]; + inputSource: RecommendationInputSourceKind; + content: string; + tools: string; + traceSource: TraceSourceKind; + days: number; + sessionIds: string[]; + bundleName: string; + bundleVersion: string; + bundleFields: string[]; + /** JSONPath for system prompt within the config bundle (set when user picks a field) */ + systemPromptJsonPath: string; + /** Tool name → JSONPath pairs for tool descriptions within the config bundle */ + toolDescJsonPaths: { toolName: string; toolDescriptionJsonPath: string }[]; +} + +export const RECOMMENDATION_STEP_LABELS: Record = { + type: 'Type', + agent: 'Agent', + evaluator: 'Evaluator', + inputSource: 'Source', + content: 'Content', + bundle: 'Bundle', + bundleField: 'Fields', + tools: 'Tools', + traceSource: 'Traces', + days: 'Lookback', + sessions: 'Sessions', + confirm: 'Confirm', +}; + +export const DEFAULT_LOOKBACK_DAYS = 
7; + +export interface AgentItem { + name: string; + runtimeId: string; + runtimeArn: string; +} + +export interface EvaluatorItem { + id: string; + title: string; + description: string; +} + +/** A string field found at an arbitrary depth inside a config bundle's JSON. */ +export interface ConfigBundleField { + /** Dot-notation path from the bundle root, e.g. "components.myAgent.configuration.systemPrompt" */ + path: string; + /** JSONPath expression for the API, e.g. "$.components.myAgent.configuration.systemPrompt" */ + jsonPath: string; + /** The string value at this path */ + value: string; +} + +export interface ConfigBundleItem { + name: string; + bundleId: string; + bundleArn: string; + versionId: string; + /** All string-valued fields found recursively across the bundle's components. */ + fields: ConfigBundleField[]; +} diff --git a/src/cli/tui/screens/recommendation/useRecommendationWizard.ts b/src/cli/tui/screens/recommendation/useRecommendationWizard.ts new file mode 100644 index 000000000..94c3c66d1 --- /dev/null +++ b/src/cli/tui/screens/recommendation/useRecommendationWizard.ts @@ -0,0 +1,232 @@ +import type { + RecommendationInputSourceKind, + RecommendationType, + TraceSourceKind, +} from '../../../operations/recommendation'; +import type { RecommendationStep, RecommendationWizardConfig } from './types'; +import { DEFAULT_LOOKBACK_DAYS } from './types'; +import { useCallback, useState } from 'react'; + +function getAllSteps( + type: RecommendationType, + inputSource: RecommendationInputSourceKind, + traceSource: TraceSourceKind +): RecommendationStep[] { + const steps: RecommendationStep[] = ['type', 'agent']; + + // Evaluator step only for system prompt recommendations (tool desc API does not accept evaluators) + if (type === 'SYSTEM_PROMPT_RECOMMENDATION') { + steps.push('evaluator'); + } + + // Input source selection (both types support inline and config-bundle) + steps.push('inputSource'); + + if (type === 'SYSTEM_PROMPT_RECOMMENDATION') { + if 
(inputSource === 'inline' || inputSource === 'file') { + steps.push('content'); + } else if (inputSource === 'config-bundle') { + steps.push('bundle'); + steps.push('bundleField'); + } + } else { + // TOOL_DESCRIPTION_RECOMMENDATION + if (inputSource === 'config-bundle') { + steps.push('bundle'); + steps.push('bundleField'); + } else { + steps.push('tools'); + } + } + + steps.push('traceSource'); + + if (traceSource === 'sessions') { + // When using session IDs: ask lookback days first (for discovery), then select sessions + steps.push('days'); + steps.push('sessions'); + } else { + // CloudWatch: just ask lookback days + steps.push('days'); + } + + steps.push('confirm'); + return steps; +} + +function getDefaultConfig(): RecommendationWizardConfig { + return { + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: '', + evaluators: [], + inputSource: 'inline', + content: '', + tools: '', + traceSource: 'cloudwatch', + days: DEFAULT_LOOKBACK_DAYS, + sessionIds: [], + bundleName: '', + bundleVersion: '', + bundleFields: [], + systemPromptJsonPath: '', + toolDescJsonPaths: [], + }; +} + +export function useRecommendationWizard() { + const [config, setConfig] = useState(getDefaultConfig); + const [step, setStep] = useState('type'); + + const allSteps = getAllSteps(config.type, config.inputSource, config.traceSource); + const currentIndex = allSteps.indexOf(step); + + const advance = useCallback( + ( + fromStep: RecommendationStep, + overrides?: { + type?: RecommendationType; + inputSource?: RecommendationInputSourceKind; + traceSource?: TraceSourceKind; + } + ) => { + const steps = getAllSteps( + overrides?.type ?? config.type, + overrides?.inputSource ?? config.inputSource, + overrides?.traceSource ?? 
config.traceSource + ); + const idx = steps.indexOf(fromStep); + const next = steps[idx + 1]; + if (next) setStep(next); + }, + [config.type, config.inputSource, config.traceSource] + ); + + const goBack = useCallback(() => { + const prevStep = allSteps[currentIndex - 1]; + if (prevStep) setStep(prevStep); + }, [allSteps, currentIndex]); + + const setType = useCallback( + (type: RecommendationType) => { + setConfig(c => ({ ...c, type })); + advance('type', { type }); + }, + [advance] + ); + + const setAgent = useCallback( + (agent: string) => { + setConfig(c => ({ ...c, agent })); + advance('agent'); + }, + [advance] + ); + + const setEvaluators = useCallback( + (evaluators: string[]) => { + setConfig(c => ({ ...c, evaluators })); + advance('evaluator'); + }, + [advance] + ); + + const setInputSource = useCallback( + (inputSource: RecommendationInputSourceKind) => { + setConfig(c => ({ ...c, inputSource })); + advance('inputSource', { inputSource }); + }, + [advance] + ); + + const setContent = useCallback( + (content: string) => { + setConfig(c => ({ ...c, content })); + advance('content'); + }, + [advance] + ); + + const setTools = useCallback( + (tools: string) => { + setConfig(c => ({ ...c, tools })); + advance('tools'); + }, + [advance] + ); + + const setTraceSource = useCallback( + (traceSource: TraceSourceKind) => { + setConfig(c => ({ ...c, traceSource })); + advance('traceSource', { traceSource }); + }, + [advance] + ); + + const setDays = useCallback( + (days: number) => { + setConfig(c => ({ ...c, days })); + advance('days'); + }, + [advance] + ); + + const setBundle = useCallback( + (bundleName: string, bundleVersion: string) => { + setConfig(c => ({ ...c, bundleName, bundleVersion })); + advance('bundle'); + }, + [advance] + ); + + const setBundleFields = useCallback( + ( + bundleFields: string[], + jsonPathInfo?: { + systemPromptJsonPath?: string; + toolDescJsonPaths?: { toolName: string; toolDescriptionJsonPath: string }[]; + } + ) => { + setConfig(c 
=> ({ + ...c, + bundleFields, + ...(jsonPathInfo?.systemPromptJsonPath && { systemPromptJsonPath: jsonPathInfo.systemPromptJsonPath }), + ...(jsonPathInfo?.toolDescJsonPaths && { toolDescJsonPaths: jsonPathInfo.toolDescJsonPaths }), + })); + advance('bundleField'); + }, + [advance] + ); + + const setSessions = useCallback( + (sessionIds: string[]) => { + setConfig(c => ({ ...c, sessionIds })); + advance('sessions'); + }, + [advance] + ); + + const reset = useCallback(() => { + setConfig(getDefaultConfig()); + setStep('type'); + }, []); + + return { + config, + step, + steps: allSteps, + currentIndex, + goBack, + setType, + setAgent, + setEvaluators, + setInputSource, + setContent, + setBundle, + setBundleFields, + setTools, + setTraceSource, + setDays, + setSessions, + reset, + }; +} diff --git a/src/cli/tui/screens/remove/RemoveConfigBundleScreen.tsx b/src/cli/tui/screens/remove/RemoveConfigBundleScreen.tsx new file mode 100644 index 000000000..90299eb30 --- /dev/null +++ b/src/cli/tui/screens/remove/RemoveConfigBundleScreen.tsx @@ -0,0 +1,26 @@ +import type { RemovableConfigBundle } from '../../../primitives/ConfigBundlePrimitive'; +import { SelectScreen } from '../../components'; +import React from 'react'; + +interface RemoveConfigBundleScreenProps { + configBundles: RemovableConfigBundle[]; + onSelect: (bundleName: string) => void; + onExit: () => void; +} + +export function RemoveConfigBundleScreen({ configBundles, onSelect, onExit }: RemoveConfigBundleScreenProps) { + const items = configBundles.map(bundle => ({ + id: bundle.name, + title: bundle.name, + description: 'Configuration Bundle', + })); + + return ( + onSelect(item.id)} + onExit={onExit} + /> + ); +} diff --git a/src/cli/tui/screens/remove/RemoveFlow.tsx b/src/cli/tui/screens/remove/RemoveFlow.tsx index 6c8eb46b0..6ad51fb47 100644 --- a/src/cli/tui/screens/remove/RemoveFlow.tsx +++ b/src/cli/tui/screens/remove/RemoveFlow.tsx @@ -1,7 +1,9 @@ import type { RemovableGatewayTarget, RemovalPreview } 
from '../../../operations/remove'; import { ErrorPrompt, Panel, Screen } from '../../components'; import { + useRemovableABTests, useRemovableAgents, + useRemovableConfigBundles, useRemovableEvaluators, useRemovableGatewayTargets, useRemovableGateways, @@ -13,7 +15,9 @@ import { useRemovablePolicyEngines, useRemovableRuntimeEndpoints, useRemovalPreview, + useRemoveABTest, useRemoveAgent, + useRemoveConfigBundle, useRemoveEvaluator, useRemoveGateway, useRemoveGatewayTarget, @@ -25,8 +29,10 @@ import { useRemovePolicyEngine, useRemoveRuntimeEndpoint, } from '../../hooks/useRemove'; +import { RemoveABTestScreen } from '../ab-test/RemoveABTestScreen'; import { RemoveAgentScreen } from './RemoveAgentScreen'; import { RemoveAllScreen } from './RemoveAllScreen'; +import { RemoveConfigBundleScreen } from './RemoveConfigBundleScreen'; import { RemoveConfirmScreen } from './RemoveConfirmScreen'; import { RemoveEvaluatorScreen } from './RemoveEvaluatorScreen'; import { RemoveGatewayScreen } from './RemoveGatewayScreen'; @@ -57,6 +63,8 @@ type FlowState = | { name: 'select-policy' } | { name: 'select-harness' } | { name: 'confirm-harness'; harnessName: string; preview: RemovalPreview } + | { name: 'select-config-bundle' } + | { name: 'select-ab-test' } | { name: 'select-runtime-endpoint' } | { name: 'confirm-agent'; agentName: string; preview: RemovalPreview } | { name: 'confirm-gateway'; gatewayName: string; preview: RemovalPreview } @@ -67,6 +75,8 @@ type FlowState = | { name: 'confirm-online-eval'; configName: string; preview: RemovalPreview } | { name: 'confirm-policy-engine'; engineName: string; preview: RemovalPreview } | { name: 'confirm-policy'; compositeKey: string; policyName: string; preview: RemovalPreview } + | { name: 'confirm-config-bundle'; bundleName: string; preview: RemovalPreview } + | { name: 'confirm-ab-test'; testName: string; preview: RemovalPreview } | { name: 'confirm-runtime-endpoint'; endpointName: string; preview: RemovalPreview } | { name: 
'loading'; message: string } | { name: 'harness-success'; harnessName: string; logFilePath?: string } @@ -79,6 +89,8 @@ type FlowState = | { name: 'online-eval-success'; configName: string; logFilePath?: string } | { name: 'policy-engine-success'; engineName: string; logFilePath?: string } | { name: 'policy-success'; policyName: string; logFilePath?: string } + | { name: 'config-bundle-success'; bundleName: string; logFilePath?: string } + | { name: 'ab-test-success'; testName: string; logFilePath?: string } | { name: 'runtime-endpoint-success'; endpointName: string; logFilePath?: string } | { name: 'remove-all' } | { name: 'error'; message: string }; @@ -103,7 +115,9 @@ interface RemoveFlowProps { | 'evaluator' | 'online-eval' | 'policy-engine' - | 'policy'; + | 'policy' + | 'config-bundle' + | 'ab-test'; /** Initial resource name to auto-select (for CLI --name flag) */ initialResourceName?: string; } @@ -139,6 +153,10 @@ export function RemoveFlow({ return { name: 'select-harness' }; case 'policy': return { name: 'select-policy' }; + case 'config-bundle': + return { name: 'select-config-bundle' }; + case 'ab-test': + return { name: 'select-ab-test' }; case 'runtime-endpoint': return { name: 'select-runtime-endpoint' }; default: @@ -166,6 +184,12 @@ export function RemoveFlow({ refresh: refreshPolicyEngines, } = useRemovablePolicyEngines(); const { policies, isLoading: isLoadingPolicies, refresh: refreshPolicies } = useRemovablePolicies(); + const { + configBundles, + isLoading: isLoadingConfigBundles, + refresh: refreshConfigBundles, + } = useRemovableConfigBundles(); + const { abTests } = useRemovableABTests(); const { endpoints: runtimeEndpoints, isLoading: isLoadingRuntimeEndpoints, @@ -184,6 +208,7 @@ export function RemoveFlow({ isLoadingOnlineEvals || isLoadingPolicyEngines || isLoadingPolicies || + isLoadingConfigBundles || isLoadingRuntimeEndpoints; // Preview hook @@ -198,6 +223,8 @@ export function RemoveFlow({ loadOnlineEvalPreview, 
loadPolicyEnginePreview, loadPolicyPreview, + loadConfigBundlePreview, + loadABTestPreview, loadRuntimeEndpointPreview, reset: resetPreview, } = useRemovalPreview(); @@ -213,6 +240,8 @@ export function RemoveFlow({ const { remove: removeOnlineEvalOp, reset: resetRemoveOnlineEval } = useRemoveOnlineEvalConfig(); const { remove: removePolicyEngineOp, reset: resetRemovePolicyEngine } = useRemovePolicyEngine(); const { remove: removePolicyOp, reset: resetRemovePolicy } = useRemovePolicy(); + const { remove: removeConfigBundleOp, reset: resetRemoveConfigBundle } = useRemoveConfigBundle(); + const { remove: removeABTestOp, reset: resetRemoveABTest } = useRemoveABTest(); const { remove: removeRuntimeEndpointOp, reset: resetRemoveRuntimeEndpoint } = useRemoveRuntimeEndpoint(); // Track pending result state @@ -245,6 +274,8 @@ export function RemoveFlow({ 'online-eval-success', 'policy-engine-success', 'policy-success', + 'config-bundle-success', + 'ab-test-success', 'runtime-endpoint-success', ]; if (successStates.includes(flow.name)) { @@ -288,6 +319,12 @@ export function RemoveFlow({ case 'policy': setFlow({ name: 'select-policy' }); break; + case 'config-bundle': + setFlow({ name: 'select-config-bundle' }); + break; + case 'ab-test': + setFlow({ name: 'select-ab-test' }); + break; case 'runtime-endpoint': setFlow({ name: 'select-runtime-endpoint' }); break; @@ -523,6 +560,50 @@ export function RemoveFlow({ [loadPolicyPreview, force, removePolicyOp] ); + const handleSelectConfigBundle = useCallback( + async (bundleName: string) => { + const result = await loadConfigBundlePreview(bundleName); + if (result.ok) { + if (force) { + setFlow({ name: 'loading', message: `Removing configuration bundle ${bundleName}...` }); + const removeResult = await removeConfigBundleOp(bundleName, result.preview); + if (removeResult.success) { + setFlow({ name: 'config-bundle-success', bundleName }); + } else { + setFlow({ name: 'error', message: removeResult.error }); + } + } else { + 
setFlow({ name: 'confirm-config-bundle', bundleName, preview: result.preview }); + } + } else { + setFlow({ name: 'error', message: result.error }); + } + }, + [loadConfigBundlePreview, force, removeConfigBundleOp] + ); + + const handleSelectABTest = useCallback( + async (testName: string) => { + const result = await loadABTestPreview(testName); + if (result.ok) { + if (force) { + setFlow({ name: 'loading', message: `Removing AB test ${testName}...` }); + const removeResult = await removeABTestOp(testName, result.preview); + if (removeResult.success) { + setFlow({ name: 'ab-test-success', testName }); + } else { + setFlow({ name: 'error', message: removeResult.error }); + } + } else { + setFlow({ name: 'confirm-ab-test', testName, preview: result.preview }); + } + } else { + setFlow({ name: 'error', message: result.error }); + } + }, + [loadABTestPreview, force, removeABTestOp] + ); + const handleSelectRuntimeEndpoint = useCallback( async (endpointName: string) => { const result = await loadRuntimeEndpointPreview(endpointName); @@ -581,6 +662,12 @@ export function RemoveFlow({ case 'policy': void handleSelectPolicy(initialResourceName); break; + case 'config-bundle': + void handleSelectConfigBundle(initialResourceName); + break; + case 'ab-test': + void handleSelectABTest(initialResourceName); + break; case 'runtime-endpoint': void handleSelectRuntimeEndpoint(initialResourceName); break; @@ -598,6 +685,8 @@ export function RemoveFlow({ handleSelectOnlineEval, handleSelectPolicyEngine, handleSelectPolicy, + handleSelectConfigBundle, + handleSelectABTest, handleSelectRuntimeEndpoint, ]); @@ -762,6 +851,38 @@ export function RemoveFlow({ [removePolicyOp] ); + const handleConfirmConfigBundle = useCallback( + async (bundleName: string, preview: RemovalPreview) => { + pendingResultRef.current = null; + setResultReady(false); + setFlow({ name: 'loading', message: `Removing configuration bundle ${bundleName}...` }); + const result = await removeConfigBundleOp(bundleName, 
preview); + if (result.success) { + pendingResultRef.current = { name: 'config-bundle-success', bundleName, logFilePath: result.logFilePath }; + } else { + pendingResultRef.current = { name: 'error', message: result.error }; + } + setResultReady(true); + }, + [removeConfigBundleOp] + ); + + const handleConfirmABTest = useCallback( + async (testName: string, preview: RemovalPreview) => { + pendingResultRef.current = null; + setResultReady(false); + setFlow({ name: 'loading', message: `Removing AB test ${testName}...` }); + const result = await removeABTestOp(testName, preview); + if (result.success) { + pendingResultRef.current = { name: 'ab-test-success', testName, logFilePath: result.logFilePath }; + } else { + pendingResultRef.current = { name: 'error', message: result.error }; + } + setResultReady(true); + }, + [removeABTestOp] + ); + const handleConfirmRuntimeEndpoint = useCallback( async (endpointName: string, preview: RemovalPreview) => { pendingResultRef.current = null; @@ -790,6 +911,8 @@ export function RemoveFlow({ resetRemoveOnlineEval(); resetRemovePolicyEngine(); resetRemovePolicy(); + resetRemoveConfigBundle(); + resetRemoveABTest(); resetRemoveRuntimeEndpoint(); }, [ resetPreview, @@ -803,6 +926,8 @@ export function RemoveFlow({ resetRemoveOnlineEval, resetRemovePolicyEngine, resetRemovePolicy, + resetRemoveConfigBundle, + resetRemoveABTest, resetRemoveRuntimeEndpoint, ]); @@ -818,6 +943,7 @@ export function RemoveFlow({ refreshOnlineEvals(), refreshPolicyEngines(), refreshPolicies(), + refreshConfigBundles(), refreshRuntimeEndpoints(), ]); }, [ @@ -831,6 +957,7 @@ export function RemoveFlow({ refreshOnlineEvals, refreshPolicyEngines, refreshPolicies, + refreshConfigBundles, refreshRuntimeEndpoints, ]); @@ -853,6 +980,8 @@ export function RemoveFlow({ onlineEvalCount={onlineEvalConfigs.length} policyEngineCount={policyEngines.length} policyCount={policies.length} + configBundleCount={configBundles.length} + abTestCount={abTests.length} 
runtimeEndpointCount={runtimeEndpoints.length} /> ); @@ -1001,6 +1130,32 @@ export function RemoveFlow({ ); } + if (flow.name === 'select-config-bundle') { + if (initialResourceName && isLoading) { + return null; + } + return ( + void handleSelectConfigBundle(name)} + onExit={() => setFlow({ name: 'select' })} + /> + ); + } + + if (flow.name === 'select-ab-test') { + if (initialResourceName && isLoading) { + return null; + } + return ( + void handleSelectABTest(name)} + onExit={() => setFlow({ name: 'select' })} + /> + ); + } + if (flow.name === 'select-runtime-endpoint') { if (initialResourceName && isLoading) { return null; @@ -1125,6 +1280,28 @@ export function RemoveFlow({ ); } + if (flow.name === 'confirm-config-bundle') { + return ( + void handleConfirmConfigBundle(flow.bundleName, flow.preview)} + onCancel={() => setFlow({ name: 'select-config-bundle' })} + /> + ); + } + + if (flow.name === 'confirm-ab-test') { + return ( + void handleConfirmABTest(flow.testName, flow.preview)} + onCancel={() => setFlow({ name: 'select-ab-test' })} + /> + ); + } + if (flow.name === 'confirm-runtime-endpoint') { return ( { + resetAll(); + void refreshAll().then(() => setFlow({ name: 'select' })); + }} + onExit={onExit} + /> + ); + } + + if (flow.name === 'ab-test-success') { + return ( + { + resetAll(); + void refreshAll().then(() => setFlow({ name: 'select' })); + }} + onExit={onExit} + /> + ); + } + if (flow.name === 'runtime-endpoint-success') { return ( { resetAll(); setFlow({ name: 'select' }); diff --git a/src/cli/tui/screens/remove/RemoveScreen.tsx b/src/cli/tui/screens/remove/RemoveScreen.tsx index 2775023aa..d14786985 100644 --- a/src/cli/tui/screens/remove/RemoveScreen.tsx +++ b/src/cli/tui/screens/remove/RemoveScreen.tsx @@ -13,6 +13,8 @@ const REMOVE_RESOURCES = [ { id: 'policy', title: 'Policy', description: 'Remove a policy from a policy engine' }, { id: 'gateway', title: 'Gateway', description: 'Remove a gateway' }, { id: 'gateway-target', title: 'Gateway 
Target', description: 'Remove a gateway target' }, + { id: 'config-bundle', title: 'Configuration Bundle [preview]', description: 'Remove a configuration bundle' }, + { id: 'ab-test', title: 'AB Test [preview]', description: 'Remove an A/B test' }, { id: 'runtime-endpoint', title: 'Runtime Endpoint', description: 'Remove a runtime endpoint' }, { id: 'all', title: 'All', description: 'Reset entire agentcore project' }, ] as const; @@ -42,6 +44,10 @@ interface RemoveScreenProps { policyEngineCount: number; /** Number of policies available for removal */ policyCount: number; + /** Number of configuration bundles available for removal */ + configBundleCount: number; + /** Number of AB tests available for removal */ + abTestCount: number; /** Number of runtime endpoints available for removal */ runtimeEndpointCount: number; } @@ -59,6 +65,8 @@ export function RemoveScreen({ onlineEvalCount, policyEngineCount, policyCount, + configBundleCount, + abTestCount, runtimeEndpointCount, }: RemoveScreenProps) { const items: SelectableItem[] = useMemo(() => { @@ -127,6 +135,18 @@ export function RemoveScreen({ description = 'No policies to remove'; } break; + case 'config-bundle': + if (configBundleCount === 0) { + disabled = true; + description = 'No configuration bundles to remove'; + } + break; + case 'ab-test': + if (abTestCount === 0) { + disabled = true; + description = 'No AB tests to remove'; + } + break; case 'runtime-endpoint': if (runtimeEndpointCount === 0) { disabled = true; @@ -151,6 +171,8 @@ export function RemoveScreen({ onlineEvalCount, policyEngineCount, policyCount, + configBundleCount, + abTestCount, runtimeEndpointCount, ]); diff --git a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx index 5418bb3eb..9f148fa68 100644 --- a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx +++ b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx @@ -13,6 +13,8 @@ describe('RemoveScreen', 
() => { onSelect={onSelect} onExit={onExit} agentCount={1} + configBundleCount={0} + abTestCount={0} harnessCount={1} gatewayCount={1} mcpToolCount={1} @@ -45,6 +47,8 @@ describe('RemoveScreen', () => { onSelect={onSelect} onExit={onExit} agentCount={0} + configBundleCount={0} + abTestCount={0} harnessCount={0} gatewayCount={0} mcpToolCount={0} diff --git a/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx b/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx new file mode 100644 index 000000000..a1903f7d0 --- /dev/null +++ b/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx @@ -0,0 +1,307 @@ +import type { BatchEvalRunRecord } from '../../../operations/eval/batch-eval-storage'; +import { listBatchEvalRuns } from '../../../operations/eval/batch-eval-storage'; +import { Panel, Screen } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { Box, Text, useInput, useStdout } from 'ink'; +import React, { useMemo, useState } from 'react'; + +const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; + +function formatShortDate(timestamp: string): string { + const d = new Date(timestamp); + const mon = MONTHS[d.getMonth()]; + const day = d.getDate(); + const h = d.getHours(); + const m = d.getMinutes().toString().padStart(2, '0'); + const ampm = h >= 12 ? 
'PM' : 'AM'; + const h12 = h % 12 || 12; + return `${mon} ${day} ${h12}:${m} ${ampm}`; +} + +function statusColor(status: string): string { + if (status === 'COMPLETED' || status === 'SUCCEEDED') return 'green'; + if (status === 'FAILED') return 'red'; + if (status === 'IN_PROGRESS' || status === 'PENDING') return 'yellow'; + return 'gray'; +} + +function scoreColor(score: number): string { + if (score >= 0.8) return 'green'; + if (score >= 0.5) return 'yellow'; + return 'red'; +} + +const CHROME_LINES = 9; + +// ───────────────────────────────────────────────────────────────────────────── +// List view +// ───────────────────────────────────────────────────────────────────────────── + +function BatchEvalListView({ + records, + onSelect, + onExit, + availableHeight, +}: { + records: BatchEvalRunRecord[]; + onSelect: (record: BatchEvalRunRecord) => void; + onExit: () => void; + availableHeight: number; +}) { + const nav = useListNavigation({ + items: records, + onSelect: item => onSelect(item), + onExit, + isActive: true, + }); + + const maxVisible = Math.max(1, availableHeight - 3); + const visible = useMemo(() => { + let start = 0; + if (nav.selectedIndex >= maxVisible) { + start = nav.selectedIndex - maxVisible + 1; + } + return { items: records.slice(start, start + maxVisible), startIdx: start }; + }, [records, nav.selectedIndex, maxVisible]); + + return ( + + + Batch Evaluation History + + {records.length} batch evaluation{records.length !== 1 ? 's' : ''} + + + {visible.items.map((rec, vIdx) => { + const idx = visible.startIdx + vIdx; + const selected = idx === nav.selectedIndex; + const date = rec.startedAt ? formatShortDate(rec.startedAt) : 'unknown'; + + // Build a short score summary from evaluationResults or results + const summaries = rec.evaluationResults?.evaluatorSummaries; + let scoreText = ''; + if (summaries && summaries.length > 0) { + scoreText = summaries + .map(s => { + const avg = s.statistics?.averageScore; + return avg != null ? 
avg.toFixed(2) : 'N/A'; + }) + .join(', '); + } else if (rec.results.length > 0) { + const byEval = new Map(); + for (const r of rec.results) { + if (r.score != null) { + const scores = byEval.get(r.evaluatorId) ?? []; + scores.push(r.score); + byEval.set(r.evaluatorId, scores); + } + } + scoreText = [...byEval.entries()] + .map(([, scores]) => (scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(2)) + .join(', '); + } + + return ( + + {selected ? '>' : ' '} + {date.padEnd(16)} + {rec.status.padEnd(12)} + {scoreText && {scoreText.padEnd(10)}} + {rec.name} + + ); + })} + {visible.startIdx + maxVisible < records.length && ( + {records.length - visible.startIdx - maxVisible} more + )} + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Detail view +// ───────────────────────────────────────────────────────────────────────────── + +function BatchEvalDetailView({ record, onBack }: { record: BatchEvalRunRecord; onBack: () => void }) { + useInput((input, key) => { + if (key.escape || input === 'b') { + onBack(); + } + }); + + const evalRes = record.evaluationResults; + const summaries = evalRes?.evaluatorSummaries; + + // Fall back to local grouping when API summaries aren't available + const byEvaluator = useMemo(() => { + if (summaries && summaries.length > 0) return null; + const map = new Map(); + for (const r of record.results) { + const entry = map.get(r.evaluatorId) ?? 
{ scores: [], errors: 0 }; + if (r.error) { + entry.errors++; + } else if (r.score != null) { + entry.scores.push(r.score); + } + map.set(r.evaluatorId, entry); + } + return map; + }, [record.results, summaries]); + + return ( + + + + ID: {record.batchEvaluationId} + + + Name: {record.name} + {' '} + Status: {record.status} + + + Evaluators: {record.evaluators.join(', ')} + + {record.startedAt && ( + + Started: {new Date(record.startedAt).toLocaleString()} + + )} + {record.completedAt && ( + + Completed: {new Date(record.completedAt).toLocaleString()} + + )} + + {evalRes?.totalNumberOfSessions != null && ( + + Sessions: {evalRes.totalNumberOfSessions} total + {evalRes.numberOfSessionsCompleted != null && , {evalRes.numberOfSessionsCompleted} completed} + {evalRes.numberOfSessionsFailed ? , {evalRes.numberOfSessionsFailed} failed : null} + + )} + + {summaries && summaries.length > 0 ? ( + + Scores (0 worst — 1 best): + {summaries.map(s => { + const avg = s.statistics?.averageScore; + const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; + const color = avg != null ? scoreColor(avg) : undefined; + return ( + + {' '} + {s.evaluatorId} + {' '} + {avgStr} + {s.totalFailed ? ({s.totalFailed} failed) : null} + {s.totalEvaluated != null && [{s.totalEvaluated} evaluated]} + + ); + })} + + ) : byEvaluator && byEvaluator.size > 0 ? ( + + Scores (0 worst — 1 best): + {[...byEvaluator.entries()].map(([evalId, { scores, errors }]) => { + const avg = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0; + return ( + + {' '} + {evalId} + {' '} + {avg.toFixed(2)} + {errors > 0 && ({errors} errors)} + + ); + })} + + ) : ( + + No evaluation results available. 
+ + )} + + + Press Esc or B to go back + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main screen +// ───────────────────────────────────────────────────────────────────────────── + +interface BatchEvalHistoryScreenProps { + onExit: () => void; +} + +export function BatchEvalHistoryScreen({ onExit }: BatchEvalHistoryScreenProps) { + const { stdout } = useStdout(); + const terminalHeight = stdout?.rows ?? 24; + const availableHeight = Math.max(6, terminalHeight - CHROME_LINES); + + const [selectedRecord, setSelectedRecord] = useState(null); + + const [records, loaded, error] = useMemo(() => { + try { + return [listBatchEvalRuns(), true, null] as const; + } catch (err) { + return [[] as BatchEvalRunRecord[], true, err instanceof Error ? err.message : String(err)] as const; + } + }, []); + + if (!loaded) { + return ( + + Loading... + + ); + } + + if (error) { + return ( + + {error} + + ); + } + + if (records.length === 0) { + return ( + + + No batch evaluation runs found. + Run a batch evaluation from the TUI or CLI to see results here. + + + ); + } + + const helpText = selectedRecord ? 'Esc/B back to list' : HELP_TEXT.NAVIGATE_SELECT; + + return ( + + {selectedRecord ? 
( + setSelectedRecord(null)} /> + ) : ( + + )} + + ); +} diff --git a/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx b/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx new file mode 100644 index 000000000..38c03c150 --- /dev/null +++ b/src/cli/tui/screens/run-eval/RunBatchEvalFlow.tsx @@ -0,0 +1,968 @@ +import { validateAwsCredentials } from '../../../aws/account'; +import { stopBatchEvaluation } from '../../../aws/agentcore-batch-evaluation'; +import type { SessionMetadataEntry } from '../../../aws/agentcore-batch-evaluation'; +import { listEvaluators } from '../../../aws/agentcore-control'; +import { detectRegion } from '../../../aws/region'; +import { getErrorMessage } from '../../../errors'; +import type { SessionInfo } from '../../../operations/eval'; +import { discoverSessions } from '../../../operations/eval'; +import { saveBatchEvalRun } from '../../../operations/eval/batch-eval-storage'; +import { runBatchEvaluationCommand } from '../../../operations/eval/run-batch-evaluation'; +import type { + BatchEvaluationResult, + RunBatchEvaluationCommandResult, +} from '../../../operations/eval/run-batch-evaluation'; +import { loadDeployedProjectConfig, resolveAgent } from '../../../operations/resolve-agent'; +import { + ConfirmReview, + ErrorPrompt, + GradientText, + Panel, + PathInput, + Screen, + StepIndicator, + StepProgress, + TextInput, + WizardMultiSelect, + WizardSelect, +} from '../../components'; +import type { SelectableItem, Step } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; +import type { EvaluatorItem } from '../online-eval/types'; +import { GroundTruthForm } from './GroundTruthForm'; +import type { AgentItem } from './types'; +import type { GroundTruthData } from './useRunEvalWizard'; +import { Box, Text, useInput } from 'ink'; +import { readFileSync } from 'node:fs'; +import { resolve as resolvePath } from 'node:path'; +import React, { 
useCallback, useEffect, useMemo, useRef, useState } from 'react'; + +// ============================================================================ +// Types +// ============================================================================ + +const DEFAULT_LOOKBACK_DAYS = 7; + +type BatchEvalStep = 'agent' | 'evaluators' | 'days' | 'sessions' | 'ground-truth' | 'name' | 'confirm'; + +interface BatchEvalConfig { + agent: string; + evaluators: string[]; + evaluatorNames: string[]; + days: number; + sessionIds: string[]; + groundTruthFile: string; + sessionMetadata?: SessionMetadataEntry[]; + name: string; +} + +const STEP_LABELS: Record = { + agent: 'Agent', + evaluators: 'Evaluators', + days: 'Lookback', + sessions: 'Sessions', + 'ground-truth': 'Ground Truth', + name: 'Name', + confirm: 'Confirm', +}; + +type FlowState = + | { name: 'loading' } + | { name: 'wizard'; agents: AgentItem[]; evaluators: EvaluatorItem[] } + | { + name: 'running'; + config: BatchEvalConfig; + steps: Step[]; + elapsed: number; + batchEvaluationId?: string; + region?: string; + } + | { name: 'results'; result: RunBatchEvaluationCommandResult; savedFilePath?: string } + | { name: 'creds-error'; message: string } + | { name: 'error'; message: string; logFilePath?: string }; + +// ============================================================================ +// Flow Component +// ============================================================================ + +interface RunBatchEvalFlowProps { + onExit: () => void; +} + +export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) { + const [flow, setFlow] = useState({ name: 'loading' }); + const stoppingRef = useRef(false); + + // Handle Esc to stop a running batch evaluation + useInput((_input, key) => { + if (flow.name !== 'running' || !flow.batchEvaluationId || !flow.region || stoppingRef.current) return; + if (key.escape) { + stoppingRef.current = true; + void stopBatchEvaluation({ region: flow.region, batchEvaluationId: 
flow.batchEvaluationId }).catch(() => { + // Best-effort — the poll loop will pick up the final status + }); + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = prev.steps.map(s => + s.status === 'running' ? { ...s, status: 'error' as const, error: 'Stopping...' } : s + ); + return { ...prev, steps }; + }); + } + }); + + // Load agents and evaluators + useEffect(() => { + if (flow.name !== 'loading') return; + let cancelled = false; + + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) }); + return; + } + + try { + const context = await loadDeployedProjectConfig(); + const targetRegion = context.awsTargets?.[0]?.region; + const { region: detectedRegion } = await detectRegion(); + const region = targetRegion ?? detectedRegion; + const evalResult = await listEvaluators({ region }); + + if (cancelled) return; + + const evaluators: EvaluatorItem[] = evalResult.evaluators.map(e => ({ + arn: e.evaluatorArn, + name: e.evaluatorName, + type: e.evaluatorType, + description: e.description, + })); + + // Only show deployed agents + const deployedAgentNames = new Set(); + for (const target of Object.values(context.deployedState.targets)) { + const runtimeStates = target.resources?.runtimes; + if (runtimeStates) { + for (const name of Object.keys(runtimeStates)) { + deployedAgentNames.add(name); + } + } + } + + const agents: AgentItem[] = context.project.runtimes + .filter((a: { name: string }) => deployedAgentNames.has(a.name)) + .map((a: { name: string; build: string }) => ({ name: a.name, build: a.build })); + + if (agents.length === 0) { + if (!cancelled) { + setFlow({ + name: 'error', + message: + context.project.runtimes.length === 0 + ? 'No agents found in project. Run `agentcore add agent` first.' + : 'No deployed agents found. 
Run `agentcore deploy` first.', + }); + } + return; + } + + if (evaluators.length === 0) { + if (!cancelled) { + setFlow({ name: 'error', message: 'No evaluators found in your account. Create an evaluator first.' }); + } + return; + } + + setFlow({ name: 'wizard', agents, evaluators }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); + + const handleWizardComplete = useCallback((config: BatchEvalConfig) => { + stoppingRef.current = false; + const initialSteps: Step[] = [ + { label: 'Starting batch evaluation...', status: 'running' }, + { label: 'Polling for results', status: 'pending' }, + { label: 'Fetching scores', status: 'pending' }, + ]; + setFlow({ name: 'running', config, steps: initialSteps, elapsed: 0 }); + }, []); + + // Execute batch evaluation + useEffect(() => { + if (flow.name !== 'running') return; + let cancelled = false; + + const { config } = flow; + const startTime = Date.now(); + + const timer = setInterval(() => { + if (!cancelled) { + setFlow(prev => { + if (prev.name !== 'running') return prev; + return { ...prev, elapsed: Math.floor((Date.now() - startTime) / 1000) }; + }); + } + }, 1000); + + void (async () => { + try { + const result = await runBatchEvaluationCommand({ + agent: config.agent, + evaluators: config.evaluators, + name: config.name || undefined, + sessionIds: config.sessionIds.length > 0 ? 
config.sessionIds : undefined, + lookbackDays: config.days, + sessionMetadata: config.sessionMetadata, + onProgress: (status, _message) => { + if (cancelled) return; + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = [...prev.steps]; + if (status === 'running') { + steps[0] = { ...steps[0]!, status: 'success' }; + steps[1] = { ...steps[1]!, status: 'running' }; + } + return { ...prev, steps }; + }); + }, + onStarted: info => { + setFlow(prev => { + if (prev.name !== 'running') return prev; + return { ...prev, batchEvaluationId: info.batchEvaluationId, region: info.region }; + }); + }, + }); + + clearInterval(timer); + if (cancelled) return; + + // Save results locally + let savedFilePath: string | undefined; + if (result.success) { + try { + savedFilePath = saveBatchEvalRun(result); + } catch { + // Non-fatal + } + } + + if (!result.success) { + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = prev.steps.map(s => + s.status === 'running' ? { ...s, status: 'error' as const, error: result.error } : s + ); + return { ...prev, steps }; + }); + await new Promise(resolve => setTimeout(resolve, 2000)); + if (cancelled) return; + setFlow({ + name: 'error', + message: result.error ?? 'Batch evaluation failed', + logFilePath: result.logFilePath, + }); + return; + } + + // Mark all steps success + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = prev.steps.map(s => ({ ...s, status: 'success' as const })); + return { ...prev, steps }; + }); + + setFlow({ name: 'results', result, savedFilePath }); + } catch (err) { + clearInterval(timer); + if (!cancelled) { + const errorMsg = getErrorMessage(err); + setFlow(prev => { + if (prev.name !== 'running') return prev; + const steps = prev.steps.map(s => + s.status === 'running' ? 
{ ...s, status: 'error' as const, error: errorMsg } : s + ); + return { ...prev, steps }; + }); + await new Promise(resolve => setTimeout(resolve, 2000)); + setFlow({ name: 'error', message: errorMsg }); + } + } + })(); + + return () => { + cancelled = true; + clearInterval(timer); + }; + }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps + + if (flow.name === 'loading') { + return ( + + + + ); + } + + if (flow.name === 'creds-error') { + return ; + } + + if (flow.name === 'wizard') { + return ( + + ); + } + + if (flow.name === 'running') { + const minutes = Math.floor(flow.elapsed / 60); + const seconds = flow.elapsed % 60; + const timeStr = minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`; + + return ( + + + + + Agent: {flow.config.agent} + {' '} + Evaluators: {flow.config.evaluatorNames.join(', ')} + {' '} + ({timeStr}) + + + This may take a few minutes... + {flow.batchEvaluationId && Press Esc to stop the evaluation} + + + + ); + } + + if (flow.name === 'results') { + return ( + setFlow({ name: 'loading' })} + onExit={onExit} + /> + ); + } + + return ( + setFlow({ name: 'loading' })} + onExit={onExit} + /> + ); +} + +// ============================================================================ +// Wizard Component +// ============================================================================ + +interface BatchEvalWizardProps { + agents: AgentItem[]; + evaluators: EvaluatorItem[]; + onComplete: (config: BatchEvalConfig) => void; + onExit: () => void; +} + +function BatchEvalWizard({ agents, evaluators: rawEvaluators, onComplete, onExit }: BatchEvalWizardProps) { + const skipAgent = agents.length <= 1; + const allSteps = useMemo( + () => + skipAgent + ? ['evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm'] + : ['agent', 'evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm'], + [skipAgent] + ); + + const [step, setStep] = useState(allSteps[0]!); + const [config, setConfig] = useState({ + agent: skipAgent ? 
agents[0]!.name : '', + evaluators: [], + evaluatorNames: [], + days: DEFAULT_LOOKBACK_DAYS, + sessionIds: [], + groundTruthFile: '', + sessionMetadata: undefined, + name: '', + }); + + const currentIndex = allSteps.indexOf(step); + const [groundTruthError, setGroundTruthError] = useState(null); + const [gtMode, setGtMode] = useState<'choose' | 'file' | 'inline'>('choose'); + + const goBack = useCallback(() => { + const prev = allSteps[currentIndex - 1]; + if (prev) { + if (prev === 'ground-truth') setGtMode('choose'); + setStep(prev); + } else onExit(); + }, [allSteps, currentIndex, onExit]); + + const goNext = useCallback(() => { + const next = allSteps[currentIndex + 1]; + if (next) setStep(next); + }, [allSteps, currentIndex]); + + const agentItems: SelectableItem[] = useMemo( + () => agents.map(a => ({ id: a.name, title: a.name, description: a.build })), + [agents] + ); + + const evaluatorItems: SelectableItem[] = useMemo( + () => + rawEvaluators.map(e => ({ + id: e.arn, + title: e.name, + description: e.type === 'Builtin' ? 'Built-in evaluator' : (e.description ?? 'Custom evaluator'), + })), + [rawEvaluators] + ); + + // ── Session discovery ────────────────────────────────────────────────────── + + type SessionResult = { phase: 'loaded'; sessions: SessionInfo[] } | { phase: 'error'; message: string }; + + const [sessionResult, setSessionResult] = useState(); + const fetchingRef = useRef(''); + + const isAgentStep = step === 'agent'; + const isEvaluatorsStep = step === 'evaluators'; + const isDaysStep = step === 'days'; + const isSessionsStep = step === 'sessions'; + const isGroundTruthStep = step === 'ground-truth'; + const isNameStep = step === 'name'; + const isConfirmStep = step === 'confirm'; + + const fetchKey = `${config.agent}:${config.days}`; + const sessionPhase = !isSessionsStep ? 'idle' : sessionResult?.key === fetchKey ? 
sessionResult.phase : 'loading'; + + useEffect(() => { + if (!isSessionsStep) return; + if (sessionResult?.key === fetchKey) return; + if (fetchingRef.current === fetchKey) return; + fetchingRef.current = fetchKey; + let cancelled = false; + + void (async () => { + try { + const context = await loadDeployedProjectConfig(); + const targetRegion = context.awsTargets?.[0]?.region; + const { region: detectedRegion } = await detectRegion(); + const region = targetRegion ?? detectedRegion; + const agentResult = resolveAgent(context, { runtime: config.agent }); + if (!agentResult.success) { + if (!cancelled) setSessionResult({ key: fetchKey, phase: 'error', message: agentResult.error }); + return; + } + + const sessions = await discoverSessions({ + runtimeId: agentResult.agent.runtimeId, + region, + lookbackDays: config.days, + }); + + if (cancelled) return; + + if (sessions.length === 0) { + setSessionResult({ + key: fetchKey, + phase: 'error', + message: 'No sessions found in the lookback window. Try increasing the lookback days.', + }); + } else { + setSessionResult({ key: fetchKey, phase: 'loaded', sessions }); + } + } catch (err) { + if (!cancelled) { + setSessionResult({ + key: fetchKey, + phase: 'error', + message: err instanceof Error ? err.message : 'Failed to discover sessions', + }); + } + } + })(); + + return () => { + cancelled = true; + }; + }, [isSessionsStep, fetchKey]); // eslint-disable-line react-hooks/exhaustive-deps + + const sessionItems: SelectableItem[] = useMemo(() => { + const sessions = sessionResult?.phase === 'loaded' ? sessionResult.sessions : []; + return sessions.map(s => { + const date = s.firstSeen + ? new Date(s.firstSeen).toLocaleString([], { + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + }) + : ''; + const shortId = s.sessionId.length > 36 ? 
s.sessionId.slice(0, 36) + '…' : s.sessionId; + return { + id: s.sessionId, + title: shortId, + description: `${s.spanCount} spans · ${date}`, + }; + }); + }, [sessionResult]); + + // ── Navigation hooks ────────────────────────────────────────────────────── + + const agentNav = useListNavigation({ + items: agentItems, + onSelect: item => { + setConfig(c => ({ ...c, agent: item.id })); + goNext(); + }, + onExit, + isActive: isAgentStep, + }); + + const evaluatorsNav = useMultiSelectNavigation({ + items: evaluatorItems, + getId: item => item.id, + onConfirm: ids => { + const names = ids.map(id => { + const item = rawEvaluators.find(e => e.arn === id); + return item?.name ?? id; + }); + setConfig(c => ({ ...c, evaluators: ids, evaluatorNames: names })); + goNext(); + }, + onExit: () => goBack(), + isActive: isEvaluatorsStep, + requireSelection: true, + }); + + // Handle Esc during session loading/error + useListNavigation({ + items: [{ id: 'back', title: 'Back' }], + onSelect: () => goBack(), + onExit: () => goBack(), + isActive: isSessionsStep && sessionPhase !== 'loaded', + }); + + const sessionsNav = useMultiSelectNavigation({ + items: sessionItems, + getId: item => item.id, + onConfirm: ids => { + setConfig(c => ({ ...c, sessionIds: ids })); + goNext(); + }, + onExit: () => goBack(), + isActive: isSessionsStep && sessionPhase === 'loaded', + requireSelection: true, + }); + + const gtChoiceItems: SelectableItem[] = useMemo( + () => [ + { id: 'skip', title: 'Skip', description: 'No ground truth' }, + { id: 'file', title: 'Load from file', description: 'JSON file with session metadata and ground truth' }, + { id: 'inline', title: 'Enter manually', description: 'Type assertions, trajectory, and expected response' }, + ], + [] + ); + + const gtChoiceNav = useListNavigation({ + items: gtChoiceItems, + onSelect: item => { + setGroundTruthError(null); + if (item.id === 'skip') { + setConfig(c => ({ ...c, groundTruthFile: '', sessionMetadata: undefined })); + goNext(); + 
} else if (item.id === 'file') { + setGtMode('file'); + } else { + setGtMode('inline'); + } + }, + onExit: () => goBack(), + isActive: isGroundTruthStep && gtMode === 'choose', + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(config), + onExit: () => goBack(), + isActive: isConfirmStep, + }); + + const helpText = isAgentStep + ? HELP_TEXT.NAVIGATE_SELECT + : isEvaluatorsStep + ? 'Space toggle · Enter confirm · Esc back' + : isDaysStep + ? HELP_TEXT.TEXT_INPUT + : isSessionsStep + ? sessionPhase === 'loading' + ? '' + : sessionPhase === 'error' + ? HELP_TEXT.CONFIRM_CANCEL + : 'Space toggle · Enter confirm · Esc back' + : isGroundTruthStep + ? gtMode === 'choose' + ? HELP_TEXT.NAVIGATE_SELECT + : gtMode === 'file' + ? HELP_TEXT.TEXT_INPUT + : 'Enter value · Enter on empty to skip section · Esc back' + : isNameStep + ? HELP_TEXT.TEXT_INPUT + : HELP_TEXT.CONFIRM_CANCEL; + + const headerContent = ; + + return ( + + + {isAgentStep && ( + + )} + + {isEvaluatorsStep && ( + + )} + + {isDaysStep && ( + + Note: Traces may take 5–10 min to appear after agent invocations. + { + const days = parseInt(value, 10); + if (!isNaN(days) && days >= 1 && days <= 90) { + setConfig(c => ({ ...c, days })); + goNext(); + } + }} + onCancel={() => goBack()} + customValidation={value => { + const days = parseInt(value, 10); + if (isNaN(days)) return 'Must be a number'; + if (days < 1 || days > 90) return 'Must be between 1 and 90'; + return true; + }} + /> + + )} + + {isSessionsStep && sessionPhase === 'loading' && } + + {isSessionsStep && sessionResult?.phase === 'error' && {sessionResult.message}} + + {isSessionsStep && sessionPhase === 'loaded' && ( + + )} + + {isGroundTruthStep && gtMode === 'choose' && ( + + )} + + {isGroundTruthStep && gtMode === 'file' && ( + + Select a JSON file with session ground truth (assertions, expected trajectory, turns). 
+ {groundTruthError && {groundTruthError}} + { + setGroundTruthError(null); + try { + const resolved = resolvePath(value.trim()); + const content = readFileSync(resolved, 'utf-8'); + const parsed = JSON.parse(content) as Record; + const metadata: SessionMetadataEntry[] = Array.isArray(parsed) + ? (parsed as SessionMetadataEntry[]) + : (parsed.sessionMetadata as SessionMetadataEntry[]); + if (!Array.isArray(metadata)) { + setGroundTruthError('File must be a JSON array or contain a "sessionMetadata" array'); + return; + } + setConfig(c => ({ ...c, groundTruthFile: resolved, sessionMetadata: metadata })); + goNext(); + } catch (err) { + setGroundTruthError(`Failed to load file: ${err instanceof Error ? err.message : String(err)}`); + } + }} + onCancel={() => { + setGroundTruthError(null); + setGtMode('choose'); + }} + /> + + )} + + {isGroundTruthStep && gtMode === 'inline' && ( + { + // Apply the same ground truth to all selected sessions + const metadata: SessionMetadataEntry[] = config.sessionIds.map(sid => ({ + sessionId: sid, + groundTruth: { + inline: { + ...(gt.assertions.length > 0 ? { assertions: gt.assertions.map(text => ({ text })) } : {}), + ...(gt.expectedTrajectory.length > 0 + ? { expectedTrajectory: { toolNames: gt.expectedTrajectory } } + : {}), + ...(gt.expectedResponse + ? { + turns: [ + { + input: { prompt: '' }, + expectedResponse: { text: gt.expectedResponse }, + }, + ], + } + : {}), + }, + }, + })); + setConfig(c => ({ ...c, groundTruthFile: '', sessionMetadata: metadata })); + goNext(); + }} + onCancel={() => { + setGtMode('choose'); + }} + /> + )} + + {isNameStep && ( + + Optional — leave blank for auto-generated name. 
+ { + setConfig(c => ({ ...c, name: value })); + goNext(); + }} + onCancel={() => goBack()} + /> + + )} + + {isConfirmStep && ( + + )} + + + ); +} + +// ============================================================================ +// Results View +// ============================================================================ + +function scoreColor(score: number): string { + if (score >= 0.8) return 'green'; + if (score >= 0.5) return 'yellow'; + return 'red'; +} + +interface ResultsViewProps { + result: RunBatchEvaluationCommandResult; + savedFilePath?: string; + onRunAnother: () => void; + onExit: () => void; +} + +function ResultsView({ result, savedFilePath, onRunAnother, onExit }: ResultsViewProps) { + const actions = [ + { id: 'another', title: 'Run another batch evaluation' }, + { id: 'back', title: 'Back' }, + ]; + + const nav = useListNavigation({ + items: actions, + onSelect: item => { + if (item.id === 'another') onRunAnother(); + else onExit(); + }, + onExit, + isActive: true, + }); + + const evalRes = result.evaluationResults; + const summaries = evalRes?.evaluatorSummaries; + + // Fall back to local grouping when API summaries aren't available + const byEvaluator = useMemo(() => { + if (summaries && summaries.length > 0) return null; + const map = new Map(); + for (const r of result.results) { + const group = map.get(r.evaluatorId) ?? []; + group.push(r); + map.set(r.evaluatorId, group); + } + return map; + }, [result.results, summaries]); + + return ( + + + + ✓ Batch evaluation complete + + ID: {result.batchEvaluationId} + {' '} + Status: {result.status} + + {result.name && ( + + Name: {result.name} + + )} + + {evalRes?.totalNumberOfSessions != null && ( + + Sessions: {evalRes.totalNumberOfSessions} total + {evalRes.numberOfSessionsCompleted != null && ( + , {evalRes.numberOfSessionsCompleted} completed + )} + {evalRes.numberOfSessionsFailed ? ( + , {evalRes.numberOfSessionsFailed} failed + ) : null} + + )} + + {summaries && summaries.length > 0 ? 
( + + Scores range from 0 (worst) to 1 (best). + {summaries.map(s => { + const avg = s.statistics?.averageScore; + const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; + const color = avg != null ? scoreColor(avg) : undefined; + return ( + + {' '} + {s.evaluatorId} + {' '} + {avgStr} + {s.totalFailed ? ({s.totalFailed} failed) : null} + {s.totalEvaluated != null && [{s.totalEvaluated} evaluated]} + + ); + })} + + ) : byEvaluator && byEvaluator.size > 0 ? ( + + Scores range from 0 (worst) to 1 (best). + {[...byEvaluator.entries()].map(([evalId, evalResults]) => { + const scores = evalResults.filter(r => !r.error).map(r => r.score!); + const avg = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0; + const errors = evalResults.filter(r => r.error).length; + return ( + + {' '} + {evalId} + {' '} + {avg.toFixed(2)} + {errors > 0 && ({errors} errors)} + + ); + })} + + ) : ( + + No evaluation results returned. + + )} + + {savedFilePath && ( + + Results saved to: {savedFilePath} + + )} + {result.logFilePath && ( + + Log: {result.logFilePath} + + )} + + + {actions.map((action, idx) => { + const selected = idx === nav.selectedIndex; + return ( + + {selected ? 
'❯' : ' '} + + {action.title} + + + ); + })} + + + + + ); +} diff --git a/src/cli/tui/screens/run-eval/RunScreen.tsx b/src/cli/tui/screens/run-eval/RunScreen.tsx index 6675983f9..a9a797a37 100644 --- a/src/cli/tui/screens/run-eval/RunScreen.tsx +++ b/src/cli/tui/screens/run-eval/RunScreen.tsx @@ -6,10 +6,12 @@ import React, { useMemo } from 'react'; interface RunScreenProps { onRunEval: () => void; + onRunBatchEval: () => void; + onRunRecommendation: () => void; onExit: () => void; } -export function RunScreen({ onRunEval, onExit }: RunScreenProps) { +export function RunScreen({ onRunEval, onRunBatchEval, onRunRecommendation, onExit }: RunScreenProps) { const items: SelectableItem[] = useMemo( () => [ { @@ -17,13 +19,27 @@ export function RunScreen({ onRunEval, onExit }: RunScreenProps) { title: 'On-demand Evaluation', description: 'Evaluate agent traces with selected evaluators. CLI also supports --agent-arn.', }, + { + id: 'run-batch-eval', + title: 'Batch Evaluation', + description: 'Run a batch evaluation against agent sessions via CloudWatch.', + }, + { + id: 'run-recommendation', + title: 'Recommendation', + description: 'Optimize system prompts or tool descriptions using agent traces.', + }, ], [] ); const nav = useListNavigation({ items, - onSelect: () => onRunEval(), + onSelect: item => { + if (item.id === 'run-eval') onRunEval(); + else if (item.id === 'run-batch-eval') onRunBatchEval(); + else if (item.id === 'run-recommendation') onRunRecommendation(); + }, onExit, isActive: true, }); diff --git a/src/cli/tui/screens/run-eval/index.ts b/src/cli/tui/screens/run-eval/index.ts index d76e0e086..7c56bd639 100644 --- a/src/cli/tui/screens/run-eval/index.ts +++ b/src/cli/tui/screens/run-eval/index.ts @@ -1,3 +1,5 @@ +export { BatchEvalHistoryScreen } from './BatchEvalHistoryScreen'; +export { RunBatchEvalFlow } from './RunBatchEvalFlow'; export { RunEvalFlow } from './RunEvalFlow'; export { RunEvalScreen } from './RunEvalScreen'; export { RunScreen } from 
'./RunScreen'; diff --git a/src/cli/tui/screens/runtime-endpoint/AddRuntimeEndpointFlow.tsx b/src/cli/tui/screens/runtime-endpoint/AddRuntimeEndpointFlow.tsx index 2404109fc..83bda78e5 100644 --- a/src/cli/tui/screens/runtime-endpoint/AddRuntimeEndpointFlow.tsx +++ b/src/cli/tui/screens/runtime-endpoint/AddRuntimeEndpointFlow.tsx @@ -1,5 +1,6 @@ import { ConfigIO } from '../../../../lib'; import { runtimeEndpointPrimitive } from '../../../primitives/registry'; +import { withAddTelemetry } from '../../../telemetry/cli-command-run.js'; import { ErrorPrompt } from '../../components'; import { AddSuccessScreen } from '../add/AddSuccessScreen'; import { AddRuntimeEndpointScreen } from './AddRuntimeEndpointScreen'; @@ -78,24 +79,24 @@ export function AddRuntimeEndpointFlow({ }, [isInteractive, flow.name, onExit]); const handleCreateComplete = useCallback((config: RuntimeEndpointWizardConfig) => { - void runtimeEndpointPrimitive - .add({ + void withAddTelemetry('add.runtime-endpoint', {}, () => + runtimeEndpointPrimitive.add({ runtime: config.runtimeName, endpoint: config.endpointName, version: config.version, description: config.description, }) - .then(result => { - if (result.success) { - setFlow({ - name: 'create-success', - endpointName: config.endpointName, - runtimeName: config.runtimeName, - }); - return; - } - setFlow({ name: 'error', message: result.error ?? 'Unknown error' }); - }); + ).then(result => { + if (result.success) { + setFlow({ + name: 'create-success', + endpointName: config.endpointName, + runtimeName: config.runtimeName, + }); + return; + } + setFlow({ name: 'error', message: result.error ?? 'Unknown error' }); + }); }, []); if (flow.name === 'loading') { diff --git a/src/cli/tui/utils/commands.ts b/src/cli/tui/utils/commands.ts index 1ea68e3b5..912c6a7f6 100644 --- a/src/cli/tui/utils/commands.ts +++ b/src/cli/tui/utils/commands.ts @@ -12,12 +12,12 @@ export interface CommandMeta { /** * Commands hidden from TUI entirely (meta commands). 
*/ -const HIDDEN_FROM_TUI = ['help', 'telemetry'] as const; +const HIDDEN_FROM_TUI = ['help', 'telemetry', 'promote'] as const; /** * Commands that are CLI-only (shown but marked as requiring CLI invocation). */ -const CLI_ONLY_COMMANDS = ['logs', 'traces', 'pause', 'resume'] as const; +const CLI_ONLY_COMMANDS = ['logs', 'traces', 'pause', 'resume', 'stop'] as const; /** * Commands hidden from TUI when inside an existing project. diff --git a/src/lib/packaging/__tests__/helpers.test.ts b/src/lib/packaging/__tests__/helpers.test.ts index f27c171fa..a554edb0e 100644 --- a/src/lib/packaging/__tests__/helpers.test.ts +++ b/src/lib/packaging/__tests__/helpers.test.ts @@ -477,11 +477,8 @@ describe('nested agentcore directory is preserved (issue #843)', () => { }); // ── createZipFromDir (async) ── - // The zip stage should NOT exclude agentcore/ — that's copySourceTree's job. - // When zipping a staging directory, any agentcore/ present is a legitimate - // Python package installed by uv, not the project config dir. 
- it('zip: does not exclude agentcore/ directories (staging has no project config)', async () => { + it('zip: excludes top-level agentcore/ but includes nested agentcore/', async () => { const src = buildFixture(join(root, 'zip-async')); const zipPath = join(root, 'zip-async.zip'); @@ -490,17 +487,21 @@ describe('nested agentcore directory is preserved (issue #843)', () => { const zipBuffer = await readFile(zipPath); const entries = Object.keys(unzipSync(new Uint8Array(zipBuffer))); - // Both top-level and nested agentcore/ are preserved in the zip — - // the zip function zips everything; exclusion is copySourceTree's concern - expect(entries).toContain('agentcore/config.yaml'); + // Top-level agentcore/ should NOT appear + expect(entries.some(e => e === 'agentcore/config.yaml')).toBe(false); + expect(entries.some(e => e.startsWith('agentcore/'))).toBe(false); + + // Nested agentcore/ SHOULD appear expect(entries).toContain('lib/langgraph_checkpoint_aws/agentcore/__init__.py'); expect(entries).toContain('lib/langgraph_checkpoint_aws/agentcore/core.py'); + + // Regular files present expect(entries).toContain('main.py'); }); // ── createZipFromDirSync ── - it('sync zip: does not exclude agentcore/ directories (staging has no project config)', () => { + it('sync zip: excludes top-level agentcore/ but includes nested agentcore/', () => { const src = buildFixture(join(root, 'zip-sync')); const zipPath = join(root, 'zip-sync.zip'); @@ -509,61 +510,9 @@ describe('nested agentcore directory is preserved (issue #843)', () => { const zipBuffer = readFileSync(zipPath); const entries = Object.keys(unzipSync(new Uint8Array(zipBuffer))); - expect(entries).toContain('agentcore/config.yaml'); + expect(entries.some(e => e.startsWith('agentcore/'))).toBe(false); expect(entries).toContain('lib/langgraph_checkpoint_aws/agentcore/__init__.py'); expect(entries).toContain('lib/langgraph_checkpoint_aws/agentcore/core.py'); expect(entries).toContain('main.py'); }); - - // ── Staging 
directory scenario (the actual bug) ── - // After uv installs deps into staging, copySourceTree copies user source on top. - // The staging dir may contain a top-level agentcore/ from a Python package. - // createZipFromDir must NOT strip it. - - it('zip preserves top-level agentcore/ Python package in staging dir', async () => { - const staging = join(root, 'staging-zip-async'); - mkdirSync(staging, { recursive: true }); - - // Simulate uv-installed dependency with top-level agentcore/ package - const agentcorePkg = join(staging, 'langgraph_checkpoint_aws', 'agentcore'); - mkdirSync(agentcorePkg, { recursive: true }); - writeFileSync(join(staging, 'langgraph_checkpoint_aws', '__init__.py'), '# init'); - writeFileSync(join(agentcorePkg, '__init__.py'), '# agentcore init'); - writeFileSync(join(agentcorePkg, 'saver.py'), 'class AgentCoreMemorySaver: pass'); - - // User source copied on top by copySourceTree - writeFileSync(join(staging, 'main.py'), 'print("hello")'); - - const zipPath = join(root, 'staging-async.zip'); - await createZipFromDir(staging, zipPath); - - const zipBuffer = await readFile(zipPath); - const entries = Object.keys(unzipSync(new Uint8Array(zipBuffer))); - - expect(entries).toContain('langgraph_checkpoint_aws/agentcore/__init__.py'); - expect(entries).toContain('langgraph_checkpoint_aws/agentcore/saver.py'); - expect(entries).toContain('main.py'); - }); - - it('sync zip preserves top-level agentcore/ Python package in staging dir', () => { - const staging = join(root, 'staging-zip-sync'); - mkdirSync(staging, { recursive: true }); - - const agentcorePkg = join(staging, 'langgraph_checkpoint_aws', 'agentcore'); - mkdirSync(agentcorePkg, { recursive: true }); - writeFileSync(join(staging, 'langgraph_checkpoint_aws', '__init__.py'), '# init'); - writeFileSync(join(agentcorePkg, '__init__.py'), '# agentcore init'); - writeFileSync(join(agentcorePkg, 'saver.py'), 'class AgentCoreMemorySaver: pass'); - writeFileSync(join(staging, 'main.py'), 
'print("hello")'); - - const zipPath = join(root, 'staging-sync.zip'); - createZipFromDirSync(staging, zipPath); - - const zipBuffer = readFileSync(zipPath); - const entries = Object.keys(unzipSync(new Uint8Array(zipBuffer))); - - expect(entries).toContain('langgraph_checkpoint_aws/agentcore/__init__.py'); - expect(entries).toContain('langgraph_checkpoint_aws/agentcore/saver.py'); - expect(entries).toContain('main.py'); - }); }); diff --git a/src/lib/packaging/build-args.ts b/src/lib/packaging/build-args.ts index e5af34045..eadc35875 100644 --- a/src/lib/packaging/build-args.ts +++ b/src/lib/packaging/build-args.ts @@ -1,11 +1,11 @@ -import { readCliConfig } from '../schemas/io/cli-config'; +import { readGlobalConfigSync } from '../schemas/io/global-config'; /** * Return Docker --build-arg flags for UV index URLs configured in ~/.agentcore/config.json. * Returns an empty array when no custom indexes are configured. */ export function getUvBuildArgs(): string[] { - const config = readCliConfig(); + const config = readGlobalConfigSync(); const args: string[] = []; if (config.uvDefaultIndex) args.push('--build-arg', `UV_DEFAULT_INDEX=${config.uvDefaultIndex}`); if (config.uvIndex) args.push('--build-arg', `UV_INDEX=${config.uvIndex}`); diff --git a/src/lib/packaging/helpers.ts b/src/lib/packaging/helpers.ts index 36074395c..31c74b298 100644 --- a/src/lib/packaging/helpers.ts +++ b/src/lib/packaging/helpers.ts @@ -192,23 +192,24 @@ export async function createZipFromDir(sourceDir: string, outputZip: string): Pr await rm(outputZip, { force: true }); await mkdir(dirname(outputZip), { recursive: true }); - const files = await collectFiles(sourceDir); + const files = await collectFiles(sourceDir, sourceDir); const zipped = zipSync(files); await writeFile(outputZip, zipped); } -async function collectFiles(directory: string, basePath = ''): Promise { +async function collectFiles(directory: string, rootDir: string, basePath = ''): Promise { const result: Zippable = {}; const 
entries = await readdir(directory, { withFileTypes: true }); for (const entry of entries) { if (EXCLUDED_ENTRIES.has(entry.name)) continue; + if (entry.name === CONFIG_DIR && resolve(directory) === resolve(rootDir)) continue; const fullPath = join(directory, entry.name); const zipPath = basePath ? `${basePath}/${entry.name}` : entry.name; if (entry.isDirectory()) { - Object.assign(result, await collectFiles(fullPath, zipPath)); + Object.assign(result, await collectFiles(fullPath, rootDir, zipPath)); } else if (entry.isFile()) { result[zipPath] = [await readFile(fullPath), { level: 6 }]; } @@ -324,18 +325,19 @@ export function ensureBinaryAvailableSync(binary: string, installHint?: string): throw new MissingDependencyError(binary, installHint); } -function collectFilesSync(directory: string, basePath = ''): Zippable { +function collectFilesSync(directory: string, rootDir: string, basePath = ''): Zippable { const result: Zippable = {}; const entries = readdirSync(directory, { withFileTypes: true }); for (const entry of entries) { if (EXCLUDED_ENTRIES.has(entry.name)) continue; + if (entry.name === CONFIG_DIR && resolve(directory) === resolve(rootDir)) continue; const fullPath = join(directory, entry.name); const zipPath = basePath ? 
`${basePath}/${entry.name}` : entry.name; if (entry.isDirectory()) { - Object.assign(result, collectFilesSync(fullPath, zipPath)); + Object.assign(result, collectFilesSync(fullPath, rootDir, zipPath)); } else if (entry.isFile()) { result[zipPath] = [readFileSync(fullPath), { level: 6 }]; } @@ -347,7 +349,7 @@ export function createZipFromDirSync(sourceDir: string, outputZip: string): void rmSync(outputZip, { force: true }); mkdirSync(dirname(outputZip), { recursive: true }); - const files = collectFilesSync(sourceDir); + const files = collectFilesSync(sourceDir, sourceDir); const zipped = zipSync(files); writeFileSync(outputZip, zipped); } diff --git a/src/lib/schemas/io/cli-config.ts b/src/lib/schemas/io/cli-config.ts deleted file mode 100644 index aa36d82f1..000000000 --- a/src/lib/schemas/io/cli-config.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { readFileSync } from 'fs'; -import { homedir } from 'os'; -import { join } from 'path'; - -const CONFIG_FILE = join(homedir(), '.agentcore', 'config.json'); - -export interface CliConfig { - uvDefaultIndex?: string; - uvIndex?: string; - disableTransactionSearch?: boolean; - transactionSearchIndexPercentage?: number; -} - -/** - * Read the global CLI config from ~/.agentcore/config.json. - * Returns an empty object if the file doesn't exist or is malformed. 
- */ -export function readCliConfig(): CliConfig { - try { - const data = readFileSync(CONFIG_FILE, 'utf-8'); - const parsed: Record = JSON.parse(data) as Record; - const config: CliConfig = {}; - if (typeof parsed.uvDefaultIndex === 'string') config.uvDefaultIndex = parsed.uvDefaultIndex; - if (typeof parsed.uvIndex === 'string') config.uvIndex = parsed.uvIndex; - if (parsed.disableTransactionSearch === true) config.disableTransactionSearch = true; - if (typeof parsed.transactionSearchIndexPercentage === 'number') { - const pct = parsed.transactionSearchIndexPercentage; - if (pct >= 0 && pct <= 100) { - config.transactionSearchIndexPercentage = pct; - } - } - return config; - } catch { - return {}; - } -} diff --git a/src/lib/schemas/io/config-io.ts b/src/lib/schemas/io/config-io.ts index cacb73a5f..e4b2740e4 100644 --- a/src/lib/schemas/io/config-io.ts +++ b/src/lib/schemas/io/config-io.ts @@ -117,7 +117,12 @@ export class ConfigIO { */ async writeProjectSpec(data: AgentCoreProjectSpec): Promise { const filePath = this.pathResolver.getAgentConfigPath(); - await this.validateAndWrite(filePath, 'AgentCore Project Config', AgentCoreProjectSpecSchema, data); + // TODO: extend this to all resource arrays so empty defaults never pollute agentcore.json + const cleaned = { ...data }; + if (cleaned.configBundles?.length === 0) delete (cleaned as Record).configBundles; + if (cleaned.abTests?.length === 0) delete (cleaned as Record).abTests; + if (cleaned.httpGateways?.length === 0) delete (cleaned as Record).httpGateways; + await this.validateAndWrite(filePath, 'AgentCore Project Config', AgentCoreProjectSpecSchema, cleaned); } /** diff --git a/src/cli/global-config.ts b/src/lib/schemas/io/global-config.ts similarity index 73% rename from src/cli/global-config.ts rename to src/lib/schemas/io/global-config.ts index 267ad6669..fc64eb39b 100644 --- a/src/cli/global-config.ts +++ b/src/lib/schemas/io/global-config.ts @@ -1,3 +1,4 @@ +import { readFileSync } from 'fs'; import { 
mkdir, readFile, writeFile } from 'fs/promises'; import { randomUUID } from 'node:crypto'; import { homedir } from 'os'; @@ -9,18 +10,19 @@ export const GLOBAL_CONFIG_FILE = join(GLOBAL_CONFIG_DIR, 'config.json'); const GlobalConfigSchema = z .object({ - installationId: z.string().optional(), - uvDefaultIndex: z.string().optional(), - uvIndex: z.string().optional(), - disableTransactionSearch: z.boolean().optional(), - transactionSearchIndexPercentage: z.number().min(0).max(100).optional(), + installationId: z.string().optional().catch(undefined), + uvDefaultIndex: z.string().optional().catch(undefined), + uvIndex: z.string().optional().catch(undefined), + disableTransactionSearch: z.boolean().optional().catch(undefined), + transactionSearchIndexPercentage: z.number().int().min(0).max(100).optional().catch(undefined), telemetry: z .object({ - enabled: z.boolean().optional(), - endpoint: z.string().optional(), - audit: z.boolean().optional(), + enabled: z.boolean().optional().catch(undefined), + endpoint: z.string().optional().catch(undefined), + audit: z.boolean().optional().catch(undefined), }) - .optional(), + .optional() + .catch(undefined), }) .passthrough(); @@ -35,6 +37,15 @@ export async function readGlobalConfig(configFile = GLOBAL_CONFIG_FILE): Promise } } +export function readGlobalConfigSync(configFile = GLOBAL_CONFIG_FILE): GlobalConfig { + try { + const data = readFileSync(configFile, 'utf-8'); + return GlobalConfigSchema.parse(JSON.parse(data)); + } catch { + return {}; + } +} + export async function updateGlobalConfig( partial: GlobalConfig, configDir = GLOBAL_CONFIG_DIR, diff --git a/src/lib/schemas/io/index.ts b/src/lib/schemas/io/index.ts index e8ddddc0f..212468ffe 100644 --- a/src/lib/schemas/io/index.ts +++ b/src/lib/schemas/io/index.ts @@ -11,4 +11,3 @@ export { type PathConfig, } from './path-resolver'; export { ConfigIO, createConfigIO, getSchemaUrlForVersion } from './config-io'; -export { readCliConfig, type CliConfig } from './cli-config'; 
diff --git a/src/schema/llm-compacted/agentcore.ts b/src/schema/llm-compacted/agentcore.ts index 5819cbd47..c86c14cb7 100644 --- a/src/schema/llm-compacted/agentcore.ts +++ b/src/schema/llm-compacted/agentcore.ts @@ -18,6 +18,16 @@ interface AgentCoreProjectSpec { runtimes: AgentEnvSpec[]; // Unique by name memories: Memory[]; // Unique by name credentials: Credential[]; // Unique by name + evaluators: Evaluator[]; // Unique by name — custom evaluator definitions + onlineEvalConfigs: OnlineEvalConfig[]; // Unique by name — online evaluation configs + agentCoreGateways: AgentCoreGateway[]; // Unique by name — MCP gateways + mcpRuntimeTools?: AgentCoreMcpRuntimeTool[]; // Unique by name — standalone MCP runtime tools (not behind a gateway) + unassignedTargets?: AgentCoreGatewayTarget[]; // Unique by name — targets not yet assigned to a gateway + policyEngines: PolicyEngine[]; // Unique by name — Cedar policy engines + configBundles: ConfigBundle[]; // Unique by name — configuration bundles for versioned config + abTests: ABTest[]; // Unique by name — A/B test experiments + /** @internal Auto-managed by AB test creation. Do not configure directly. 
*/ + httpGateways: HttpGateway[]; // Unique by name — HTTP gateways bound to a runtime } // ───────────────────────────────────────────────────────────────────────────── @@ -36,6 +46,15 @@ interface NetworkConfig { type MemoryStrategyType = 'SEMANTIC' | 'SUMMARIZATION' | 'USER_PREFERENCE' | 'EPISODIC'; type ModelProvider = 'Bedrock' | 'Gemini' | 'OpenAI' | 'Anthropic'; +type EvaluationLevel = 'SESSION' | 'TRACE' | 'TOOL_CALL'; +type GatewayTargetType = 'lambda' | 'mcpServer' | 'openApiSchema' | 'smithyModel' | 'apiGateway' | 'lambdaFunctionArn'; +type OutboundAuthType = 'OAUTH' | 'API_KEY' | 'NONE'; +type GatewayAuthorizerType = 'NONE' | 'AWS_IAM' | 'CUSTOM_JWT'; +type GatewayExceptionLevel = 'NONE' | 'DEBUG'; +type PolicyEngineMode = 'LOG_ONLY' | 'ENFORCE'; +type ValidationMode = 'FAIL_ON_ANY_FINDINGS' | 'IGNORE_ALL_FINDINGS'; +type ComputeHost = 'Lambda' | 'AgentCoreRuntime'; +type ABTestVariantName = 'C' | 'T1'; // ───────────────────────────────────────────────────────────────────────────── // AGENT @@ -74,8 +93,10 @@ interface EnvVar { interface Memory { name: string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,47}$ @max 48 eventExpiryDuration: number; // @min 3 @max 365 (days) - strategies: MemoryStrategy[]; // @min 1, unique by type + strategies: MemoryStrategy[]; // Unique by type. Can be empty (short-term memory). 
tags?: Record<string, string>; + encryptionKeyArn?: string; + executionRoleArn?: string; } interface MemoryStrategy { @@ -93,4 +114,290 @@ interface MemoryStrategy { interface Credential { authorizerType: 'ApiKeyCredentialProvider' | 'OAuthCredentialProvider'; name: string; // @regex ^[a-zA-Z0-9\-_]+$ @min 1 @max 128 + // Additional fields for OAuthCredentialProvider: + discoveryUrl?: string; // OIDC discovery URL (OAuth only) + scopes?: string[]; // Supported scopes (OAuth only) + vendor?: string; // Credential provider vendor type (OAuth only, default: 'CustomOauth2') + managed?: boolean; // Whether auto-created by CLI (OAuth only) + usage?: 'inbound' | 'outbound'; // Auth direction (OAuth only) +} + +// ───────────────────────────────────────────────────────────────────────────── +// EVALUATOR +// ───────────────────────────────────────────────────────────────────────────── + +interface Evaluator { + name: string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,47}$ @max 48 + level: EvaluationLevel; + description?: string; + config: EvaluatorConfig; // Must have either llmAsAJudge or codeBased, not both + tags?: Record<string, string>; +} + +interface EvaluatorConfig { + llmAsAJudge?: LlmAsAJudgeConfig; + codeBased?: CodeBasedConfig; +} + +interface LlmAsAJudgeConfig { + model: string; // Bedrock model ID or ARN + instructions: string; // Evaluation instructions + ratingScale: RatingScale; // Must have either numerical or categorical, not both +} + +interface RatingScale { + numerical?: { value: number; label: string; definition: string }[]; + categorical?: { label: string; definition: string }[]; +} + +interface CodeBasedConfig { + managed?: ManagedCodeBasedConfig; + external?: ExternalCodeBasedConfig; +} + +interface ManagedCodeBasedConfig { + codeLocation: string; + entrypoint: string; // default 'lambda_function.handler' + timeoutSeconds: number; // @min 1 @max 300 (default 60) + additionalPolicies?: string[]; +} + +interface ExternalCodeBasedConfig { + lambdaArn: string; // @regex
^arn:aws[a-z-]*:lambda:[a-z0-9-]+:\d{12}:function:.+$ +} + +// ───────────────────────────────────────────────────────────────────────────── +// ONLINE EVAL CONFIG +// ───────────────────────────────────────────────────────────────────────────── + +interface OnlineEvalConfig { + name: string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,47}$ @max 48 + agent: string; // Agent name — must match a project agent + evaluators: string[]; // @min 1 — evaluator names, Builtin.* IDs, or evaluator ARNs + samplingRate: number; // @min 0.01 @max 100 (percentage) + description?: string; // @max 200 + enableOnCreate?: boolean; // Whether to enable on create (default: true) + tags?: Record<string, string>; +} + +// ───────────────────────────────────────────────────────────────────────────── +// GATEWAY (MCP) +// ───────────────────────────────────────────────────────────────────────────── + +interface AgentCoreGateway { + name: string; // @regex ^[0-9a-zA-Z](?:[0-9a-zA-Z-]*[0-9a-zA-Z])?$ @max 100 + description?: string; + targets: AgentCoreGatewayTarget[]; // Gateway targets + authorizerType?: GatewayAuthorizerType; // default 'NONE' + authorizerConfiguration?: AuthorizerConfig; // Required when authorizerType is 'CUSTOM_JWT' + enableSemanticSearch?: boolean; // default true + exceptionLevel?: GatewayExceptionLevel; // default 'NONE' + policyEngineConfiguration?: GatewayPolicyEngineConfiguration; + tags?: Record<string, string>; +} + +interface AuthorizerConfig { + customJwtAuthorizer?: { + discoveryUrl: string; // OIDC discovery URL (HTTPS, must end with /.well-known/openid-configuration) + allowedAudience?: string[]; + allowedClients?: string[]; + allowedScopes?: string[]; + customClaims?: CustomClaimValidation[]; + }; +} + +interface CustomClaimValidation { + inboundTokenClaimName: string; // @regex ^[A-Za-z0-9_.:-]+$ @max 255 + inboundTokenClaimValueType: 'STRING' | 'STRING_ARRAY'; + authorizingClaimMatchValue: { + claimMatchOperator: 'EQUALS' | 'CONTAINS' | 'CONTAINS_ANY'; + claimMatchValue: { + matchValueString?:
string; // @regex ^[A-Za-z0-9_.-]+$ @max 255 + matchValueStringList?: string[]; // each @regex ^[A-Za-z0-9_.-]+$ @max 255 + }; + }; +} + +interface GatewayPolicyEngineConfiguration { + policyEngineName: string; // Reference to a PolicyEngine name + mode: PolicyEngineMode; +} + +// ───────────────────────────────────────────────────────────────────────────── +// GATEWAY TARGET +// ───────────────────────────────────────────────────────────────────────────── + +interface AgentCoreGatewayTarget { + name: string; + targetType: GatewayTargetType; + toolDefinitions?: ToolDefinition[]; // Required for 'lambda' targets + compute?: ToolComputeConfig; // Required for 'lambda' and scaffold targets + endpoint?: string; // URL — required for external 'mcpServer' targets + outboundAuth?: OutboundAuth; + apiGateway?: ApiGatewayConfig; // Required for 'apiGateway' target type + schemaSource?: SchemaSource; // Required for 'openApiSchema' / 'smithyModel' targets + lambdaFunctionArn?: LambdaFunctionArnConfig; // Required for 'lambdaFunctionArn' target type +} + +interface OutboundAuth { + type: OutboundAuthType; // default 'NONE' + credentialName?: string; // Required when type is not 'NONE' + scopes?: string[]; +} + +interface ToolDefinition { + name: string; + description?: string; + inputSchema: object; // JSON Schema + outputSchema?: object; +} + +interface ToolComputeConfig { + host: ComputeHost; + implementation: ToolImplementationBinding; + // Lambda-specific: + nodeVersion?: NodeRuntime; // Required for TypeScript Lambda + pythonVersion?: PythonRuntime; // Required for Python Lambda + timeout?: number; // @min 1 @max 900 + memorySize?: number; // @min 128 @max 10240 + iamPolicy?: object; // IAM policy document + // AgentCoreRuntime-specific: + runtime?: RuntimeConfig; +} + +interface ToolImplementationBinding { + language: 'TypeScript' | 'Python'; + path: string; + handler: string; +} + +interface RuntimeConfig { + artifact: 'CodeZip'; + pythonVersion: PythonRuntime; + name: 
string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,47}$ @max 48 + entrypoint: string; // Python file path with optional handler + codeLocation: string; + instrumentation?: Instrumentation; + networkMode?: NetworkMode; // default 'PUBLIC' + description?: string; +} + +interface ApiGatewayConfig { + restApiId: string; + stage: string; + apiGatewayToolConfiguration: { + toolFilters: { + filterPath: string; + methods: ('GET' | 'POST' | 'PUT' | 'DELETE' | 'PATCH' | 'HEAD' | 'OPTIONS')[]; + }[]; + toolOverrides?: { name: string; path: string; method: string; description?: string }[]; + }; +} + +interface LambdaFunctionArnConfig { + lambdaArn: string; // @max 170 + toolSchemaFile: string; +} + +type SchemaSource = { inline: { path: string } } | { s3: { uri: string; bucketOwnerAccountId?: string } }; + +// ───────────────────────────────────────────────────────────────────────────── +// MCP RUNTIME TOOL +// ───────────────────────────────────────────────────────────────────────────── + +interface AgentCoreMcpRuntimeTool { + name: string; + toolDefinition: ToolDefinition; + compute: { + host: 'AgentCoreRuntime'; // Only AgentCoreRuntime (Python only) + implementation: ToolImplementationBinding; + runtime?: RuntimeConfig; + iamPolicy?: object; + }; + bindings?: McpRuntimeBinding[]; // Grant agents permission to invoke this tool +} + +interface McpRuntimeBinding { + runtimeName: string; // Agent runtime name to bind to + envVarName: string; // @regex ^[A-Za-z_][A-Za-z0-9_]*$ — env var for runtime ARN +} + +// ───────────────────────────────────────────────────────────────────────────── +// POLICY ENGINE +// ───────────────────────────────────────────────────────────────────────────── + +interface PolicyEngine { + name: string; // @regex ^[A-Za-z][A-Za-z0-9_]{0,47}$ @max 48 + description?: string; // @max 4096 + encryptionKeyArn?: string; + tags?: Record<string, string>; + policies: Policy[]; // Unique by name +} + +interface Policy { + name: string; // @regex ^[A-Za-z][A-Za-z0-9_]{0,47}$ @max 48 +
description?: string; // @max 4096 + statement: string; // Cedar policy statement + sourceFile?: string; + validationMode: ValidationMode; // default 'FAIL_ON_ANY_FINDINGS' +} + +// ───────────────────────────────────────────────────────────────────────────── +// CONFIG BUNDLE +// ───────────────────────────────────────────────────────────────────────────── + +interface ConfigBundle { + name: string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,99}$ @max 100 + description?: string; // @max 500 + /** Component configurations keyed by component ARN or placeholder (e.g. {{runtime:<name>}}) */ + components: Record<string, ComponentConfiguration>; + branchName?: string; // @max 128 — optional branch name for versioning + commitMessage?: string; // @max 500 — optional commit message +} + +interface ComponentConfiguration { + configuration: Record<string, unknown>; // Freeform configuration for the component +} + +// ───────────────────────────────────────────────────────────────────────────── +// AB TEST +// ───────────────────────────────────────────────────────────────────────────── + +interface ABTest { + name: string; // @regex ^[a-zA-Z][a-zA-Z0-9_]{0,47}$ @max 48 + description?: string; // @max 200 + gatewayRef: string; // Reference to the gateway (ARN or {{gateway:name}} placeholder) + roleArn?: string; + variants: [ABTestVariant, ABTestVariant]; // Exactly 2 — one 'C' (control) and one 'T1' (treatment). Weights must sum to 100.
+ evaluationConfig: { + onlineEvaluationConfigArn: string; + }; + trafficAllocationConfig?: { + routeOnHeader: { headerName: string }; + }; + maxDurationDays?: number; // @min 1 @max 90 + enableOnCreate?: boolean; +} + +interface ABTestVariant { + name: ABTestVariantName; + weight: number; // @min 1 @max 100 + variantConfiguration: { + configurationBundle: { + bundleArn: string; + bundleVersion: string; + }; + }; +} + +// ───────────────────────────────────────────────────────────────────────────── +// HTTP GATEWAY +// ───────────────────────────────────────────────────────────────────────────── + +/** @internal HTTP gateway auto-created when setting up an AB test. */ +interface HttpGateway { + name: string; // @regex ^[a-zA-Z][a-zA-Z0-9-]{0,47}$ @max 48 + description?: string; // @max 200 + runtimeRef: string; // Reference to a runtime name from spec.runtimes + roleArn?: string; // IAM role ARN — auto-created if omitted } diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts index 7812c8db3..f8e413b52 100644 --- a/src/schema/schemas/agentcore-project.ts +++ b/src/schema/schemas/agentcore-project.ts @@ -9,8 +9,11 @@ import { isReservedProjectName } from '../constants'; import { AgentEnvSpecSchema } from './agent-env'; import { AgentCoreGatewaySchema, AgentCoreGatewayTargetSchema, AgentCoreMcpRuntimeToolSchema } from './mcp'; +import { ABTestSchema } from './primitives/ab-test'; +import { ConfigBundleSchema } from './primitives/config-bundle'; import { EvaluationLevelSchema, EvaluatorConfigSchema, EvaluatorNameSchema } from './primitives/evaluator'; import { HarnessNameSchema } from './primitives/harness'; +import { HttpGatewaySchema } from './primitives/http-gateway'; import { DEFAULT_EPISODIC_REFLECTION_NAMESPACES, DEFAULT_STRATEGY_NAMESPACES, @@ -51,11 +54,18 @@ export { HarnessToolTypeSchema, HarnessModelProviderSchema, } from './primitives/harness'; +export { ConfigBundleSchema }; +export type { ComponentConfiguration, 
ComponentConfigurationMap, ConfigBundle } from './primitives/config-bundle'; +export { ConfigBundleNameSchema, ComponentConfigurationMapSchema } from './primitives/config-bundle'; export { PolicyEngineSchema }; export type { Policy, PolicyEngine, ValidationMode } from './primitives/policy'; export { PolicyEngineNameSchema, PolicyNameSchema, PolicySchema, ValidationModeSchema } from './primitives/policy'; export { TagsSchema }; export type { Tags } from './primitives/tags'; +export type { ABTestMode, TargetRef, GatewayFilter, PerVariantOnlineEvaluationConfig } from './primitives/ab-test'; +export { ABTestModeSchema, TargetRefSchema, GatewayFilterSchema } from './primitives/ab-test'; +export type { HttpGatewayTarget } from './primitives/http-gateway'; +export { HttpGatewayTargetSchema } from './primitives/http-gateway'; // ============================================================================ // ManagedBy Schema @@ -341,6 +351,36 @@ export const AgentCoreProjectSpecSchema = z name => `Duplicate harness name: ${name}` ) ), + + configBundles: z + .array(ConfigBundleSchema) + .default([]) + .superRefine( + uniqueBy( + bundle => bundle.name, + name => `Duplicate config bundle name: ${name}` + ) + ), + + abTests: z + .array(ABTestSchema) + .default([]) + .superRefine( + uniqueBy( + test => test.name, + name => `Duplicate AB test name: ${name}` + ) + ), + + httpGateways: z + .array(HttpGatewaySchema) + .default([]) + .superRefine( + uniqueBy( + gw => gw.name, + name => `Duplicate HTTP gateway name: ${name}` + ) + ), }) .strict() .superRefine((spec, ctx) => { @@ -368,6 +408,70 @@ export const AgentCoreProjectSpecSchema = z } } } + + // Validate HTTP gateway runtimeRef references + for (const gw of spec.httpGateways ?? 
[]) { + const runtimeExists = spec.runtimes.some(r => r.name === gw.runtimeRef); + if (!runtimeExists) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `HTTP gateway "${gw.name}" references unknown runtime "${gw.runtimeRef}"`, + }); + } + } + + // Validate AB test gateway references + for (const test of spec.abTests ?? []) { + const gwField = test.gatewayRef; + if (gwField && typeof gwField === 'string') { + const match = /^\{\{gateway:(.+)\}\}$/.exec(gwField); + if (match) { + const gwName = match[1]; + const gwExists = (spec.httpGateways ?? []).some(gw => gw.name === gwName); + if (!gwExists) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `AB test "${test.name}" references gateway "${gwName}" which does not exist in httpGateways`, + }); + } + + // For target-based AB tests, validate target names exist in the gateway's targets array + if (test.mode === 'target-based') { + const gw = (spec.httpGateways ?? []).find(g => g.name === gwName); + if (gw) { + const gwTargetNames = new Set((gw.targets ?? []).map(t => t.name)); + for (const variant of test.variants) { + const targetName = variant.variantConfiguration.target?.targetName; + if (targetName && !gwTargetNames.has(targetName)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `AB test "${test.name}" variant "${variant.name}" references target "${targetName}" which does not exist in gateway "${gwName}" targets`, + }); + } + } + } + } + } + } + } + + // Validate HTTP gateway target runtimeRef and qualifier references + for (const gw of spec.httpGateways ?? []) { + for (const target of gw.targets ?? 
[]) { + const runtime = spec.runtimes.find(r => r.name === target.runtimeRef); + if (!runtime) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `HTTP gateway "${gw.name}" target "${target.name}" references unknown runtime "${target.runtimeRef}"`, + }); + } else if (target.qualifier && target.qualifier !== 'DEFAULT' && !runtime.endpoints?.[target.qualifier]) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `HTTP gateway "${gw.name}" target "${target.name}" references qualifier "${target.qualifier}" which is not an endpoint on runtime "${target.runtimeRef}"`, + }); + } + } + } }); export type AgentCoreProjectSpec = z.infer<typeof AgentCoreProjectSpecSchema>; diff --git a/src/schema/schemas/deployed-state.ts b/src/schema/schemas/deployed-state.ts index 3d2727958..355bc560d 100644 --- a/src/schema/schemas/deployed-state.ts +++ b/src/schema/schemas/deployed-state.ts @@ -182,10 +182,58 @@ export const OnlineEvalDeployedStateSchema = z.object({ onlineEvaluationConfigId: z.string().min(1), onlineEvaluationConfigArn: z.string().min(1), executionStatus: z.enum(['ENABLED', 'DISABLED']).optional(), + /** Agent name this online eval config monitors. */ + agent: z.string().min(1).optional(), + /** Runtime endpoint name scoped to this online eval config.
*/ + endpoint: z.string().min(1).optional(), }); export type OnlineEvalDeployedState = z.infer<typeof OnlineEvalDeployedStateSchema>; +// ============================================================================ +// Configuration Bundle Deployed State +// ============================================================================ + +export const ConfigBundleDeployedStateSchema = z.object({ + bundleId: z.string().min(1), + bundleArn: z.string().min(1), + versionId: z.string().min(1), +}); + +export type ConfigBundleDeployedState = z.infer<typeof ConfigBundleDeployedStateSchema>; + +// ============================================================================ +// AB Test Deployed State +// ============================================================================ + +export const ABTestDeployedStateSchema = z.object({ + abTestId: z.string().min(1), + abTestArn: z.string().min(1), + /** IAM role ARN used by this AB test. */ + roleArn: z.string().min(1).optional(), + /** Whether the CLI auto-created this role (true = CLI should delete on cleanup). */ + roleCreatedByCli: z.boolean().optional(), + /** SHA-256 hash of the AB test configuration for change detection.
*/ + configHash: z.string().optional(), +}); + +export type ABTestDeployedState = z.infer<typeof ABTestDeployedStateSchema>; + +// ============================================================================ +// HTTP Gateway Deployed State +// ============================================================================ + +export const HttpGatewayDeployedStateSchema = z.object({ + gatewayId: z.string().min(1), + gatewayArn: z.string().min(1), + gatewayUrl: z.string().optional(), + targetId: z.string().min(1).optional(), + roleArn: z.string().min(1).optional(), + roleCreatedByCli: z.boolean().optional(), +}); + +export type HttpGatewayDeployedState = z.infer<typeof HttpGatewayDeployedStateSchema>; + // ============================================================================ // Runtime Endpoint Deployed State // ============================================================================ @@ -209,6 +257,9 @@ export const DeployedResourceStateSchema = z.object({ credentials: z.record(z.string(), CredentialDeployedStateSchema).optional(), evaluators: z.record(z.string(), EvaluatorDeployedStateSchema).optional(), onlineEvalConfigs: z.record(z.string(), OnlineEvalDeployedStateSchema).optional(), + configBundles: z.record(z.string(), ConfigBundleDeployedStateSchema).optional(), + abTests: z.record(z.string(), ABTestDeployedStateSchema).optional(), + httpGateways: z.record(z.string(), HttpGatewayDeployedStateSchema).optional(), policyEngines: z.record(z.string(), PolicyEngineDeployedStateSchema).optional(), policies: z.record(z.string(), PolicyDeployedStateSchema).optional(), harnesses: z.record(z.string(), HarnessDeployedStateSchema).optional(), diff --git a/src/schema/schemas/primitives/__tests__/ab-test.test.ts b/src/schema/schemas/primitives/__tests__/ab-test.test.ts new file mode 100644 index 000000000..874cd7d13 --- /dev/null +++ b/src/schema/schemas/primitives/__tests__/ab-test.test.ts @@ -0,0 +1,228 @@ +import { + ABTestDescriptionSchema, + ABTestNameSchema, + ABTestSchema, + VariantNameSchema, + VariantWeightSchema, +} from
'../ab-test'; +import { describe, expect, it } from 'vitest'; + +describe('ABTestNameSchema', () => { + it('accepts valid name starting with letter', () => { + expect(ABTestNameSchema.safeParse('MyTest_1').success).toBe(true); + }); + + it('rejects empty string', () => { + expect(ABTestNameSchema.safeParse('').success).toBe(false); + }); + + it('rejects name starting with number', () => { + expect(ABTestNameSchema.safeParse('1test').success).toBe(false); + }); + + it('rejects name with hyphens', () => { + expect(ABTestNameSchema.safeParse('my-test').success).toBe(false); + }); + + it('rejects name over 48 chars', () => { + expect(ABTestNameSchema.safeParse('a'.repeat(49)).success).toBe(false); + }); + + it('accepts name at 48 chars', () => { + expect(ABTestNameSchema.safeParse('a'.repeat(48)).success).toBe(true); + }); +}); + +describe('ABTestDescriptionSchema', () => { + it('accepts undefined (optional)', () => { + expect(ABTestDescriptionSchema.safeParse(undefined).success).toBe(true); + }); + + it('rejects empty string', () => { + expect(ABTestDescriptionSchema.safeParse('').success).toBe(false); + }); + + it('rejects string over 200 chars', () => { + expect(ABTestDescriptionSchema.safeParse('x'.repeat(201)).success).toBe(false); + }); + + it('accepts string at exactly 200 chars', () => { + expect(ABTestDescriptionSchema.safeParse('x'.repeat(200)).success).toBe(true); + }); +}); + +describe('VariantNameSchema', () => { + it('accepts C', () => { + expect(VariantNameSchema.safeParse('C').success).toBe(true); + }); + + it('accepts T1', () => { + expect(VariantNameSchema.safeParse('T1').success).toBe(true); + }); + + it('rejects other names', () => { + expect(VariantNameSchema.safeParse('T2').success).toBe(false); + }); +}); + +describe('VariantWeightSchema', () => { + it('accepts 1', () => { + expect(VariantWeightSchema.safeParse(1).success).toBe(true); + }); + + it('accepts 100', () => { + expect(VariantWeightSchema.safeParse(100).success).toBe(true); + }); + + 
it('rejects 0', () => { + expect(VariantWeightSchema.safeParse(0).success).toBe(false); + }); + + it('rejects 101', () => { + expect(VariantWeightSchema.safeParse(101).success).toBe(false); + }); + + it('rejects non-integer', () => { + expect(VariantWeightSchema.safeParse(50.5).success).toBe(false); + }); +}); + +describe('ABTestSchema', () => { + const validABTest = { + name: 'TestOne', + gatewayRef: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:gateway/gw-123', + variants: [ + { + name: 'C', + weight: 80, + variantConfiguration: { + configurationBundle: { bundleArn: 'arn:bundle:control', bundleVersion: 'v1' }, + }, + }, + { + name: 'T1', + weight: 20, + variantConfiguration: { + configurationBundle: { bundleArn: 'arn:bundle:treatment', bundleVersion: 'v1' }, + }, + }, + ], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval:config' }, + }; + + it('accepts valid minimal AB test', () => { + expect(ABTestSchema.safeParse(validABTest).success).toBe(true); + }); + + it('accepts with optional fields', () => { + const result = ABTestSchema.safeParse({ + ...validABTest, + description: 'A test', + roleArn: 'arn:aws:iam::123:role/MyRole', + maxDurationDays: 30, + enableOnCreate: true, + trafficAllocationConfig: { routeOnHeader: { headerName: 'X-AB-Route' } }, + }); + expect(result.success).toBe(true); + }); + + it('rejects with only 1 variant', () => { + const result = ABTestSchema.safeParse({ + ...validABTest, + variants: [validABTest.variants[0]], + }); + expect(result.success).toBe(false); + }); + + it('rejects with 3 variants', () => { + const result = ABTestSchema.safeParse({ + ...validABTest, + variants: [...validABTest.variants, validABTest.variants[0]], + }); + expect(result.success).toBe(false); + }); + + it('rejects maxDurationDays outside 1-90', () => { + expect(ABTestSchema.safeParse({ ...validABTest, maxDurationDays: 0 }).success).toBe(false); + expect(ABTestSchema.safeParse({ ...validABTest, maxDurationDays: 91 }).success).toBe(false); + }); + + 
describe('variant weight sum validation', () => { + it('accepts weights summing to 100 (50/50)', () => { + const test = { + ...validABTest, + variants: [ + { ...validABTest.variants[0], weight: 50 }, + { ...validABTest.variants[1], weight: 50 }, + ], + }; + expect(ABTestSchema.safeParse(test).success).toBe(true); + }); + + it('accepts weights summing to 100 (1/99)', () => { + const test = { + ...validABTest, + variants: [ + { ...validABTest.variants[0], weight: 1 }, + { ...validABTest.variants[1], weight: 99 }, + ], + }; + expect(ABTestSchema.safeParse(test).success).toBe(true); + }); + + it('rejects weights summing to 150', () => { + const test = { + ...validABTest, + variants: [ + { ...validABTest.variants[0], weight: 80 }, + { ...validABTest.variants[1], weight: 70 }, + ], + }; + const result = ABTestSchema.safeParse(test); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('sum to 100'))).toBe(true); + } + }); + + it('rejects weights summing to 2', () => { + const test = { + ...validABTest, + variants: [ + { ...validABTest.variants[0], weight: 1 }, + { ...validABTest.variants[1], weight: 1 }, + ], + }; + expect(ABTestSchema.safeParse(test).success).toBe(false); + }); + }); + + describe('variant uniqueness validation', () => { + it('rejects two control variants', () => { + const test = { + ...validABTest, + variants: [ + { ...validABTest.variants[0], name: 'C', weight: 50 }, + { ...validABTest.variants[1], name: 'C', weight: 50 }, + ], + }; + const result = ABTestSchema.safeParse(test); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('control (C) and one treatment (T1)'))).toBe(true); + } + }); + + it('rejects two treatment variants', () => { + const test = { + ...validABTest, + variants: [ + { ...validABTest.variants[0], name: 'T1', weight: 50 }, + { ...validABTest.variants[1], name: 'T1', weight: 50 }, + ], + }; + const 
result = ABTestSchema.safeParse(test); + expect(result.success).toBe(false); + }); + }); +}); diff --git a/src/schema/schemas/primitives/__tests__/http-gateway.test.ts b/src/schema/schemas/primitives/__tests__/http-gateway.test.ts new file mode 100644 index 000000000..259fe1a21 --- /dev/null +++ b/src/schema/schemas/primitives/__tests__/http-gateway.test.ts @@ -0,0 +1,82 @@ +import { HttpGatewayNameSchema, HttpGatewaySchema } from '../http-gateway'; +import { describe, expect, it } from 'vitest'; + +describe('HttpGatewayNameSchema', () => { + it('accepts valid name starting with letter', () => { + expect(HttpGatewayNameSchema.safeParse('MyGateway1').success).toBe(true); + }); + + it('accepts name with hyphens', () => { + expect(HttpGatewayNameSchema.safeParse('my-gateway').success).toBe(true); + }); + + it('rejects empty string', () => { + expect(HttpGatewayNameSchema.safeParse('').success).toBe(false); + }); + + it('rejects name starting with number', () => { + expect(HttpGatewayNameSchema.safeParse('1gateway').success).toBe(false); + }); + + it('rejects name with underscores', () => { + expect(HttpGatewayNameSchema.safeParse('my_gateway').success).toBe(false); + }); + + it('rejects name over 48 chars', () => { + expect(HttpGatewayNameSchema.safeParse('a'.repeat(49)).success).toBe(false); + }); + + it('accepts name at 48 chars', () => { + expect(HttpGatewayNameSchema.safeParse('a'.repeat(48)).success).toBe(true); + }); +}); + +describe('HttpGatewaySchema', () => { + const validHttpGateway = { + name: 'MyGateway', + runtimeRef: 'my-runtime', + }; + + it('accepts valid HTTP gateway with required fields', () => { + expect(HttpGatewaySchema.safeParse(validHttpGateway).success).toBe(true); + }); + + it('accepts valid HTTP gateway with all optional fields', () => { + const result = HttpGatewaySchema.safeParse({ + ...validHttpGateway, + description: 'A test gateway', + roleArn: 'arn:aws:iam::123456789012:role/MyRole', + }); + expect(result.success).toBe(true); + }); + + 
it('rejects missing name', () => { + const { name: _, ...withoutName } = validHttpGateway; + expect(HttpGatewaySchema.safeParse(withoutName).success).toBe(false); + }); + + it('rejects missing runtimeRef', () => { + const { runtimeRef: _, ...withoutRuntimeRef } = validHttpGateway; + expect(HttpGatewaySchema.safeParse(withoutRuntimeRef).success).toBe(false); + }); + + it('rejects name too long (>48 chars)', () => { + expect(HttpGatewaySchema.safeParse({ ...validHttpGateway, name: 'a'.repeat(49) }).success).toBe(false); + }); + + it('rejects name starting with number', () => { + expect(HttpGatewaySchema.safeParse({ ...validHttpGateway, name: '1Gateway' }).success).toBe(false); + }); + + it('rejects name with invalid characters (underscores)', () => { + expect(HttpGatewaySchema.safeParse({ ...validHttpGateway, name: 'my_gateway' }).success).toBe(false); + }); + + it('rejects extra unknown fields (.strict())', () => { + const result = HttpGatewaySchema.safeParse({ + ...validHttpGateway, + unknownField: 'should fail', + }); + expect(result.success).toBe(false); + }); +}); diff --git a/src/schema/schemas/primitives/ab-test.ts b/src/schema/schemas/primitives/ab-test.ts new file mode 100644 index 000000000..ec04ab4f7 --- /dev/null +++ b/src/schema/schemas/primitives/ab-test.ts @@ -0,0 +1,147 @@ +import { z } from 'zod'; + +// ============================================================================ +// AB Test Types +// ============================================================================ + +export const ABTestNameSchema = z + .string() + .min(1, 'Name is required') + .max(48) + .regex( + /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/, + 'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)' + ); + +export const ABTestDescriptionSchema = z.string().min(1).max(200).optional(); + +export const ABTestModeSchema = z.enum(['config-bundle', 'target-based']).optional().default('config-bundle'); + +export type ABTestMode = z.infer<typeof ABTestModeSchema>; + +export
const VariantNameSchema = z.enum(['C', 'T1']); + +export const VariantWeightSchema = z.number().int().min(1).max(100); + +// ── Config Bundle variant configuration ──────────────────────────────────── + +export const ConfigurationBundleRefSchema = z.object({ + bundleArn: z.string().min(1), + bundleVersion: z.string().min(1), +}); + +export type ConfigurationBundleRef = z.infer; + +// ── Target-based variant configuration ───────────────────────────────────── + +export const TargetRefSchema = z.object({ + targetName: z.string().min(1).max(100), +}); + +export type TargetRef = z.infer; + +// ── Variant configuration union ──────────────────────────────────────────── +// Exactly one of configurationBundle or target must be set (XOR). + +const ConfigBundleVariantConfigSchema = z.object({ + configurationBundle: ConfigurationBundleRefSchema, + target: z.never().optional(), +}); + +const TargetVariantConfigSchema = z.object({ + configurationBundle: z.never().optional(), + target: TargetRefSchema, +}); + +export const VariantConfigurationSchema = z.union([ConfigBundleVariantConfigSchema, TargetVariantConfigSchema]); + +export type VariantConfiguration = z.infer; + +export const ABTestVariantSchema = z.object({ + name: VariantNameSchema, + weight: VariantWeightSchema, + variantConfiguration: VariantConfigurationSchema, +}); + +export type ABTestVariant = z.infer; + +// ── Evaluation config union ──────────────────────────────────────────────── + +export const PerVariantOnlineEvaluationConfigSchema = z.object({ + treatmentName: VariantNameSchema, + onlineEvaluationConfigArn: z.string().min(1), +}); + +export type PerVariantOnlineEvaluationConfig = z.infer; + +export const ABTestEvaluationConfigSchema = z.union([ + z.object({ onlineEvaluationConfigArn: z.string().min(1) }), + z.object({ + perVariantOnlineEvaluationConfig: z.array(PerVariantOnlineEvaluationConfigSchema).length(2), + }), +]); + +export type ABTestEvaluationConfig = z.infer; + +// ── Gateway filter 
───────────────────────────────────────────────────────── + +export const GatewayFilterSchema = z.object({ + targetPaths: z.array(z.string().min(1).max(500)).max(1), +}); + +export type GatewayFilter = z.infer; + +// ── Traffic allocation ───────────────────────────────────────────────────── + +export const TrafficRouteOnHeaderSchema = z.object({ + headerName: z.string().min(1), +}); + +export const TrafficAllocationConfigSchema = z.object({ + routeOnHeader: TrafficRouteOnHeaderSchema, +}); + +export type TrafficAllocationConfig = z.infer; + +// ── AB Test schema ───────────────────────────────────────────────────────── + +export const ABTestSchema = z + .object({ + name: ABTestNameSchema, + description: ABTestDescriptionSchema, + mode: ABTestModeSchema, + gatewayRef: z.string().min(1), + roleArn: z.string().min(1).optional(), + variants: z.array(ABTestVariantSchema).length(2), + evaluationConfig: ABTestEvaluationConfigSchema, + gatewayFilter: GatewayFilterSchema.optional(), + trafficAllocationConfig: TrafficAllocationConfigSchema.optional(), + maxDurationDays: z.number().int().min(1).max(90).optional(), + enableOnCreate: z.boolean().optional(), + promoted: z.boolean().optional(), + }) + .refine( + data => { + const names = data.variants.map(v => v.name); + return names.includes('C') && names.includes('T1'); + }, + { message: 'Variants must include exactly one control (C) and one treatment (T1)', path: ['variants'] } + ) + .refine(data => data.variants.reduce((sum, v) => sum + v.weight, 0) === 100, { + message: 'Variant weights must sum to 100', + path: ['variants'], + }) + .refine( + data => { + if (data.mode === 'target-based') { + return data.variants.every(v => v.variantConfiguration.target != null); + } + return data.variants.every(v => v.variantConfiguration.configurationBundle != null); + }, + { + message: 'Target-based mode requires target on each variant; config-bundle mode requires configurationBundle', + path: ['variants'], + } + ); + +export type ABTest 
= z.infer; diff --git a/src/schema/schemas/primitives/config-bundle.ts b/src/schema/schemas/primitives/config-bundle.ts new file mode 100644 index 000000000..06702bd3c --- /dev/null +++ b/src/schema/schemas/primitives/config-bundle.ts @@ -0,0 +1,49 @@ +import { z } from 'zod'; + +// ============================================================================ +// Configuration Bundle Types +// ============================================================================ + +export const ConfigBundleNameSchema = z + .string() + .min(1, 'Name is required') + .max(100) + .regex( + /^[a-zA-Z][a-zA-Z0-9_]{0,99}$/, + 'Must begin with a letter and contain only alphanumeric characters and underscores (max 100 chars)' + ); + +export const ConfigBundleDescriptionSchema = z.string().min(1).max(500).optional(); + +/** Freeform configuration for a single component within a bundle. */ +export const ComponentConfigurationSchema = z.object({ + configuration: z.record(z.string(), z.unknown()), +}); + +export type ComponentConfiguration = z.infer; + +/** + * Map of component identifier (ARN or placeholder) to its configuration. + * + * Keys are typically resource ARNs (runtime ARN, gateway ARN) but may use + * placeholder tokens like `{{runtime:}}` when the bundle is created + * before deploy and ARNs are not yet available. + */ +export const ComponentConfigurationMapSchema = z.record(z.string(), ComponentConfigurationSchema); + +export type ComponentConfigurationMap = z.infer; + +export const ConfigBundleSchema = z.object({ + name: ConfigBundleNameSchema, + /** Discriminator required by the CDK package's schema validation. */ + type: z.literal('ConfigurationBundle').default('ConfigurationBundle'), + description: ConfigBundleDescriptionSchema, + /** Component configurations keyed by component ARN or placeholder. */ + components: ComponentConfigurationMapSchema, + /** Optional branch name for versioning. 
*/ + branchName: z.string().max(128).optional(), + /** Optional commit message for this version. */ + commitMessage: z.string().max(500).optional(), +}); + +export type ConfigBundle = z.infer; diff --git a/src/schema/schemas/primitives/http-gateway.ts b/src/schema/schemas/primitives/http-gateway.ts new file mode 100644 index 000000000..f40505b5f --- /dev/null +++ b/src/schema/schemas/primitives/http-gateway.ts @@ -0,0 +1,42 @@ +import { z } from 'zod'; + +// ============================================================================ +// HTTP Gateway Types +// ============================================================================ + +export const HttpGatewayNameSchema = z + .string() + .min(1, 'Name is required') + .max(48) + .regex( + /^[a-zA-Z][a-zA-Z0-9-]{0,47}$/, + 'Must begin with a letter and contain only alphanumeric characters and hyphens (max 48 chars)' + ); + +export const HttpGatewayTargetSchema = z.object({ + /** Gateway target name (referenced by AB test variants) */ + name: z.string().min(1).max(100), + /** Reference to a runtime name from spec.runtimes */ + runtimeRef: z.string().min(1), + /** Endpoint qualifier on the runtime (e.g., 'prod', 'staging'). Defaults to 'DEFAULT'. */ + qualifier: z.string().min(1).default('DEFAULT'), +}); + +export type HttpGatewayTarget = z.infer; + +export const HttpGatewaySchema = z + .object({ + /** Unique name for the HTTP gateway */ + name: HttpGatewayNameSchema, + /** Optional description */ + description: z.string().min(1).max(200).optional(), + /** Reference to a runtime name from spec.runtimes. One target is created per gateway pointing to this runtime. */ + runtimeRef: z.string().min(1), + /** IAM role ARN for gateway execution. Auto-created if omitted. */ + roleArn: z.string().min(1).optional(), + /** Additional targets for the gateway (for target-based AB testing). 
*/ + targets: z.array(HttpGatewayTargetSchema).optional(), + }) + .strict(); + +export type HttpGateway = z.infer; diff --git a/src/schema/schemas/primitives/index.ts b/src/schema/schemas/primitives/index.ts index 0df1f3c3e..734a976ef 100644 --- a/src/schema/schemas/primitives/index.ts +++ b/src/schema/schemas/primitives/index.ts @@ -1,3 +1,24 @@ +export type { + ABTest, + ABTestVariant, + ABTestEvaluationConfig, + ConfigurationBundleRef, + TrafficAllocationConfig, + VariantConfiguration, +} from './ab-test'; +export { + ABTestNameSchema, + ABTestDescriptionSchema, + ABTestSchema, + ABTestVariantSchema, + ABTestEvaluationConfigSchema, + ConfigurationBundleRefSchema, + TrafficAllocationConfigSchema, + VariantConfigurationSchema, + VariantNameSchema, + VariantWeightSchema, +} from './ab-test'; + export type { MemoryStrategy, MemoryStrategyType } from './memory'; export { DEFAULT_EPISODIC_REFLECTION_NAMESPACES, @@ -68,3 +89,6 @@ export { HarnessTruncationConfigSchema, HarnessTruncationStrategySchema, } from './harness'; + +export type { HttpGateway } from './http-gateway'; +export { HttpGatewayNameSchema, HttpGatewaySchema } from './http-gateway'; diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts index 6dbc0787f..5b6f13cb6 100644 --- a/src/schema/schemas/primitives/online-eval-config.ts +++ b/src/schema/schemas/primitives/online-eval-config.ts @@ -18,6 +18,8 @@ export const OnlineEvalConfigSchema = z.object({ name: OnlineEvalConfigNameSchema, /** Agent name to monitor (must match a project agent) */ agent: z.string().min(1, 'Agent name is required'), + /** Optional runtime endpoint name to scope monitoring to a specific endpoint */ + endpoint: z.string().min(1).optional(), /** Evaluator names (custom), Builtin.* IDs, or evaluator ARNs */ evaluators: z.array(z.string().min(1)).min(1, 'At least one evaluator is required'), /** Sampling rate as a percentage (0.01 to 100) */ diff --git 
a/src/test-utils/cli-runner.ts b/src/test-utils/cli-runner.ts index 789624364..4526546c5 100644 --- a/src/test-utils/cli-runner.ts +++ b/src/test-utils/cli-runner.ts @@ -72,6 +72,14 @@ function getCLIPath(): string { * Run the AgentCore CLI via the local build (unit/integ tests). * Skips dependency installation by default for speed. */ -export async function runCLI(args: string[], cwd: string, skipInstall = true): Promise { - return spawnAndCollect('node', [getCLIPath(), ...args], cwd, skipInstall ? { AGENTCORE_SKIP_INSTALL: '1' } : {}); +export async function runCLI( + args: string[], + cwd: string, + options: { skipInstall?: boolean; env?: Record } = {} +): Promise { + const { skipInstall = true, env } = options; + return spawnAndCollect('node', [getCLIPath(), ...args], cwd, { + ...(skipInstall ? { AGENTCORE_SKIP_INSTALL: '1' } : {}), + ...env, + }); } diff --git a/src/test-utils/index.ts b/src/test-utils/index.ts index ff127a35e..2920b822a 100644 --- a/src/test-utils/index.ts +++ b/src/test-utils/index.ts @@ -2,13 +2,33 @@ * Shared test utilities for AgentCore CLI tests. * Import these helpers instead of duplicating code in each test file. 
*/ +import { runCLI as runCLIImpl } from './cli-runner.js'; +import { expect } from 'vitest'; export { runCLI, spawnAndCollect, cleanSpawnEnv, type RunResult } from './cli-runner.js'; +export { createTelemetryHelper, type TelemetryHelper, type TelemetryEntry } from './telemetry-helper.js'; export { exists } from './fs-helpers.js'; export { hasCommand, hasAwsCredentials, prereqs } from './prereqs.js'; export { createTestProject, type TestProject, type CreateTestProjectOptions } from './project-factory.js'; export { readProjectConfig } from './config-reader.js'; +export async function runSuccess(args: string[], cwd: string): Promise> { + const result = await runCLIImpl(args, cwd); + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', true); + return json as Record; +} + +export async function runFailure(args: string[], cwd: string): Promise> { + const result = await runCLIImpl(args, cwd); + expect(result.exitCode).toBe(1); + const json: unknown = parseJsonOutput(result.stdout); + expect(json).toHaveProperty('success', false); + expect(json).toHaveProperty('error'); + return json as Record; +} + /** * Retry an async function up to `times` attempts with a delay between retries. 
*/ diff --git a/src/test-utils/project-factory.ts b/src/test-utils/project-factory.ts index 77c77e1c6..61525f6c5 100644 --- a/src/test-utils/project-factory.ts +++ b/src/test-utils/project-factory.ts @@ -65,7 +65,7 @@ export async function createTestProject(options: CreateTestProjectOptions = {}): args.push('--json'); - const result = await runCLI(args, testDir, skipInstall); + const result = await runCLI(args, testDir, { skipInstall }); if (result.exitCode !== 0) { // Clean up on failure diff --git a/src/test-utils/telemetry-helper.ts b/src/test-utils/telemetry-helper.ts new file mode 100644 index 000000000..e7fa58949 --- /dev/null +++ b/src/test-utils/telemetry-helper.ts @@ -0,0 +1,56 @@ +import { globSync } from 'glob'; +import { mkdtempSync, readFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { expect } from 'vitest'; + +export interface TelemetryEntry { + value: number; + attrs: Record; +} + +export interface TelemetryHelper { + /** Temp directory used as AGENTCORE_CONFIG_DIR */ + dir: string; + /** Env vars to pass to runCLI to enable audit mode */ + env: { AGENTCORE_TELEMETRY_AUDIT: '1'; AGENTCORE_CONFIG_DIR: string }; + /** Read all JSONL entries from the audit telemetry directory */ + readEntries: () => TelemetryEntry[]; + /** Assert a metric was emitted with attrs matching the given subset */ + assertMetricEmitted: (expected: Record) => void; + /** Delete telemetry entries only (keeps the config dir) */ + clearEntries: () => void; + /** Delete the entire config directory — call in afterAll */ + destroy: () => void; +} + +export function createTelemetryHelper(): TelemetryHelper { + const dir = mkdtempSync(join(tmpdir(), 'agentcore-audit-')); + const helper: TelemetryHelper = { + dir, + env: { AGENTCORE_TELEMETRY_AUDIT: '1', AGENTCORE_CONFIG_DIR: dir }, + readEntries() { + return globSync(join(dir, 'telemetry', '*.json')).flatMap(f => + readFileSync(f, 'utf-8') + .trim() + .split('\n') + 
.map(line => JSON.parse(line) as TelemetryEntry) + ); + }, + assertMetricEmitted(expected) { + const entries = helper.readEntries(); + const match = entries.find(e => Object.entries(expected).every(([k, v]) => String(e.attrs[k]) === String(v))); + expect( + match, + `No telemetry entry matching ${JSON.stringify(expected)}\nFound ${entries.length} entries:\n${entries.map(e => JSON.stringify(e.attrs)).join('\n')}` + ).toBeDefined(); + }, + clearEntries() { + rmSync(join(dir, 'telemetry'), { recursive: true, force: true }); + }, + destroy() { + rmSync(dir, { recursive: true, force: true }); + }, + }; + return helper; +}