From bd04ab1467b08e8ae1aaaf13495813a2b825f1da Mon Sep 17 00:00:00 2001 From: Ignacio Pardo Date: Tue, 7 Apr 2026 19:24:26 -0300 Subject: [PATCH 01/33] feat: SDK discover schema and base infrastructure Add discover.json validator, expand scenarios validator, update plugin manifest for marketplace installability, and document scenario recipes. Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 6 +- .claude-plugin/plugin.json | 7 +- .github/workflows/tests.yml | 2 +- CLAUDE.md | 10 ++- README.md | 101 ++++++++++++++++++++---- hooks/validate-pipeline-output.sh | 9 +++ hooks/validators/validate_discover.py | 102 +++++++++++++++++++++++++ hooks/validators/validate_scenarios.py | 100 +++++++++++++++++++++++- 8 files changed, 311 insertions(+), 26 deletions(-) create mode 100644 hooks/validators/validate_discover.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 8119710..0cec48e 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,11 @@ "plugins": [ { "name": "autonoma-test-planner", - "source": "./", + "source": { + "source": "url", + "url": "https://github.com/IgnacioPardo/test-planner-plugin-sc-v2.git", + "ref": "IgnacioPardo/sdk-scenarios" + }, "description": "Generates comprehensive E2E test cases through a validated 4-step pipeline with deterministic validation" } ] diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index bade427..2de57c6 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,11 +1,8 @@ { "name": "autonoma-test-planner", "description": "Generates comprehensive E2E test cases for a codebase through a validated multi-step pipeline with deterministic validation at each step", - "version": "1.1.0", + "version": "1.2.1", "author": { "name": "Autonoma" - }, - "commands": [ - "./commands" - ] + } } diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 73754fe..f2c1c4d 100644 --- 
a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,5 +14,5 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.11" - - run: pip install pytest pyyaml + - run: pip install pytest pyyaml Faker - run: pytest tests/ -v diff --git a/CLAUDE.md b/CLAUDE.md index 3822134..c7642a5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,9 +10,9 @@ commands/generate-tests.md # Entry point — dispatches the 4-step pipeline skills/generate-tests/SKILL.md # Orchestrator skill agents/ # Isolated subagents (one per step) kb-generator.md # Step 1: Knowledge base → autonoma/AUTONOMA.md + features.json - scenario-generator.md # Step 2: Scenarios → autonoma/scenarios.md + scenario-generator.md # Step 2: Discover + scenarios → autonoma/discover.json + autonoma/scenarios.md test-case-generator.md # Step 3: Tests → autonoma/qa-tests/INDEX.md + test files - env-factory-generator.md # Step 4: Environment factory endpoint + env-factory-generator.md # Step 4: Environment Factory implementation/integration + scenario validation hooks/ hooks.json # PostToolUse hook config (triggers on Write) validate-pipeline-output.sh # Bash dispatcher → routes to Python validators @@ -23,7 +23,7 @@ hooks/ Each step spawns an isolated subagent. After each Write, the PostToolUse hook in `hooks/hooks.json` runs `validate-pipeline-output.sh`, which pattern-matches the file path and runs the appropriate Python validator. Validators exit 0 (OK) or 2 (block with error message). -Steps 1-3 require user confirmation before advancing. Step 4 is the final step (no gate). +Steps 1-3 require user confirmation before advancing. Step 4 is the final step. ## Validation @@ -32,8 +32,10 @@ Validators are in `hooks/validators/`. 
They parse YAML frontmatter and check req | Validator | File matched | Key checks | |-----------|-------------|------------| | `validate_kb.py` | `*/autonoma/AUTONOMA.md` | app_name, app_description (≥20 chars), core_flows with at least one `core: true` | +| `validate_discover.py` | `*/autonoma/discover.json` | schema object, models, edges, relations, scopeField | | `validate_features.py` | `*/autonoma/features.json` | features array length matches total_features, valid types, at least one core feature | -| `validate_scenarios.py` | `*/autonoma/scenarios.md` | scenario_count ≥ 3, standard/empty/large scenarios present, entity_types | +| `validate_scenarios.py` | `*/autonoma/scenarios.md` | scenario_count ≥ 3, standard/empty/large scenarios present, entity_types, discover metadata, variable field strategy | +| `validate_scenario_recipes.py` | `*/autonoma/scenario-recipes.json` | approved recipe file, validation mode, standard/empty/large present, lifecycle status | | `validate_test_index.py` | `*/autonoma/qa-tests/INDEX.md` | test totals match folder sums, criticality sums, cross-checks against features.json | | `validate_test_file.py` | `*/autonoma/qa-tests/*/[!I]*.md` | title, description, criticality (critical/high/mid/low), scenario, flow | diff --git a/README.md b/README.md index d264d6d..176ea66 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ Analyzes your frontend codebase and produces `autonoma/AUTONOMA.md` — a user-p ### Step 2: Scenarios -Reads the knowledge base and your backend data model to design three test data environments: `standard` (realistic variety), `empty` (empty states), and `large` (pagination/performance). Outputs `autonoma/scenarios.md` with frontmatter summarizing each scenario. +Reads the knowledge base and the SDK `discover` response from your backend Environment Factory to design three test data environments: `standard` (realistic variety), `empty` (empty states), and `large` (pagination/performance). 
Outputs `autonoma/discover.json` plus `autonoma/scenarios.md`, preserving the legacy scenario summary while adding schema metadata and minimal variable-field planning. -**You review**: entity names, counts, and relationships. These become hard assertions in your tests. +**You review**: entity names, counts, relationships, and which values truly must stay generated. Fixed values are preferred because they become stable test assertions; if uniqueness is needed, the planner should first prefer concrete hardcoded values with a discriminator. Variable fields are exceptions used only for genuinely dynamic values. Generator hints are optional and are not tied to `faker`. ### Step 3: E2E Tests @@ -48,13 +48,81 @@ Generates markdown test files organized by feature in `autonoma/qa-tests/`. Each An `INDEX.md` tracks total test count, folder breakdown, and coverage correlation with your codebase size. +`scenarios.md` is fixture input for this step, not the subject under test. Step 3 should not spend test budget verifying seeded counts or Environment Factory correctness. + **You review**: test distribution and coverage correlation. Test count should roughly match 3-5x your route/feature count. ### Step 4: Environment Factory -Implements an endpoint in your backend that creates and tears down isolated test data for each scenario. Handles `discover`, `up`, and `down` actions with HMAC-SHA256 request signing and JWT-signed refs for safe teardown. +Implements or completes the backend Environment Factory so the planned scenarios can actually be created and torn down through the current SDK contract. Step 4 includes backend wiring plus validation: `discover`, `up`, `down`, request signing, refs signing, a smoke-tested lifecycle, and validation of the planned scenarios with `autonoma/scenario-recipes.json`. 
After validation, the plugin uploads the parsed recipe document to the setup API through the dedicated `scenario-recipe-versions` route so Step 04 in `agent` can persist normalized scenario data directly. + +**You review**: where the Environment Factory lives, what changed, whether a smoke `discover` → `up` → `down` check passed, and whether `standard`, `empty`, and `large` all passed lifecycle validation. + +## Scenario Recipes + +`autonoma/scenario-recipes.json` is the validated handoff between planning and execution. It is produced in Step 4 after the Environment Factory has been implemented or verified and after each scenario has passed lifecycle validation. + +The file contains: + +- top-level metadata: `version`, `source`, and `validationMode` +- one recipe per named scenario, usually `standard`, `empty`, and `large` +- for each recipe: + - `name` and `description` + - `create`: the inline data graph Autonoma will send to the SDK `up` action + - `validation`: proof that the recipe passed `checkScenario`, `checkAllScenarios`, or endpoint lifecycle validation + +Conceptually, a scenario recipe is not a test case. It is a data fixture definition for the Environment Factory. The `create` payload describes which records should exist before a run starts, including nested records and references such as `_alias` and `_ref`. 
+ +Example shape: + +```json +{ + "version": 1, + "source": { + "discoverPath": "autonoma/discover.json", + "scenariosPath": "autonoma/scenarios.md" + }, + "validationMode": "sdk-check", + "recipes": [ + { + "name": "standard", + "description": "Realistic baseline workspace", + "create": { + "User": [{ "email": "{{owner_email}}" }] + }, + "variables": { + "owner_email": { + "strategy": "derived", + "source": "testRunId", + "format": "owner+{testRunId}@example.com" + } + }, + "validation": { + "status": "validated", + "method": "checkScenario", + "phase": "ok" + } + } + ] +} +``` + +Persisted recipes store tokenized `create` payloads plus `variables` metadata — never resolved concrete values. The `variables` field defines how each `{{token}}` is resolved at runtime using one of three strategies: `literal`, `derived` (from `testRunId`), or `faker`. This allows the `agent` to resolve the same tokens later for real runs. + +During Step 4, the plugin runs a preflight check that resolves tokens into transient concrete payloads and sends signed `up`/`down` requests to the live SDK endpoint. The write hook also enforces that same preflight before a final `autonoma/scenario-recipes.json` write is accepted. These transient values are never persisted. + +Storage semantics: -**You review**: implementation plan before any code is written. The endpoint never modifies existing data. 
+- in this plugin repo, `autonoma/scenario-recipes.json` is a local output artifact so the user and validators can inspect it +- when uploaded to `agent`, the backend does not keep the raw JSON file as text +- instead, `agent` parses the document and stores the approved scenario recipe data in its scenario JSONB storage through the `scenario-recipe-versions` setup endpoint + +Runtime semantics: + +- the planner still thinks in named scenarios like `standard`, `empty`, and `large` +- the SDK protocol does not require those names on the wire +- before a run, Autonoma resolves the active stored recipe version for the selected scenario and sends its `create` payload to the Environment Factory `up` action +- after the run, Autonoma calls `down` using the returned teardown refs/token ## Validation @@ -63,24 +131,26 @@ Every output file has YAML frontmatter validated by shell scripts (not prompts). | File | What's validated | |------|-----------------| | `AUTONOMA.md` | core_flows table, app description, feature/skill counts | -| `scenarios.md` | scenario count, required scenarios (standard/empty/large), entity types | +| `discover.json` | SDK discover schema shape: models, edges, relations, scopeField, and supported `type` formats | +| `scenarios.md` | scenario count, required scenarios (standard/empty/large), entity types, discover metadata, minimal variable fields | +| `scenario-recipes.json` | validated recipe file, discover-aware model/field/type parity, required scenarios, optional variables consistency, and mandatory live endpoint preflight | | `INDEX.md` | test totals match folder sums, criticality counts sum correctly, test count within expected range | | Each test file | title, description, criticality (critical/high/mid/low), scenario, flow | -## Environment Variables (Step 4) +## Environment Variables -Step 4 requires two secrets for the Environment Factory endpoint: +Step 2 and Step 4 use the live SDK endpoint when fetching `discover` or validating through 
HTTP: ```bash -# Generate secrets -openssl rand -hex 32 # AUTONOMA_SIGNING_SECRET -openssl rand -hex 32 # AUTONOMA_JWT_SECRET +AUTONOMA_SDK_ENDPOINT= +AUTONOMA_SHARED_SECRET= ``` -Add to your `.env`: -``` -AUTONOMA_SIGNING_SECRET= -AUTONOMA_JWT_SECRET= +Step 4 backend implementation uses the current SDK secret names: + +```bash +AUTONOMA_SHARED_SECRET= +AUTONOMA_SIGNING_SECRET= ``` ## Requirements @@ -115,8 +185,11 @@ autonoma-test-planner/ ├── hooks/ │ ├── hooks.json # PostToolUse hook config │ ├── validate-pipeline-output.sh # Validation dispatcher +│ ├── preflight_scenario_recipes.py # Preflight resolver + endpoint lifecycle checker │ └── validators/ │ ├── validate_kb.py +│ ├── validate_discover.py +│ ├── validate_scenario_recipes.py │ ├── validate_scenarios.py │ ├── validate_test_index.py │ └── validate_test_file.py diff --git a/hooks/validate-pipeline-output.sh b/hooks/validate-pipeline-output.sh index 5fda0fe..c64d763 100755 --- a/hooks/validate-pipeline-output.sh +++ b/hooks/validate-pipeline-output.sh @@ -16,6 +16,11 @@ fi SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" VALIDATORS_DIR="$SCRIPT_DIR/validators" +# Persist the plugin root so orchestrator/subagent bash snippets can find plugin-local scripts. +# This hook is the earliest reliable place where we know the plugin directory. +PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +echo "$PLUGIN_ROOT" > /tmp/autonoma-plugin-root + # Ensure PyYAML is available (required for frontmatter parsing) python3 -c "import yaml" 2>/dev/null || pip3 install pyyaml -q 2>/dev/null @@ -25,6 +30,10 @@ case "$FILE_PATH" in VALIDATOR_SCRIPT="$VALIDATORS_DIR/validate_kb.py" VALIDATOR_NAME="validate-kb" ;; + */autonoma/discover.json) + VALIDATOR_SCRIPT="$VALIDATORS_DIR/validate_discover.py" + VALIDATOR_NAME="validate-discover" + ;; */autonoma/features.json) VALIDATOR_SCRIPT="$VALIDATORS_DIR/validate_features.py" VALIDATOR_NAME="validate-features" diff --git a/hooks/validators/validate_discover.py b/hooks/validators/validate_discover.py new file mode 100644 index 0000000..102cc8c --- /dev/null +++ b/hooks/validators/validate_discover.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +"""Validate the structure of autonoma/discover.json (the saved SDK discover response): print the first problem and exit 1, or print OK.""" +import json +import re +import sys + + +TYPE_PATTERN = re.compile(r"^(?:[A-Za-z][A-Za-z0-9_]*|enum\([^()]+\))(?:\[\])?$") + + +filepath = sys.argv[1] + +try: + with open(filepath, encoding='utf-8') as fh: + payload = json.load(fh) +except Exception as e: + print(f'Invalid JSON: {e}') + sys.exit(1) + +if not isinstance(payload, dict): + print('discover.json must contain a JSON object') + sys.exit(1) + +schema = payload.get('schema') +if not isinstance(schema, dict): + print('discover.json must contain a "schema" object') + sys.exit(1) + +required_schema_fields = ['models', 'edges', 'relations', 'scopeField'] +missing = [f for f in required_schema_fields if f not in schema] +if missing: + print(f'schema is missing required fields: {missing}') + sys.exit(1) + +models = schema.get('models') +if not isinstance(models, list) or len(models) == 0: + print('schema.models must be a non-empty list') + sys.exit(1) + +for i, model in enumerate(models): + if not isinstance(model, dict): + print(f'schema.models[{i}] must be an object') + sys.exit(1) + if not isinstance(model.get('name'), str) or not model.get('name', '').strip(): +
print(f'schema.models[{i}].name must be a non-empty string') + sys.exit(1) + fields = model.get('fields') + if not isinstance(fields, list): + print(f'schema.models[{i}].fields must be a list') + sys.exit(1) + for j, field in enumerate(fields): + if not isinstance(field, dict): + print(f'schema.models[{i}].fields[{j}] must be an object') + sys.exit(1) + for key in ['name', 'type', 'isRequired', 'isId', 'hasDefault']: + if key not in field: + print(f'schema.models[{i}].fields[{j}] missing required field: {key}') + sys.exit(1) + field_type = field.get('type') + if not isinstance(field_type, str) or len(field_type.strip()) == 0: + print(f'schema.models[{i}].fields[{j}].type must be a non-empty string') + sys.exit(1) + if TYPE_PATTERN.match(field_type.strip()) is None: + print( + f'schema.models[{i}].fields[{j}].type must use a supported type format, got: {field_type}' + ) + sys.exit(1) + +edges = schema.get('edges') +if not isinstance(edges, list): + print('schema.edges must be a list') + sys.exit(1) + +for i, edge in enumerate(edges): + if not isinstance(edge, dict): + print(f'schema.edges[{i}] must be an object') + sys.exit(1) + for key in ['from', 'to', 'localField', 'foreignField', 'nullable']: + if key not in edge: + print(f'schema.edges[{i}] missing required field: {key}') + sys.exit(1) + +relations = schema.get('relations') +if not isinstance(relations, list): + print('schema.relations must be a list') + sys.exit(1) + +for i, relation in enumerate(relations): + if not isinstance(relation, dict): + print(f'schema.relations[{i}] must be an object') + sys.exit(1) + for key in ['parentModel', 'childModel', 'parentField', 'childField']: + if key not in relation: + print(f'schema.relations[{i}] missing required field: {key}') + sys.exit(1) + +scope_field = schema.get('scopeField') +if not isinstance(scope_field, str) or len(scope_field.strip()) == 0: + print('schema.scopeField must be a non-empty string') + sys.exit(1) + +print('OK') diff --git 
a/hooks/validators/validate_scenarios.py b/hooks/validators/validate_scenarios.py index eb77f5c..9bbbaec 100644 --- a/hooks/validators/validate_scenarios.py +++ b/hooks/validators/validate_scenarios.py @@ -26,7 +26,7 @@ sys.exit(1) # Required fields -required = ['scenario_count', 'scenarios', 'entity_types'] +required = ['scenario_count', 'scenarios', 'entity_types', 'discover', 'variable_fields', 'planning_sections'] missing = [f for f in required if f not in fm] if missing: print(f'Missing required frontmatter fields: {missing}') @@ -73,4 +73,102 @@ print(f'entity_types[{i}] must be a mapping with at least a "name" field') sys.exit(1) +# Validate discover metadata +discover = fm.get('discover') +if not isinstance(discover, dict): + print('discover must be a mapping') + sys.exit(1) + +for field in ['source', 'model_count', 'edge_count', 'relation_count', 'scope_field']: + if field not in discover: + print(f'discover missing required field: {field}') + sys.exit(1) + +if discover.get('source') != 'sdk': + print('discover.source must be exactly "sdk"') + sys.exit(1) + +for field in ['model_count', 'edge_count', 'relation_count']: + value = discover.get(field) + if not isinstance(value, int) or isinstance(value, bool) or value < 0: + print(f'discover.{field} must be a non-negative integer') + sys.exit(1) + +scope_field = discover.get('scope_field') +if not isinstance(scope_field, str) or len(scope_field.strip()) == 0: + print('discover.scope_field must be a non-empty string') + sys.exit(1) + +if discover.get('model_count') == 0: + print('discover.model_count must be greater than 0') + sys.exit(1) + +# Validate variable_fields +variable_fields = fm.get('variable_fields') +if not isinstance(variable_fields, list): + print('variable_fields must be a list') + sys.exit(1) + +for i, variable in enumerate(variable_fields): + if not isinstance(variable, dict): + print(f'variable_fields[{i}] must be a mapping') + sys.exit(1) + for field in ['token', 'entity', 'scenarios', 'reason', 'test_reference']: +
if field not in variable: + print(f'variable_fields[{i}] missing required field: {field}') + sys.exit(1) + + token = variable.get('token') + if not isinstance(token, str) or len(token) < 5 or not token.startswith('{{') or not token.endswith('}}'): + print(f'variable_fields[{i}].token must use double curly braces, e.g. {{{{title}}}}') + sys.exit(1) + + for field in ['entity', 'reason', 'test_reference']: + value = variable.get(field) + if not isinstance(value, str) or len(value.strip()) == 0: + print(f'variable_fields[{i}].{field} must be a non-empty string') + sys.exit(1) + + if 'generator' in variable: + generator = variable.get('generator') + if not isinstance(generator, str) or len(generator.strip()) == 0: + print(f'variable_fields[{i}].generator must be a non-empty string if present') + sys.exit(1) + + scenario_names = variable.get('scenarios') + if not isinstance(scenario_names, list) or len(scenario_names) == 0: + print(f'variable_fields[{i}].scenarios must be a non-empty list') + sys.exit(1) + unknown_names = [name for name in scenario_names if name not in found_names] + if unknown_names: + print(f'variable_fields[{i}].scenarios has unknown scenario names: {unknown_names}') + sys.exit(1) + +# Validate planning_sections metadata +planning_sections = fm.get('planning_sections') +if not isinstance(planning_sections, list) or len(planning_sections) == 0: + print('planning_sections must be a non-empty list') + sys.exit(1) + +required_sections = { + 'sdk_discover', + 'schema_summary', + 'relationship_map', + 'variable_data_strategy', +} +unknown_sections = [section for section in planning_sections if not isinstance(section, str) or len(section.strip()) == 0] +if unknown_sections: + print('planning_sections must contain only non-empty strings') + sys.exit(1) + +missing_sections = required_sections - set(planning_sections) +if missing_sections: + print(f'Missing required planning_sections: {missing_sections}') + sys.exit(1) + +for section in planning_sections: + if
section not in required_sections: + print(f'planning_sections contains unknown value: {section}') + sys.exit(1) + print('OK') From 804059b8ae0ae061743421dd69ae5d3a2bfc64f3 Mon Sep 17 00:00:00 2001 From: Ignacio Pardo Date: Tue, 7 Apr 2026 19:24:40 -0300 Subject: [PATCH 02/33] feat: scenario recipe creation and preflight validation Add token resolution engine with literal/derived/faker strategies, recipe schema validator, live endpoint preflight, and updated agent guidance for scenario generation flow. Co-Authored-By: Claude Opus 4.6 --- agents/env-factory-generator.md | 352 +++++++++++------- agents/scenario-generator.md | 118 +++++- agents/test-case-generator.md | 25 +- commands/generate-tests.md | 156 ++++++-- hooks/preflight_scenario_recipes.py | 319 ++++++++++++++++ hooks/validate-pipeline-output.sh | 22 ++ hooks/validators/validate_scenario_recipes.py | 332 +++++++++++++++++ skills/generate-tests/SKILL.md | 248 +++++++++--- 8 files changed, 1335 insertions(+), 237 deletions(-) create mode 100644 hooks/preflight_scenario_recipes.py create mode 100644 hooks/validators/validate_scenario_recipes.py diff --git a/agents/env-factory-generator.md b/agents/env-factory-generator.md index 85d6ba7..5e18487 100644 --- a/agents/env-factory-generator.md +++ b/agents/env-factory-generator.md @@ -1,8 +1,8 @@ --- description: > - Implements the Autonoma Environment Factory endpoint in the project's backend. - Creates discover/up/down actions, security layers, and integration tests. - Tests the implementation within the session before completing. + Implements or completes the Autonoma Environment Factory in the project's backend. + Extends an existing SDK integration when possible, wires discover/up/down behavior to the + planned scenarios, then validates the planned scenarios against the lifecycle before completing. 
tools: - Read - Glob @@ -17,8 +17,23 @@ maxTurns: 60 # Environment Factory Generator -You implement the Autonoma Environment Factory endpoint in the project's backend. -Your input is `autonoma/scenarios.md`. Your output is working endpoint code with tests. +You implement or complete the Autonoma Environment Factory in the project's backend. +Your inputs are `autonoma/discover.json`, `autonoma/scenarios.md`, and the backend codebase. +Your output is working backend code plus validated scenario recipes. + +## Goal + +Step 2 already proved that the backend can answer `discover`, or at least that there is enough +of an Environment Factory integration to expose schema metadata. Step 4's job is to finish the +real backend implementation for scenario creation and teardown, then validate the planned scenarios +against that implementation: + +1. make sure the backend exposes the current SDK protocol +2. make sure `up` can create scenario data from inline `create` recipes +3. make sure `down` can delete only the data created by `up` +4. smoke-test the lifecycle in-session +5. validate `standard`, `empty`, and `large` +6. persist approved recipes to `autonoma/scenario-recipes.json` ## Instructions @@ -28,156 +43,219 @@ Your input is `autonoma/scenarios.md`. Your output is working endpoint code with - `https://docs.agent.autonoma.app/llms/test-planner/step-4-implement-scenarios.txt` - `https://docs.agent.autonoma.app/llms/guides/environment-factory.txt` - Follow those instructions for how to implement the endpoint. + Follow the current SDK protocol from those docs. If the docs lag behind the repo, prefer the + real SDK contract already visible in the backend codebase. -2. Read `autonoma/scenarios.md` — parse the frontmatter and full scenario data. +2. Read `autonoma/discover.json` and `autonoma/scenarios.md`. + - `discover.json` is the schema source of truth + - `scenarios.md` is the planning layer that defines what `standard`, `empty`, and `large` + should look like -3. 
Explore the backend codebase to understand: - - Framework (Next.js, Express, Elixir/Phoenix, etc.) - - Database layer (Prisma, Drizzle, raw SQL, Ecto, etc.) - - Authentication mechanism (session cookies, JWT, etc.) - - Existing route/endpoint patterns +3. Explore the backend codebase to determine: + - whether the Autonoma SDK is already installed + - where the Environment Factory endpoint lives + - which parts already exist: `discover`, `up`, `down`, auth callback, teardown helpers + - what framework and ORM patterns the backend already uses ## CRITICAL: Before Writing Any Code -**Ask the user for confirmation** before implementing. Present your plan: +Ask the user for confirmation before implementing. Present a short plan: -> "I'm about to implement the Autonoma Environment Factory endpoint. Here's what I'll do: +> "I'm about to implement or complete the Autonoma Environment Factory. Here's what I'll do: > -> **Endpoint location**: [where you'll put it] -> **Framework integration**: [how it fits the existing patterns] -> **Database operations**: This endpoint will CREATE test data (organizations, users, entities) -> and DELETE them during teardown. It will NOT modify or delete any existing data. 
-> **Security**: HMAC-SHA256 request signing + JWT-signed refs for safe teardown +> **Endpoint location**: [route / handler path] +> **Current state**: [what already exists vs what is missing] +> **Step 4 scope**: make discover/up/down work with the current SDK contract and validate the planned scenarios against it +> **Database operations**: `up` will create isolated test data and `down` will delete only those created refs +> **Security**: HMAC-SHA256 request signing with `AUTONOMA_SHARED_SECRET` plus signed refs tokens with `AUTONOMA_SIGNING_SECRET` > > **Environment variables needed**: -> - `AUTONOMA_SIGNING_SECRET` — shared secret for HMAC request verification -> - `AUTONOMA_JWT_SECRET` — secret for signing/verifying refs tokens -> -> To generate these secrets, run: -> ```bash -> openssl rand -hex 32 -> ``` -> Run this command TWICE — once for each secret. Use DIFFERENT values for each. -> Set them in your `.env` file (or equivalent): -> ``` -> AUTONOMA_SIGNING_SECRET= -> AUTONOMA_JWT_SECRET= -> ``` +> - `AUTONOMA_SHARED_SECRET` +> - `AUTONOMA_SIGNING_SECRET` > > Shall I proceed?" -**Do NOT proceed until the user confirms.** +Do NOT proceed until the user confirms. ## Implementation Requirements -### Always Implement on the Backend - -Find the project's backend and implement the endpoint there. Look for: -- API route directories (e.g., `app/api/`, `pages/api/`, `src/routes/`, `lib/`) -- Existing endpoint patterns to match -- If it's a monorepo, find the backend package/app - -If you can't find the backend, ask the user where it is. - -### Environment Variables - -Always use these exact names: -- `AUTONOMA_SIGNING_SECRET` — for HMAC-SHA256 request verification -- `AUTONOMA_JWT_SECRET` — for JWT signing of refs tokens - -### Security Layers (All Required) - -1. **Production guard**: Return 404 when `NODE_ENV=production` (or equivalent) unless explicitly overridden -2. 
**HMAC-SHA256 verification**: Verify `x-signature` header against request body using `AUTONOMA_SIGNING_SECRET` -3. **Signed refs (JWT)**: Sign refs in `up` response, verify in `down` request using `AUTONOMA_JWT_SECRET` - -### Creation and Teardown Order - -- **Up**: Create parent entities before children (org → users → projects → tests → runs) -- **Down**: Delete in REVERSE order (runs → tests → projects → users → org) -- Do NOT rely on ORM cascade behavior — explicit deletion is safer -- Use `testRunId` in all unique fields to prevent parallel test collisions - -### Endpoint Actions - -| Action | Purpose | -|------------|-------------------------------| -| `discover` | Return available scenarios | -| `up` | Create scenario data, return auth + refs | -| `down` | Verify refs token, delete data | - -## CRITICAL: Test Within the Session - -After implementing the endpoint, you MUST test it to verify it works: - -1. **Check if the dev server is running** or start it -2. **Generate temporary secrets** for testing: - ```bash - export AUTONOMA_SIGNING_SECRET=$(openssl rand -hex 32) - export AUTONOMA_JWT_SECRET=$(openssl rand -hex 32) - ``` - -3. **Test the discover action**: - ```bash - BODY='{"action":"discover"}' - SIG=$(echo -n "$BODY" | openssl dgst -sha256 -hmac "$AUTONOMA_SIGNING_SECRET" | sed 's/.*= //') - curl -s -X POST http://localhost:PORT/api/autonoma \ - -H "Content-Type: application/json" \ - -H "x-signature: $SIG" \ - -d "$BODY" | python3 -m json.tool - ``` - -4. **Test the up action** (for each scenario): - ```bash - BODY='{"action":"up","environment":"standard","testRunId":"test-001"}' - SIG=$(echo -n "$BODY" | openssl dgst -sha256 -hmac "$AUTONOMA_SIGNING_SECRET" | sed 's/.*= //') - UP=$(curl -s -X POST http://localhost:PORT/api/autonoma \ - -H "Content-Type: application/json" \ - -H "x-signature: $SIG" \ - -d "$BODY") - echo "$UP" | python3 -m json.tool - ``` - -5. 
**Test the down action** using refs from up: - ```bash - REFS=$(echo "$UP" | python3 -c "import sys,json; print(json.dumps(json.load(sys.stdin)['refs']))") - TOKEN=$(echo "$UP" | python3 -c "import sys,json; print(json.load(sys.stdin)['refsToken'])") - BODY=$(python3 -c "import json; print(json.dumps({'action':'down','testRunId':'test-001','refs':json.loads('$REFS'),'refsToken':'$TOKEN'}))") - SIG=$(echo -n "$BODY" | openssl dgst -sha256 -hmac "$AUTONOMA_SIGNING_SECRET" | sed 's/.*= //') - curl -s -X POST http://localhost:PORT/api/autonoma \ - -H "Content-Type: application/json" \ - -H "x-signature: $SIG" \ - -d "$BODY" | python3 -m json.tool - ``` - -6. **Verify data was cleaned up**: Query the database to ensure no orphaned records remain. - -If any test fails, fix the implementation and re-test. +### Build on the existing backend + +- Prefer extending the existing Environment Factory endpoint over replacing it +- Match the backend's framework, ORM, and route conventions +- Do not create a separate throwaway server + +### Current SDK contract + +Implement or preserve these actions: + +| Action | Purpose | +|--------|---------| +| `discover` | Return schema metadata: version, sdk info, models, edges, relations, scopeField | +| `up` | Accept inline `create` payloads plus optional `testRunId`, create data, return `auth`, `refs`, and `refsToken` | +| `down` | Accept `refsToken`, verify it, and tear down the created data | + +### Security requirements + +Use these exact environment variable names: +- `AUTONOMA_SHARED_SECRET` — HMAC request verification secret shared with Autonoma +- `AUTONOMA_SIGNING_SECRET` — private secret for signing and verifying refs tokens + +Required protections: +1. production guard unless explicitly allowed +2. HMAC-SHA256 verification of the `x-signature` header +3. 
signed refs tokens for teardown + +### Scenario implementation guidance + +- Use `autonoma/scenarios.md` to decide what data the backend needs to support +- Preserve generated fields as generated values; do not force everything into static literals +- Make unique fields depend on `testRunId` when needed +- Prefer explicit create and teardown ordering based on the schema +- If `discover` already works but `up` / `down` do not, keep the introspection path and finish the lifecycle + +## CRITICAL: Smoke-Test and Validate Within the Session + +After implementing, test the lifecycle in-session. + +At minimum: +1. confirm `discover` still works +2. send one signed `up` request with a small inline `create` payload compatible with the schema +3. send the corresponding signed `down` request using the returned `refsToken` +4. verify cleanup succeeds + +After the wiring works, validate `standard`, `empty`, and `large` against the backend. +Prefer: +1. backend-local `checkScenario` / `checkAllScenarios` +2. signed endpoint `up` / `down` validation if local SDK checks are not practical + +Write the approved results to `autonoma/scenario-recipes.json`. 
+ +## CRITICAL: scenario-recipes.json must match the current setup API schema + +The file must be a JSON object in this exact logical shape: + +```json +{ + "version": 1, + "source": { + "discoverPath": "autonoma/discover.json", + "scenariosPath": "autonoma/scenarios.md" + }, + "validationMode": "sdk-check", + "recipes": [ + { + "name": "standard", + "description": "Realistic dataset for core flows", + "create": { + "User": [ + { + "email": "{{owner_email}}" + } + ] + }, + "variables": { + "owner_email": { + "strategy": "derived", + "source": "testRunId", + "format": "owner+{testRunId}@example.com" + } + }, + "validation": { + "status": "validated", + "method": "checkScenario", + "phase": "ok", + "up_ms": 12, + "down_ms": 8 + } + } + ] +} +``` + +Required rules: +- top-level keys must be `version`, `source`, `validationMode`, and `recipes` +- `version` must be the integer `1` +- `source.discoverPath` must be `autonoma/discover.json` +- `source.scenariosPath` must be `autonoma/scenarios.md` +- `validationMode` must be `sdk-check` or `endpoint-lifecycle` +- `recipes` must include `standard`, `empty`, and `large` +- every recipe must contain `name`, `description`, `create`, and `validation` +- every `validation` object must contain: + - `status: "validated"` + - `method`: one of `checkScenario`, `checkAllScenarios`, `endpoint-up-down` + - `phase: "ok"` + - optional `up_ms` / `down_ms` as non-negative integers + +### Per-recipe `variables` (required when `create` uses tokens) + +If `create` contains `{{token}}` placeholders, the recipe MUST include a `variables` object that +defines how each token is resolved. The persisted `create` remains tokenized — concrete values are +never stored. The `variables` field stores the planned generation logic so the `agent` can resolve +tokens at runtime. + +Allowed strategies: +- `literal` — `{ "strategy": "literal", "value": }` +- `derived` — `{ "strategy": "derived", "source": "testRunId", "format": "