diff --git a/.changeset/smoke-fail-fast-unknown-provider.md b/.changeset/smoke-fail-fast-unknown-provider.md new file mode 100644 index 0000000..aeeaaa7 --- /dev/null +++ b/.changeset/smoke-fail-fast-unknown-provider.md @@ -0,0 +1,8 @@ +--- +"open-codesign": patch +--- + +Make the `scripts/smoke-models.ts` harness fail fast when the smoke config +references a provider not registered in `ENV_KEY`. Previously the script +silently skipped the model (treating it as "no API key set"), masking config +typos and producing false-green smoke runs. diff --git a/.changeset/smoke-models-cli.md b/.changeset/smoke-models-cli.md new file mode 100644 index 0000000..91ff5b1 --- /dev/null +++ b/.changeset/smoke-models-cli.md @@ -0,0 +1,13 @@ +--- +'open-codesign': minor +--- + +feat(scripts): add `pnpm smoke` batch model/prompt tester + +`scripts/smoke-models.ts` runs a (provider × model × prompt) matrix through the same `generate()` code path the desktop app uses, saves each artifact to `/tmp/smoke/`, and prints a colored report with quality flags (multiple `
` elements, emoji icons, missing EDITMODE block, JS syntax errors via acorn). + +API keys come from environment variables (`OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, etc.) — never stored in the repo. Models and prompts live in `scripts/smoke-models.toml`. + +CLI flags: `--model`, `--prompt`, `--only-failed`, `--config`. + +Sanitize TOML prompt names against path traversal — model/prompt slugs now collapse any non-`[a-zA-Z0-9._-]` character to `_`, and the resolved artifact path is verified to stay inside `/tmp/smoke/` before write. diff --git a/.gitignore b/.gitignore index 1f252f9..6308446 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ test-results/ .env .env.* !.env.example +scripts/.smoke-keys.env # Editor .vscode/* diff --git a/package.json b/package.json index d514ba9..ae42aed 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "docs:dev": "pnpm --filter open-codesign-website dev", "docs:build": "pnpm --filter open-codesign-website build", "docs:preview": "pnpm --filter open-codesign-website preview", + "smoke": "tsx scripts/smoke-models.ts", "changeset": "changeset", "version-packages": "changeset version", "release": "turbo run build && changeset publish" @@ -35,7 +36,13 @@ "devDependencies": { "@biomejs/biome": "^1.9.4", "@changesets/cli": "^2.27.11", + "@iarna/toml": "^2.2.5", + "@open-codesign/core": "workspace:*", + "@open-codesign/shared": "workspace:*", + "@types/node": "^22.10.2", + "acorn": "^8.14.0", "husky": "^9.1.7", + "tsx": "^4.19.2", "turbo": "^2.3.3", "typescript": "^5.7.2" } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e6c241b..918db57 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -14,9 +14,27 @@ importers: '@changesets/cli': specifier: ^2.27.11 version: 2.31.0(@types/node@22.19.17) + '@iarna/toml': + specifier: ^2.2.5 + version: 2.2.5 + '@open-codesign/core': + specifier: workspace:* + version: link:packages/core + '@open-codesign/shared': + specifier: workspace:* + version: link:packages/shared + '@types/node': + specifier: ^22.10.2 + version: 22.19.17 + acorn: + specifier: ^8.14.0 + version: 8.16.0 husky: specifier: ^9.1.7 version: 9.1.7 + tsx: + specifier: ^4.19.2 + version: 4.21.0 turbo: specifier: ^2.3.3 version: 2.9.6 @@ -95,7 +113,7 @@ importers: version: 19.2.3(@types/react@19.2.14) '@vitejs/plugin-react': specifier: ^4.3.4 - version: 4.7.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)) + version: 4.7.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0)) autoprefixer: specifier: ^10.4.20 version: 10.5.0(postcss@8.5.10) @@ -110,7 +128,7 @@ importers: version: 26.8.1(dmg-builder@26.8.1) electron-vite: specifier: ^2.3.0 - version: 2.3.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)) + version: 2.3.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0)) tailwindcss: specifier: ^4.0.0 version: 4.2.2 @@ -119,7 +137,7 @@ importers: version: 5.9.3 vite: specifier: ^6.0.5 - version: 6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0) + version: 6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0) vitest: specifier: ^2.1.8 version: 2.1.9(@types/node@22.19.17)(happy-dom@20.9.0)(lightningcss@1.32.0) @@ -826,6 +844,12 @@ packages: cpu: [ppc64] os: [aix] + '@esbuild/aix-ppc64@0.27.7': + resolution: {integrity: sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + '@esbuild/android-arm64@0.21.5': resolution: {integrity: sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==} engines: {node: '>=12'} @@ -838,6 +862,12 @@ packages: cpu: [arm64] os: [android] + '@esbuild/android-arm64@0.27.7': + resolution: {integrity: sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + '@esbuild/android-arm@0.21.5': resolution: {integrity: sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==} engines: {node: '>=12'} @@ -850,6 +880,12 @@ packages: cpu: [arm] os: [android] + '@esbuild/android-arm@0.27.7': + resolution: {integrity: sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + '@esbuild/android-x64@0.21.5': resolution: {integrity: sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==} engines: {node: '>=12'} @@ -862,6 +898,12 @@ packages: cpu: [x64] os: [android] + '@esbuild/android-x64@0.27.7': + resolution: {integrity: sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + '@esbuild/darwin-arm64@0.21.5': resolution: {integrity: sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==} engines: {node: '>=12'} @@ -874,6 +916,12 @@ packages: cpu: [arm64] os: [darwin] + '@esbuild/darwin-arm64@0.27.7': + resolution: {integrity: sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + '@esbuild/darwin-x64@0.21.5': resolution: {integrity: sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==} engines: {node: '>=12'} @@ -886,6 +934,12 @@ packages: cpu: [x64] os: [darwin] + '@esbuild/darwin-x64@0.27.7': + resolution: {integrity: sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + '@esbuild/freebsd-arm64@0.21.5': resolution: {integrity: sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==} engines: {node: '>=12'} @@ -898,6 +952,12 @@ packages: cpu: [arm64] os: [freebsd] + '@esbuild/freebsd-arm64@0.27.7': + resolution: {integrity: sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + '@esbuild/freebsd-x64@0.21.5': resolution: {integrity: sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==} engines: {node: '>=12'} @@ -910,6 +970,12 @@ packages: cpu: [x64] os: [freebsd] + '@esbuild/freebsd-x64@0.27.7': + resolution: {integrity: sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + '@esbuild/linux-arm64@0.21.5': resolution: {integrity: sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==} engines: {node: '>=12'} @@ -922,6 +988,12 @@ packages: cpu: [arm64] os: [linux] + '@esbuild/linux-arm64@0.27.7': + resolution: {integrity: sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + '@esbuild/linux-arm@0.21.5': resolution: {integrity: sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==} engines: {node: '>=12'} @@ -934,6 +1006,12 @@ packages: cpu: [arm] os: [linux] + '@esbuild/linux-arm@0.27.7': + resolution: {integrity: sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + '@esbuild/linux-ia32@0.21.5': resolution: {integrity: sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==} engines: {node: '>=12'} @@ -946,6 +1024,12 @@ packages: cpu: [ia32] os: [linux] + '@esbuild/linux-ia32@0.27.7': + resolution: {integrity: sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + '@esbuild/linux-loong64@0.21.5': resolution: {integrity: sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==} engines: {node: '>=12'} @@ -958,6 +1042,12 @@ packages: cpu: [loong64] os: [linux] + '@esbuild/linux-loong64@0.27.7': + resolution: {integrity: sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + '@esbuild/linux-mips64el@0.21.5': resolution: {integrity: sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==} engines: {node: '>=12'} @@ -970,6 +1060,12 @@ packages: cpu: [mips64el] os: [linux] + '@esbuild/linux-mips64el@0.27.7': + resolution: {integrity: sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + '@esbuild/linux-ppc64@0.21.5': resolution: {integrity: sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==} engines: {node: '>=12'} @@ -982,6 +1078,12 @@ packages: cpu: [ppc64] os: [linux] + '@esbuild/linux-ppc64@0.27.7': + resolution: {integrity: sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + '@esbuild/linux-riscv64@0.21.5': resolution: {integrity: sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==} engines: {node: '>=12'} @@ -994,6 +1096,12 @@ packages: cpu: [riscv64] os: [linux] + '@esbuild/linux-riscv64@0.27.7': + resolution: {integrity: sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + '@esbuild/linux-s390x@0.21.5': resolution: {integrity: sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==} engines: {node: '>=12'} @@ -1006,6 +1114,12 @@ packages: cpu: [s390x] os: [linux] + '@esbuild/linux-s390x@0.27.7': + resolution: {integrity: sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + '@esbuild/linux-x64@0.21.5': resolution: {integrity: sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==} engines: {node: '>=12'} @@ -1018,12 +1132,24 @@ packages: cpu: [x64] os: [linux] + '@esbuild/linux-x64@0.27.7': + resolution: {integrity: sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + '@esbuild/netbsd-arm64@0.25.12': resolution: {integrity: sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==} engines: {node: '>=18'} cpu: [arm64] os: [netbsd] + '@esbuild/netbsd-arm64@0.27.7': + resolution: {integrity: sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + '@esbuild/netbsd-x64@0.21.5': resolution: {integrity: sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==} engines: {node: '>=12'} @@ -1036,12 +1162,24 @@ packages: cpu: [x64] os: [netbsd] + '@esbuild/netbsd-x64@0.27.7': + resolution: {integrity: sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + '@esbuild/openbsd-arm64@0.25.12': resolution: {integrity: sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==} engines: {node: '>=18'} cpu: [arm64] os: [openbsd] + '@esbuild/openbsd-arm64@0.27.7': + resolution: {integrity: sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + '@esbuild/openbsd-x64@0.21.5': resolution: {integrity: sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==} engines: {node: '>=12'} @@ -1054,12 +1192,24 @@ packages: cpu: [x64] os: [openbsd] + '@esbuild/openbsd-x64@0.27.7': + resolution: {integrity: sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + '@esbuild/openharmony-arm64@0.25.12': resolution: {integrity: sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==} engines: {node: '>=18'} cpu: [arm64] os: [openharmony] + '@esbuild/openharmony-arm64@0.27.7': + resolution: {integrity: sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + '@esbuild/sunos-x64@0.21.5': resolution: {integrity: sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==} engines: {node: '>=12'} @@ -1072,6 +1222,12 @@ packages: cpu: [x64] os: [sunos] + '@esbuild/sunos-x64@0.27.7': + resolution: {integrity: sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + '@esbuild/win32-arm64@0.21.5': resolution: {integrity: sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==} engines: {node: '>=12'} @@ -1084,6 +1240,12 @@ packages: cpu: [arm64] os: [win32] + '@esbuild/win32-arm64@0.27.7': + resolution: {integrity: sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + '@esbuild/win32-ia32@0.21.5': resolution: {integrity: sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==} engines: {node: '>=12'} @@ -1096,6 +1258,12 @@ packages: cpu: [ia32] os: [win32] + '@esbuild/win32-ia32@0.27.7': + resolution: {integrity: sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + '@esbuild/win32-x64@0.21.5': resolution: {integrity: sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==} engines: {node: '>=12'} @@ -1108,6 +1276,12 @@ packages: cpu: [x64] os: [win32] + '@esbuild/win32-x64@0.27.7': + resolution: {integrity: sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + '@fontsource-variable/fraunces@5.2.9': resolution: {integrity: sha512-Y6IjunlN9Ni723np+GIgAaKzCDBrPRrqNi01TZxHs5wtHYROWFM9W6yiT+/gGwSjWIRD18oX17kD/BRWekc/Lw==} @@ -1991,6 +2165,11 @@ packages: resolution: {integrity: sha512-AO2ac6pjRB3SJmGJo+v5/aK6Omggp6fsLrs6wN9bd35ulu4cCwaAU9+7ZhXjeqHVkaHThLuzH0nZr0YpCDhygg==} engines: {node: ^18.17.0 || >=20.5.0} + acorn@8.16.0: + resolution: {integrity: sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==} + engines: {node: '>=0.4.0'} + hasBin: true + agent-base@7.1.4: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} @@ -2612,6 +2791,11 @@ packages: engines: {node: '>=18'} hasBin: true + esbuild@0.27.7: + resolution: {integrity: sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==} + engines: {node: '>=18'} + hasBin: true + escalade@3.2.0: resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} engines: {node: '>=6'} @@ -2813,6 +2997,9 @@ packages: resolution: {integrity: sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==} engines: {node: '>=8'} + get-tsconfig@4.14.0: + resolution: {integrity: sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==} + get-uri@6.0.5: resolution: {integrity: sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==} engines: {node: '>= 14'} @@ -3748,6 +3935,9 @@ packages: resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} engines: {node: '>=8'} + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + responselike@2.0.1: resolution: {integrity: sha512-4gl03wn3hj1HP3yzgdI7d3lCkF95F21Pz4BPGvKHinyQzALR5CapwC8yIi0Rh58DEMQ/SguC03wFj2k0M/mHhw==} @@ -4070,6 +4260,11 @@ packages: tslib@2.8.1: resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + tsx@4.21.0: + resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} + engines: {node: '>=18.0.0'} + hasBin: true + tunnel-agent@0.6.0: resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==} @@ -5334,147 +5529,225 @@ snapshots: '@esbuild/aix-ppc64@0.25.12': optional: true + '@esbuild/aix-ppc64@0.27.7': + optional: true + '@esbuild/android-arm64@0.21.5': optional: true '@esbuild/android-arm64@0.25.12': optional: true + '@esbuild/android-arm64@0.27.7': + optional: true + '@esbuild/android-arm@0.21.5': optional: true '@esbuild/android-arm@0.25.12': optional: true + '@esbuild/android-arm@0.27.7': + optional: true + '@esbuild/android-x64@0.21.5': optional: true '@esbuild/android-x64@0.25.12': optional: true + '@esbuild/android-x64@0.27.7': + optional: true + '@esbuild/darwin-arm64@0.21.5': optional: true '@esbuild/darwin-arm64@0.25.12': optional: true + '@esbuild/darwin-arm64@0.27.7': + optional: true + '@esbuild/darwin-x64@0.21.5': optional: true '@esbuild/darwin-x64@0.25.12': optional: true + '@esbuild/darwin-x64@0.27.7': + optional: true + '@esbuild/freebsd-arm64@0.21.5': optional: true '@esbuild/freebsd-arm64@0.25.12': optional: true + '@esbuild/freebsd-arm64@0.27.7': + optional: true + '@esbuild/freebsd-x64@0.21.5': optional: true '@esbuild/freebsd-x64@0.25.12': optional: true + '@esbuild/freebsd-x64@0.27.7': + optional: true + '@esbuild/linux-arm64@0.21.5': optional: true '@esbuild/linux-arm64@0.25.12': optional: true + '@esbuild/linux-arm64@0.27.7': + optional: true + '@esbuild/linux-arm@0.21.5': optional: true '@esbuild/linux-arm@0.25.12': optional: true + '@esbuild/linux-arm@0.27.7': + optional: true + '@esbuild/linux-ia32@0.21.5': optional: true '@esbuild/linux-ia32@0.25.12': optional: true + '@esbuild/linux-ia32@0.27.7': + optional: true + '@esbuild/linux-loong64@0.21.5': optional: true '@esbuild/linux-loong64@0.25.12': optional: true + '@esbuild/linux-loong64@0.27.7': + optional: true + '@esbuild/linux-mips64el@0.21.5': optional: true '@esbuild/linux-mips64el@0.25.12': optional: true + '@esbuild/linux-mips64el@0.27.7': + optional: true + '@esbuild/linux-ppc64@0.21.5': optional: true '@esbuild/linux-ppc64@0.25.12': optional: true + '@esbuild/linux-ppc64@0.27.7': + optional: true + '@esbuild/linux-riscv64@0.21.5': optional: true '@esbuild/linux-riscv64@0.25.12': optional: true + '@esbuild/linux-riscv64@0.27.7': + optional: true + '@esbuild/linux-s390x@0.21.5': optional: true '@esbuild/linux-s390x@0.25.12': optional: true + '@esbuild/linux-s390x@0.27.7': + optional: true + '@esbuild/linux-x64@0.21.5': optional: true '@esbuild/linux-x64@0.25.12': optional: true + '@esbuild/linux-x64@0.27.7': + optional: true + '@esbuild/netbsd-arm64@0.25.12': optional: true + '@esbuild/netbsd-arm64@0.27.7': + optional: true + '@esbuild/netbsd-x64@0.21.5': optional: true '@esbuild/netbsd-x64@0.25.12': optional: true + '@esbuild/netbsd-x64@0.27.7': + optional: true + '@esbuild/openbsd-arm64@0.25.12': optional: true + '@esbuild/openbsd-arm64@0.27.7': + optional: true + '@esbuild/openbsd-x64@0.21.5': optional: true '@esbuild/openbsd-x64@0.25.12': optional: true + '@esbuild/openbsd-x64@0.27.7': + optional: true + '@esbuild/openharmony-arm64@0.25.12': optional: true + '@esbuild/openharmony-arm64@0.27.7': + optional: true + '@esbuild/sunos-x64@0.21.5': optional: true '@esbuild/sunos-x64@0.25.12': optional: true + '@esbuild/sunos-x64@0.27.7': + optional: true + '@esbuild/win32-arm64@0.21.5': optional: true '@esbuild/win32-arm64@0.25.12': optional: true + '@esbuild/win32-arm64@0.27.7': + optional: true + '@esbuild/win32-ia32@0.21.5': optional: true '@esbuild/win32-ia32@0.25.12': optional: true + '@esbuild/win32-ia32@0.27.7': + optional: true + '@esbuild/win32-x64@0.21.5': optional: true '@esbuild/win32-x64@0.25.12': optional: true + '@esbuild/win32-x64@0.27.7': + optional: true + '@fontsource-variable/fraunces@5.2.9': {} '@fontsource-variable/geist@5.2.8': {} @@ -6333,7 +6606,7 @@ snapshots: '@ungap/structured-clone@1.3.0': {} - '@vitejs/plugin-react@4.7.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0))': + '@vitejs/plugin-react@4.7.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0))': dependencies: '@babel/core': 7.29.0 '@babel/plugin-transform-react-jsx-self': 7.27.1(@babel/core@7.29.0) @@ -6341,7 +6614,7 @@ snapshots: '@rolldown/pluginutils': 1.0.0-beta.27 '@types/babel__core': 7.20.5 react-refresh: 0.17.0 - vite: 6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0) + vite: 6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0) transitivePeerDependencies: - supports-color @@ -6493,6 +6766,8 @@ snapshots: abbrev@3.0.1: {} + acorn@8.16.0: {} + agent-base@7.1.4: {} ajv-formats@3.0.1(ajv@8.18.0): @@ -7093,7 +7368,7 @@ snapshots: transitivePeerDependencies: - supports-color - electron-vite@2.3.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)): + electron-vite@2.3.0(vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0)): dependencies: '@babel/core': 7.29.0 '@babel/plugin-transform-arrow-functions': 7.27.1(@babel/core@7.29.0) @@ -7101,7 +7376,7 @@ snapshots: esbuild: 0.21.5 magic-string: 0.30.21 picocolors: 1.1.1 - vite: 6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0) + vite: 6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0) transitivePeerDependencies: - supports-color @@ -7231,6 +7506,35 @@ snapshots: '@esbuild/win32-ia32': 0.25.12 '@esbuild/win32-x64': 0.25.12 + esbuild@0.27.7: + optionalDependencies: + '@esbuild/aix-ppc64': 0.27.7 + '@esbuild/android-arm': 0.27.7 + '@esbuild/android-arm64': 0.27.7 + '@esbuild/android-x64': 0.27.7 + '@esbuild/darwin-arm64': 0.27.7 + '@esbuild/darwin-x64': 0.27.7 + '@esbuild/freebsd-arm64': 0.27.7 + '@esbuild/freebsd-x64': 0.27.7 + '@esbuild/linux-arm': 0.27.7 + '@esbuild/linux-arm64': 0.27.7 + '@esbuild/linux-ia32': 0.27.7 + '@esbuild/linux-loong64': 0.27.7 + '@esbuild/linux-mips64el': 0.27.7 + '@esbuild/linux-ppc64': 0.27.7 + '@esbuild/linux-riscv64': 0.27.7 + '@esbuild/linux-s390x': 0.27.7 + '@esbuild/linux-x64': 0.27.7 + '@esbuild/netbsd-arm64': 0.27.7 + '@esbuild/netbsd-x64': 0.27.7 + '@esbuild/openbsd-arm64': 0.27.7 + '@esbuild/openbsd-x64': 0.27.7 + '@esbuild/openharmony-arm64': 0.27.7 + '@esbuild/sunos-x64': 0.27.7 + '@esbuild/win32-arm64': 0.27.7 + '@esbuild/win32-ia32': 0.27.7 + '@esbuild/win32-x64': 0.27.7 + escalade@3.2.0: {} escape-string-regexp@4.0.0: @@ -7452,6 +7756,10 @@ snapshots: dependencies: pump: 3.0.4 + get-tsconfig@4.14.0: + dependencies: + resolve-pkg-maps: 1.0.0 + get-uri@6.0.5: dependencies: basic-ftp: 5.3.0 @@ -8411,6 +8719,8 @@ snapshots: resolve-from@5.0.0: {} + resolve-pkg-maps@1.0.0: {} + responselike@2.0.1: dependencies: lowercase-keys: 2.0.0 @@ -8781,6 +9091,13 @@ snapshots: tslib@2.8.1: {} + tsx@4.21.0: + dependencies: + esbuild: 0.27.7 + get-tsconfig: 4.14.0 + optionalDependencies: + fsevents: 2.3.3 + tunnel-agent@0.6.0: dependencies: safe-buffer: 5.2.1 @@ -8899,7 +9216,7 @@ snapshots: fsevents: 2.3.3 lightningcss: 1.32.0 - vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0): + vite@6.4.2(@types/node@22.19.17)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0): dependencies: esbuild: 0.25.12 fdir: 6.5.0(picomatch@4.0.4) @@ -8912,6 +9229,7 @@ snapshots: fsevents: 2.3.3 jiti: 2.6.1 lightningcss: 1.32.0 + tsx: 4.21.0 vitepress@1.6.4(@algolia/client-search@5.50.2)(@types/node@22.19.17)(lightningcss@1.32.0)(postcss@8.5.10)(search-insights@2.17.3)(typescript@5.9.3): dependencies: diff --git a/scripts/smoke-models.toml b/scripts/smoke-models.toml new file mode 100644 index 0000000..d02ab69 --- /dev/null +++ b/scripts/smoke-models.toml @@ -0,0 +1,85 @@ +# Smoke-test matrix +# +# Drives `pnpm smoke`. API keys are NEVER stored here — set them in your +# environment (one per provider you want to test): +# +# export OPENROUTER_API_KEY=sk-or-... +# export ANTHROPIC_API_KEY=sk-ant-... +# export OPENAI_API_KEY=sk-... +# export GOOGLE_API_KEY=... +# export GROQ_API_KEY=... +# +# Or stash them in a gitignored file and source on demand: +# echo 'export OPENROUTER_API_KEY=sk-or-...' > scripts/.smoke-keys.env +# set -a && source scripts/.smoke-keys.env && set +a && pnpm smoke +# +# Add or remove rows freely. Rows referencing a provider with no key are +# skipped with a warning, not failed. + +[[prompts]] +name = "dashboard" +text = "做一个数据看板,含 3 张图:条形图(销量)、折线图(留存)、饼图(细分)。深色主题。" + +[[prompts]] +name = "mobile_onboarding" +text = "iOS 移动端 onboarding 三屏:欢迎、权限请求、登录。Liquid Glass 风格,每屏构图不同。" + +[[prompts]] +name = "marketing_landing" +text = "Landing page for an indie design tool called Open Codesign. Warm cream tones, editorial display serif (Fraunces), dense layout with hero / 3 feature blocks / pricing / FAQ / CTA." + +[[prompts]] +name = "settings" +text = "应用设置页面,含账户、通知、外观三个 tab。深色主题,sidebar 导航 + 右侧表单。" + +# ─── Free OpenRouter models worth comparing ─────────────────────── +# These all run on $OPENROUTER_API_KEY. No charge. + +[[models]] +provider = "openrouter" +modelId = "qwen/qwen3-coder:free" +# Currently the strongest free coder on OR (480B MoE, 262K ctx). + +[[models]] +provider = "openrouter" +modelId = "deepseek/deepseek-r1:free" +# Reasoning-mandatory; exercises our self-healing retry path. + +[[models]] +provider = "openrouter" +modelId = "nvidia/nemotron-3-super:free" +# Hybrid Mamba-Transformer MoE — different architecture for comparison. + +[[models]] +provider = "openrouter" +modelId = "meta-llama/llama-3.3-70b-instruct:free" +# Solid general baseline. + +[[models]] +provider = "openrouter" +modelId = "openai/gpt-oss-120b:free" +# Already verified working with the reasoning self-healing fix. + +[[models]] +provider = "openrouter" +modelId = "minimax/minimax-m2.5:free" +# Reasoning-mandatory; another self-healing exercise. + +[[models]] +provider = "openrouter" +modelId = "mistralai/devstral-2:free" +# Mistral coding-focused (123B dense). + +[[models]] +provider = "openrouter" +modelId = "xiaomi/mimo-v2-flash:free" +# Reportedly matches Sonnet 4.5 on coding benchmarks — useful upper bound for free tier. + +# ─── Paid baselines (uncomment if you want a quality ceiling) ───── +# [[models]] +# provider = "openrouter" +# modelId = "anthropic/claude-sonnet-4.5" +# +# [[models]] +# provider = "anthropic" +# modelId = "claude-sonnet-4-6" diff --git a/scripts/smoke-models.ts b/scripts/smoke-models.ts new file mode 100644 index 0000000..9763949 --- /dev/null +++ b/scripts/smoke-models.ts @@ -0,0 +1,283 @@ +#!/usr/bin/env tsx +/** + * smoke-models.ts — batch-test (provider, model, prompt) combos through the + * exact same `generate()` code path the desktop app uses. Saves each artifact + * to /tmp/smoke/, runs lightweight quality checks, prints a colored report. + * + * Usage: + * pnpm smoke + * pnpm smoke --model openai/gpt-oss-120b:free + * pnpm smoke --prompt "数据看板" + * pnpm smoke --only-failed + * pnpm smoke --config scripts/my-models.toml + * + * API keys come from the environment (one per provider you want to hit): + * OPENROUTER_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY, GOOGLE_API_KEY, + * GROQ_API_KEY, CEREBRAS_API_KEY, XAI_API_KEY, MISTRAL_API_KEY. + */ +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { isAbsolute, relative, resolve } from 'node:path'; +import { parseArgs } from 'node:util'; +import * as TOML from '@iarna/toml'; +import { generate } from '@open-codesign/core'; +import { Parser } from 'acorn'; + +interface SmokeModel { + provider: string; + modelId: string; +} +interface SmokePrompt { + name: string; + text: string; +} +interface SmokeConfig { + prompts: SmokePrompt[]; + models: SmokeModel[]; +} + +interface RunResult { + model: string; + prompt: string; + ok: boolean; + ms: number; + bytes: number; + artifactPath?: string; + issues: string[]; + error?: string; +} + +const ENV_KEY: Record = { + anthropic: 'ANTHROPIC_API_KEY', + openai: 'OPENAI_API_KEY', + google: 'GOOGLE_API_KEY', + openrouter: 'OPENROUTER_API_KEY', + groq: 'GROQ_API_KEY', + cerebras: 'CEREBRAS_API_KEY', + xai: 'XAI_API_KEY', + mistral: 'MISTRAL_API_KEY', +}; + +const COLOR = { + reset: '\x1b[0m', + green: '\x1b[32m', + red: '\x1b[31m', + yellow: '\x1b[33m', + dim: '\x1b[2m', + bold: '\x1b[1m', +}; + +const SMOKE_DIR = '/tmp/smoke'; +const LAST_RESULTS = `${SMOKE_DIR}/.last-results.json`; + +function parseConfig(path: string): SmokeConfig { + const raw = readFileSync(path, 'utf8'); + const parsed = TOML.parse(raw) as unknown as SmokeConfig; + if (!Array.isArray(parsed.prompts) || !Array.isArray(parsed.models)) { + throw new Error(`${path} must declare [[prompts]] and [[models]] tables`); + } + return parsed; +} + +function slug(model: SmokeModel, prompt: SmokePrompt): string { + const safeModel = `${model.provider}_${model.modelId}`.replace(/[^a-zA-Z0-9._-]/g, '_'); + const safePrompt = prompt.name.replace(/[^a-zA-Z0-9._-]/g, '_'); + return `${safeModel}__${safePrompt}`; +} + +function qualityCheck(html: string): string[] { + const issues: string[] = []; + + const mainCount = (html.match(/]/gi) ?? []).length; + if (mainCount > 1) issues.push(`${mainCount}x
elements`); + + // Emoji used as content icons (anti-slop). Heuristic: emoji codepoint sitting + // inside a
with aria-hidden or a role suggesting decoration. Broad, + // intentionally — false positives are cheap signals here. + const emojiInIcon = html.match( + /aria-hidden=["']true["'][^>]*>[\s]*[\u{1F300}-\u{1FAFF}\u{2600}-\u{27BF}]/gu, + ); + if (emojiInIcon) issues.push(`${emojiInIcon.length} emoji icon(s)`); + + // Validate every