diff --git a/apps/cli/src/commands/eval/env.ts b/apps/cli/src/commands/eval/env.ts index d8b90f50..63cfc767 100644 --- a/apps/cli/src/commands/eval/env.ts +++ b/apps/cli/src/commands/eval/env.ts @@ -75,9 +75,10 @@ export async function loadEnvFromHierarchy(options: LoadEnvOptions): Promise= 0; i--) { + // Load from the closest .env outward so the nearest file wins while parent + // files still contribute missing keys. override: false also preserves + // explicitly exported process.env values. + for (let i = 0; i < envFiles.length; i++) { const envFile = envFiles[i]; loadDotenv({ path: envFile, override: false }); if (verbose) { diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts index f98134c6..0a9ed4a5 100644 --- a/apps/cli/test/eval.integration.test.ts +++ b/apps/cli/test/eval.integration.test.ts @@ -1,4 +1,4 @@ -import { afterEach, describe, expect, it } from 'bun:test'; +import { describe, expect, it } from 'bun:test'; import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import path from 'node:path'; @@ -75,6 +75,64 @@ tests: return { baseDir, suiteDir, testFilePath, diagnosticsPath } satisfies EvalFixture; } +async function createNestedEnvFixture(): Promise { + const baseDir = await mkdtemp(path.join(tmpdir(), 'agentv-cli-nested-env-test-')); + const suiteDir = path.join(baseDir, 'suite'); + const evalDir = path.join(suiteDir, 'evals', 'foo'); + await mkdir(evalDir, { recursive: true }); + + const agentvDir = path.join(suiteDir, '.agentv'); + await mkdir(agentvDir, { recursive: true }); + + const targetsPath = path.join(agentvDir, 'targets.yaml'); + const targetsContent = `$schema: agentv-targets-v2.2 +targets: + - name: default + provider: mock +`; + await writeFile(targetsPath, targetsContent, 'utf8'); + + const testFilePath = path.join(evalDir, 'sample.test.yaml'); + const testFileContent = `description: CLI nested env integration test + +tests: + - id: case-alpha + criteria: System responds with alpha + input: + - role: user + content: | + Please respond with alpha + expected_output: + - role: assistant + content: "Alpha" + - id: case-beta + criteria: System responds with beta + input: + - role: user + content: | + Please respond with beta + expected_output: + - role: assistant + content: "Beta" +`; + await writeFile(testFilePath, testFileContent, 'utf8'); + + await writeFile( + path.join(suiteDir, '.env'), + 'CLI_ENV_SAMPLE=from-root\nCLI_ENV_ROOT_ONLY=from-root\n', + 'utf8', + ); + await writeFile( + path.join(evalDir, '.env'), + 'CLI_ENV_SAMPLE=from-local\nCLI_ENV_LOCAL_ONLY=from-local\n', + 'utf8', + ); + + const diagnosticsPath = path.join(baseDir, 'diagnostics.json'); + + return { baseDir, suiteDir, testFilePath, diagnosticsPath } satisfies EvalFixture; +} + async function runCli( fixture: EvalFixture, args: readonly string[], @@ -82,9 +140,11 @@ async function runCli( ): Promise<{ stdout: string; stderr: string }> { const baseEnv: Record = { ...process.env }; baseEnv.CLI_ENV_SAMPLE = undefined; + baseEnv.CLI_ENV_ROOT_ONLY = undefined; + baseEnv.CLI_ENV_LOCAL_ONLY = undefined; try { - const result = await execa('bun', [CLI_ENTRY, ...args], { + const result = await execa('bun', ['--no-env-file', CLI_ENTRY, ...args], { cwd: fixture.suiteDir, env: { ...baseEnv, @@ -122,50 +182,69 @@ async function readJsonLines(filePath: string): Promise { } async function readDiagnostics(fixture: EvalFixture): Promise> { - const raw = await readFile(fixture.diagnosticsPath, 'utf8'); - return JSON.parse(raw) as Record; -} - -const fixtures: string[] = []; - -afterEach(async () => { - while (fixtures.length > 0) { - const dir = fixtures.pop(); - if (dir) { - await rm(dir, { recursive: true, force: true }); + for (let attempt = 0; attempt < 20; attempt++) { + try { + const raw = await readFile(fixture.diagnosticsPath, 'utf8'); + return JSON.parse(raw) as Record; + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT' || attempt === 19) { + throw error; + } + await new Promise((resolve) => setTimeout(resolve, 50)); } } -}); + + throw new Error(`Missing diagnostics file: ${fixture.diagnosticsPath}`); +} describe('agentv eval CLI', () => { it('writes results, summary, and prompt dumps using default directories', async () => { const fixture = await createFixture(); - fixtures.push(fixture.baseDir); - - const { stdout } = await runCli(fixture, ['eval', fixture.testFilePath, '--verbose']); - - // Don't check stderr - it may contain stack traces or other diagnostics - expect(stdout).toContain('Using target (test-file): file-target [provider=mock]'); - expect(stdout).toContain('Mean score: 0.750'); - // Std deviation is an implementation detail - don't check it - - const outputPath = extractOutputPath(stdout); - expect(outputPath).toContain(`${path.sep}.agentv${path.sep}results${path.sep}`); - - const results = await readJsonLines(outputPath); - expect(results).toHaveLength(2); - const [firstResult, secondResult] = results as Array>; - expect(firstResult.test_id).toBe('case-alpha'); - expect(secondResult.test_id).toBe('case-beta'); - - const diagnostics = await readDiagnostics(fixture); - expect(diagnostics).toMatchObject({ - target: 'file-target', - agentTimeoutMs: null, - envSample: 'from-dotenv', - resultCount: 2, - }); + try { + const { stdout } = await runCli(fixture, ['eval', fixture.testFilePath, '--verbose']); + + // Don't check stderr - it may contain stack traces or other diagnostics + expect(stdout).toContain('Using target (test-file): file-target [provider=mock]'); + expect(stdout).toContain('Mean score: 0.750'); + // Std deviation is an implementation detail - don't check it + + const outputPath = extractOutputPath(stdout); + expect(outputPath).toContain(`${path.sep}.agentv${path.sep}results${path.sep}`); + + const results = await readJsonLines(outputPath); + expect(results).toHaveLength(2); + const [firstResult, secondResult] = results as Array>; + expect(firstResult.test_id).toBe('case-alpha'); + expect(secondResult.test_id).toBe('case-beta'); + + const diagnostics = await readDiagnostics(fixture); + expect(diagnostics).toMatchObject({ + target: 'file-target', + agentTimeoutMs: null, + envSample: 'from-dotenv', + resultCount: 2, + }); + + // Prompt dump feature has been removed, so we no longer check for it + } finally { + await rm(fixture.baseDir, { recursive: true, force: true }); + } + }); - // Prompt dump feature has been removed, so we no longer check for it + it('loads the nearest .env first and uses parent .env only for missing keys', async () => { + const fixture = await createNestedEnvFixture(); + try { + await runCli(fixture, ['eval', fixture.testFilePath, '--verbose']); + + const diagnostics = await readDiagnostics(fixture); + expect(diagnostics).toMatchObject({ + envSample: 'from-local', + envRootOnly: 'from-root', + envLocalOnly: 'from-local', + resultCount: 2, + }); + } finally { + await rm(fixture.baseDir, { recursive: true, force: true }); + } }); }); diff --git a/apps/cli/test/fixtures/mock-run-evaluation.ts b/apps/cli/test/fixtures/mock-run-evaluation.ts index bc44f83a..cf0fac43 100644 --- a/apps/cli/test/fixtures/mock-run-evaluation.ts +++ b/apps/cli/test/fixtures/mock-run-evaluation.ts @@ -78,6 +78,8 @@ async function maybeWriteDiagnostics( testId: options.testId ?? null, useCache: options.useCache ?? false, envSample: process.env.CLI_ENV_SAMPLE ?? null, + envRootOnly: process.env.CLI_ENV_ROOT_ONLY ?? null, + envLocalOnly: process.env.CLI_ENV_LOCAL_ONLY ?? null, resultCount: results.length, } satisfies Record; diff --git a/apps/cli/test/unit/env.test.ts b/apps/cli/test/unit/env.test.ts new file mode 100644 index 00000000..6918a59f --- /dev/null +++ b/apps/cli/test/unit/env.test.ts @@ -0,0 +1,94 @@ +import { afterEach, beforeEach, describe, expect, it } from 'bun:test'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; + +import { loadEnvFromHierarchy } from '../../src/commands/eval/env.js'; + +describe('loadEnvFromHierarchy', () => { + let originalCwd: string; + let originalEnv: NodeJS.ProcessEnv; + let tempDir: string; + + beforeEach(async () => { + originalCwd = process.cwd(); + originalEnv = { ...process.env }; + tempDir = await mkdtemp(path.join(tmpdir(), 'agentv-env-hierarchy-')); + }); + + afterEach(async () => { + process.chdir(originalCwd); + process.env = { ...originalEnv }; + await rm(tempDir, { recursive: true, force: true }); + }); + + it('lets the nearest .env override parent values while merging missing keys', async () => { + const myVarKey = `AGENTV_ENV_TEST_MY_VAR_${Date.now()}_1`; + const sharedOnlyKey = `AGENTV_ENV_TEST_SHARED_ONLY_${Date.now()}_1`; + const localOnlyKey = `AGENTV_ENV_TEST_LOCAL_ONLY_${Date.now()}_1`; + const repoRoot = tempDir; + const evalDir = path.join(repoRoot, 'evals', 'foo'); + const testFilePath = path.join(evalDir, 'sample.eval.yaml'); + + await mkdir(evalDir, { recursive: true }); + await writeFile( + path.join(repoRoot, '.env'), + `${myVarKey}=root\n${sharedOnlyKey}=from_root\n`, + 'utf8', + ); + await writeFile( + path.join(evalDir, '.env'), + `${myVarKey}=local\n${localOnlyKey}=from_subfolder\n`, + 'utf8', + ); + await writeFile(testFilePath, 'tests: []\n', 'utf8'); + + process.chdir(repoRoot); + + const loadedPath = await loadEnvFromHierarchy({ + testFilePath, + repoRoot, + verbose: false, + }); + + expect(loadedPath).toBe(path.join(evalDir, '.env')); + expect(process.env[myVarKey]).toBe('local'); + expect(process.env[sharedOnlyKey]).toBe('from_root'); + expect(process.env[localOnlyKey]).toBe('from_subfolder'); + }); + + it('does not override values already exported in process.env', async () => { + const myVarKey = `AGENTV_ENV_TEST_MY_VAR_${Date.now()}_2`; + const sharedOnlyKey = `AGENTV_ENV_TEST_SHARED_ONLY_${Date.now()}_2`; + const localOnlyKey = `AGENTV_ENV_TEST_LOCAL_ONLY_${Date.now()}_2`; + const repoRoot = tempDir; + const evalDir = path.join(repoRoot, 'evals', 'foo'); + const testFilePath = path.join(evalDir, 'sample.eval.yaml'); + + await mkdir(evalDir, { recursive: true }); + await writeFile( + path.join(repoRoot, '.env'), + `${myVarKey}=root\n${sharedOnlyKey}=from_root\n`, + 'utf8', + ); + await writeFile( + path.join(evalDir, '.env'), + `${myVarKey}=local\n${localOnlyKey}=from_subfolder\n`, + 'utf8', + ); + await writeFile(testFilePath, 'tests: []\n', 'utf8'); + + process.env[myVarKey] = 'shell'; + process.chdir(repoRoot); + + await loadEnvFromHierarchy({ + testFilePath, + repoRoot, + verbose: false, + }); + + expect(process.env[myVarKey]).toBe('shell'); + expect(process.env[sharedOnlyKey]).toBe('from_root'); + expect(process.env[localOnlyKey]).toBe('from_subfolder'); + }); +}); diff --git a/packages/core/src/evaluation/evaluate.ts b/packages/core/src/evaluation/evaluate.ts index 2ddee007..871b2e0c 100644 --- a/packages/core/src/evaluation/evaluate.ts +++ b/packages/core/src/evaluation/evaluate.ts @@ -238,8 +238,13 @@ export async function evaluate(config: EvalConfig): Promise { const gitRoot = await findGitRoot(process.cwd()); const repoRoot = gitRoot ?? process.cwd(); - // Load .env files from hierarchy (closest to cwd first) - await loadEnvHierarchy(repoRoot); + const testFilePath = config.specFile + ? path.resolve(config.specFile) + : path.join(process.cwd(), '__programmatic__.yaml'); + + // Load .env files from the eval file hierarchy so nested eval-local .env + // files participate even when the command is launched from a parent folder. + await loadEnvHierarchy(repoRoot, testFilePath); let resolvedTarget: ResolvedTarget; let taskProvider: ReturnType | undefined; @@ -263,18 +268,15 @@ export async function evaluate(config: EvalConfig): Promise { } let evalCases: readonly EvalTest[] | EvalTest[]; - let testFilePath: string; if (config.specFile) { // File-based mode: load from YAML - testFilePath = path.resolve(config.specFile); evalCases = await loadTests(testFilePath, repoRoot, { verbose: config.verbose, filter: config.filter, }); } else { // Inline mode: convert EvalTestInput[] to EvalTest[] - testFilePath = path.join(process.cwd(), '__programmatic__.yaml'); evalCases = (config.tests ?? []).map((test): EvalTest => { const input = typeof test.input === 'string' @@ -432,13 +434,13 @@ async function discoverDefaultTarget(repoRoot: string): Promise { +async function loadEnvHierarchy(repoRoot: string, startPath: string): Promise { const { readFileSync } = await import('node:fs'); - const cwd = process.cwd(); - const chain = buildDirectoryChain(path.join(cwd, '_placeholder'), repoRoot); + const chain = buildDirectoryChain(startPath, repoRoot); // Collect .env files from closest to root const envFiles: string[] = []; @@ -447,8 +449,10 @@ async function loadEnvHierarchy(repoRoot: string): Promise { if (existsSync(envPath)) envFiles.push(envPath); } - // Load from root to child so child values take precedence - for (let i = envFiles.length - 1; i >= 0; i--) { + // buildDirectoryChain returns directories from closest to farthest. Loading in + // that same order means nearer .env files set shared keys first, while parent + // .env files loaded afterward only backfill keys that are still missing. + for (let i = 0; i < envFiles.length; i++) { try { const content = readFileSync(envFiles[i], 'utf8'); for (const line of content.split('\n')) {