From bf36bafd8720fb9c1e4723c6328e0422cd22dfd7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:20:43 +0000 Subject: [PATCH 1/4] Initial plan From d3c2bf7ec71b92bd8d17073fa78466df97004154 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:25:13 +0000 Subject: [PATCH 2/4] fix(cli): prefer nearest env file in eval hierarchy Co-authored-by: christso <3115311+christso@users.noreply.github.com> --- apps/cli/src/commands/eval/env.ts | 7 ++- apps/cli/test/unit/env.test.ts | 98 +++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 apps/cli/test/unit/env.test.ts diff --git a/apps/cli/src/commands/eval/env.ts b/apps/cli/src/commands/eval/env.ts index d8b90f50..63cfc767 100644 --- a/apps/cli/src/commands/eval/env.ts +++ b/apps/cli/src/commands/eval/env.ts @@ -75,9 +75,10 @@ export async function loadEnvFromHierarchy(options: LoadEnvOptions): Promise= 0; i--) { + // Load from the closest .env outward so the nearest file wins while parent + // files still contribute missing keys. override: false also preserves + // explicitly exported process.env values. + for (let i = 0; i < envFiles.length; i++) { const envFile = envFiles[i]; loadDotenv({ path: envFile, override: false }); if (verbose) { diff --git a/apps/cli/test/unit/env.test.ts b/apps/cli/test/unit/env.test.ts new file mode 100644 index 00000000..aa806ac9 --- /dev/null +++ b/apps/cli/test/unit/env.test.ts @@ -0,0 +1,98 @@ +import { afterEach, beforeEach, describe, expect, it } from 'bun:test'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; + +import { loadEnvFromHierarchy } from '../../src/commands/eval/env.js'; + +describe('loadEnvFromHierarchy', () => { + let originalCwd: string; + let tempDir: string; + let originalMyVar: string | undefined; + let originalSharedOnly: string | undefined; + let originalLocalOnly: string | undefined; + + beforeEach(async () => { + originalCwd = process.cwd(); + tempDir = await mkdtemp(path.join(tmpdir(), 'agentv-env-hierarchy-')); + originalMyVar = process.env.MY_VAR; + originalSharedOnly = process.env.SHARED_ONLY; + originalLocalOnly = process.env.LOCAL_ONLY; + + delete process.env.MY_VAR; + delete process.env.SHARED_ONLY; + delete process.env.LOCAL_ONLY; + }); + + afterEach(async () => { + process.chdir(originalCwd); + + if (originalMyVar === undefined) { + delete process.env.MY_VAR; + } else { + process.env.MY_VAR = originalMyVar; + } + + if (originalSharedOnly === undefined) { + delete process.env.SHARED_ONLY; + } else { + process.env.SHARED_ONLY = originalSharedOnly; + } + + if (originalLocalOnly === undefined) { + delete process.env.LOCAL_ONLY; + } else { + process.env.LOCAL_ONLY = originalLocalOnly; + } + + await rm(tempDir, { recursive: true, force: true }); + }); + + it('lets the nearest .env override parent values while merging missing keys', async () => { + const repoRoot = tempDir; + const evalDir = path.join(repoRoot, 'evals', 'foo'); + const testFilePath = path.join(evalDir, 'sample.eval.yaml'); + + await mkdir(evalDir, { recursive: true }); + await writeFile(path.join(repoRoot, '.env'), 'MY_VAR=root\nSHARED_ONLY=from_root\n', 'utf8'); + await writeFile(path.join(evalDir, '.env'), 'MY_VAR=local\nLOCAL_ONLY=from_subfolder\n', 'utf8'); + await writeFile(testFilePath, 'tests: []\n', 'utf8'); + + process.chdir(repoRoot); + + const loadedPath = await loadEnvFromHierarchy({ + testFilePath, + repoRoot, + verbose: false, + }); + + expect(loadedPath).toBe(path.join(evalDir, '.env')); + expect(process.env.MY_VAR).toBe('local'); + expect(process.env.SHARED_ONLY).toBe('from_root'); + expect(process.env.LOCAL_ONLY).toBe('from_subfolder'); + }); + + it('does not override values already exported in process.env', async () => { + const repoRoot = tempDir; + const evalDir = path.join(repoRoot, 'evals', 'foo'); + const testFilePath = path.join(evalDir, 'sample.eval.yaml'); + + await mkdir(evalDir, { recursive: true }); + await writeFile(path.join(repoRoot, '.env'), 'MY_VAR=root\nSHARED_ONLY=from_root\n', 'utf8'); + await writeFile(path.join(evalDir, '.env'), 'MY_VAR=local\nLOCAL_ONLY=from_subfolder\n', 'utf8'); + await writeFile(testFilePath, 'tests: []\n', 'utf8'); + + process.env.MY_VAR = 'shell'; + process.chdir(repoRoot); + + await loadEnvFromHierarchy({ + testFilePath, + repoRoot, + verbose: false, + }); + + expect(process.env.MY_VAR).toBe('shell'); + expect(process.env.SHARED_ONLY).toBe('from_root'); + expect(process.env.LOCAL_ONLY).toBe('from_subfolder'); + }); +}); From fda19173f373e65ac0a8ba77f788060141a8a4f8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:26:37 +0000 Subject: [PATCH 3/4] fix(cli): prefer nearest env file in eval hierarchy Co-authored-by: christso <3115311+christso@users.noreply.github.com> --- apps/cli/test/unit/env.test.ts | 76 ++++++++++++++++------------------ 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/apps/cli/test/unit/env.test.ts b/apps/cli/test/unit/env.test.ts index aa806ac9..6918a59f 100644 --- a/apps/cli/test/unit/env.test.ts +++ b/apps/cli/test/unit/env.test.ts @@ -7,55 +7,40 @@ import { loadEnvFromHierarchy } from '../../src/commands/eval/env.js'; describe('loadEnvFromHierarchy', () => { let originalCwd: string; + let originalEnv: NodeJS.ProcessEnv; let tempDir: string; - let originalMyVar: string | undefined; - let originalSharedOnly: string | undefined; - let originalLocalOnly: string | undefined; beforeEach(async () => { originalCwd = process.cwd(); + originalEnv = { ...process.env }; tempDir = await mkdtemp(path.join(tmpdir(), 'agentv-env-hierarchy-')); - originalMyVar = process.env.MY_VAR; - originalSharedOnly = process.env.SHARED_ONLY; - originalLocalOnly = process.env.LOCAL_ONLY; - - delete process.env.MY_VAR; - delete process.env.SHARED_ONLY; - delete process.env.LOCAL_ONLY; }); afterEach(async () => { process.chdir(originalCwd); - - if (originalMyVar === undefined) { - delete process.env.MY_VAR; - } else { - process.env.MY_VAR = originalMyVar; - } - - if (originalSharedOnly === undefined) { - delete process.env.SHARED_ONLY; - } else { - process.env.SHARED_ONLY = originalSharedOnly; - } - - if (originalLocalOnly === undefined) { - delete process.env.LOCAL_ONLY; - } else { - process.env.LOCAL_ONLY = originalLocalOnly; - } - + process.env = { ...originalEnv }; await rm(tempDir, { recursive: true, force: true }); }); it('lets the nearest .env override parent values while merging missing keys', async () => { + const myVarKey = `AGENTV_ENV_TEST_MY_VAR_${Date.now()}_1`; + const sharedOnlyKey = `AGENTV_ENV_TEST_SHARED_ONLY_${Date.now()}_1`; + const localOnlyKey = `AGENTV_ENV_TEST_LOCAL_ONLY_${Date.now()}_1`; const repoRoot = tempDir; const evalDir = path.join(repoRoot, 'evals', 'foo'); const testFilePath = path.join(evalDir, 'sample.eval.yaml'); await mkdir(evalDir, { recursive: true }); - await writeFile(path.join(repoRoot, '.env'), 'MY_VAR=root\nSHARED_ONLY=from_root\n', 'utf8'); - await writeFile(path.join(evalDir, '.env'), 'MY_VAR=local\nLOCAL_ONLY=from_subfolder\n', 'utf8'); + await writeFile( + path.join(repoRoot, '.env'), + `${myVarKey}=root\n${sharedOnlyKey}=from_root\n`, + 'utf8', + ); + await writeFile( + path.join(evalDir, '.env'), + `${myVarKey}=local\n${localOnlyKey}=from_subfolder\n`, + 'utf8', + ); await writeFile(testFilePath, 'tests: []\n', 'utf8'); process.chdir(repoRoot); @@ -67,22 +52,33 @@ describe('loadEnvFromHierarchy', () => { }); expect(loadedPath).toBe(path.join(evalDir, '.env')); - expect(process.env.MY_VAR).toBe('local'); - expect(process.env.SHARED_ONLY).toBe('from_root'); - expect(process.env.LOCAL_ONLY).toBe('from_subfolder'); + expect(process.env[myVarKey]).toBe('local'); + expect(process.env[sharedOnlyKey]).toBe('from_root'); + expect(process.env[localOnlyKey]).toBe('from_subfolder'); }); it('does not override values already exported in process.env', async () => { + const myVarKey = `AGENTV_ENV_TEST_MY_VAR_${Date.now()}_2`; + const sharedOnlyKey = `AGENTV_ENV_TEST_SHARED_ONLY_${Date.now()}_2`; + const localOnlyKey = `AGENTV_ENV_TEST_LOCAL_ONLY_${Date.now()}_2`; const repoRoot = tempDir; const evalDir = path.join(repoRoot, 'evals', 'foo'); const testFilePath = path.join(evalDir, 'sample.eval.yaml'); await mkdir(evalDir, { recursive: true }); - await writeFile(path.join(repoRoot, '.env'), 'MY_VAR=root\nSHARED_ONLY=from_root\n', 'utf8'); - await writeFile(path.join(evalDir, '.env'), 'MY_VAR=local\nLOCAL_ONLY=from_subfolder\n', 'utf8'); + await writeFile( + path.join(repoRoot, '.env'), + `${myVarKey}=root\n${sharedOnlyKey}=from_root\n`, + 'utf8', + ); + await writeFile( + path.join(evalDir, '.env'), + `${myVarKey}=local\n${localOnlyKey}=from_subfolder\n`, + 'utf8', + ); await writeFile(testFilePath, 'tests: []\n', 'utf8'); - process.env.MY_VAR = 'shell'; + process.env[myVarKey] = 'shell'; process.chdir(repoRoot); await loadEnvFromHierarchy({ @@ -91,8 +87,8 @@ describe('loadEnvFromHierarchy', () => { verbose: false, }); - expect(process.env.MY_VAR).toBe('shell'); - expect(process.env.SHARED_ONLY).toBe('from_root'); - expect(process.env.LOCAL_ONLY).toBe('from_subfolder'); + expect(process.env[myVarKey]).toBe('shell'); + expect(process.env[sharedOnlyKey]).toBe('from_root'); + expect(process.env[localOnlyKey]).toBe('from_subfolder'); }); }); From 7cd0007c0bbb4edd867e05f208048ba30d6adca6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 13 Mar 2026 20:51:00 +0000 Subject: [PATCH 4/4] Co-authored-by: christso <3115311+christso@users.noreply.github.com> --- apps/cli/test/eval.integration.test.ts | 159 +++++++++++++----- apps/cli/test/fixtures/mock-run-evaluation.ts | 2 + packages/core/src/evaluation/evaluate.ts | 28 +-- 3 files changed, 137 insertions(+), 52 deletions(-) diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts index f98134c6..0a9ed4a5 100644 --- a/apps/cli/test/eval.integration.test.ts +++ b/apps/cli/test/eval.integration.test.ts @@ -1,4 +1,4 @@ -import { afterEach, describe, expect, it } from 'bun:test'; +import { describe, expect, it } from 'bun:test'; import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import path from 'node:path'; @@ -75,6 +75,64 @@ tests: return { baseDir, suiteDir, testFilePath, diagnosticsPath } satisfies EvalFixture; } +async function createNestedEnvFixture(): Promise { + const baseDir = await mkdtemp(path.join(tmpdir(), 'agentv-cli-nested-env-test-')); + const suiteDir = path.join(baseDir, 'suite'); + const evalDir = path.join(suiteDir, 'evals', 'foo'); + await mkdir(evalDir, { recursive: true }); + + const agentvDir = path.join(suiteDir, '.agentv'); + await mkdir(agentvDir, { recursive: true }); + + const targetsPath = path.join(agentvDir, 'targets.yaml'); + const targetsContent = `$schema: agentv-targets-v2.2 +targets: + - name: default + provider: mock +`; + await writeFile(targetsPath, targetsContent, 'utf8'); + + const testFilePath = path.join(evalDir, 'sample.test.yaml'); + const testFileContent = `description: CLI nested env integration test + +tests: + - id: case-alpha + criteria: System responds with alpha + input: + - role: user + content: | + Please respond with alpha + expected_output: + - role: assistant + content: "Alpha" + - id: case-beta + criteria: System responds with beta + input: + - role: user + content: | + Please respond with beta + expected_output: + - role: assistant + content: "Beta" +`; + await writeFile(testFilePath, testFileContent, 'utf8'); + + await writeFile( + path.join(suiteDir, '.env'), + 'CLI_ENV_SAMPLE=from-root\nCLI_ENV_ROOT_ONLY=from-root\n', + 'utf8', + ); + await writeFile( + path.join(evalDir, '.env'), + 'CLI_ENV_SAMPLE=from-local\nCLI_ENV_LOCAL_ONLY=from-local\n', + 'utf8', + ); + + const diagnosticsPath = path.join(baseDir, 'diagnostics.json'); + + return { baseDir, suiteDir, testFilePath, diagnosticsPath } satisfies EvalFixture; +} + async function runCli( fixture: EvalFixture, args: readonly string[], @@ -82,9 +140,11 @@ async function runCli( ): Promise<{ stdout: string; stderr: string }> { const baseEnv: Record = { ...process.env }; baseEnv.CLI_ENV_SAMPLE = undefined; + baseEnv.CLI_ENV_ROOT_ONLY = undefined; + baseEnv.CLI_ENV_LOCAL_ONLY = undefined; try { - const result = await execa('bun', [CLI_ENTRY, ...args], { + const result = await execa('bun', ['--no-env-file', CLI_ENTRY, ...args], { cwd: fixture.suiteDir, env: { ...baseEnv, @@ -122,50 +182,69 @@ async function readJsonLines(filePath: string): Promise { } async function readDiagnostics(fixture: EvalFixture): Promise> { - const raw = await readFile(fixture.diagnosticsPath, 'utf8'); - return JSON.parse(raw) as Record; -} - -const fixtures: string[] = []; - -afterEach(async () => { - while (fixtures.length > 0) { - const dir = fixtures.pop(); - if (dir) { - await rm(dir, { recursive: true, force: true }); + for (let attempt = 0; attempt < 20; attempt++) { + try { + const raw = await readFile(fixture.diagnosticsPath, 'utf8'); + return JSON.parse(raw) as Record; + } catch (error) { + if ((error as NodeJS.ErrnoException).code !== 'ENOENT' || attempt === 19) { + throw error; + } + await new Promise((resolve) => setTimeout(resolve, 50)); } } -}); + + throw new Error(`Missing diagnostics file: ${fixture.diagnosticsPath}`); +} describe('agentv eval CLI', () => { it('writes results, summary, and prompt dumps using default directories', async () => { const fixture = await createFixture(); - fixtures.push(fixture.baseDir); - - const { stdout } = await runCli(fixture, ['eval', fixture.testFilePath, '--verbose']); - - // Don't check stderr - it may contain stack traces or other diagnostics - expect(stdout).toContain('Using target (test-file): file-target [provider=mock]'); - expect(stdout).toContain('Mean score: 0.750'); - // Std deviation is an implementation detail - don't check it - - const outputPath = extractOutputPath(stdout); - expect(outputPath).toContain(`${path.sep}.agentv${path.sep}results${path.sep}`); - - const results = await readJsonLines(outputPath); - expect(results).toHaveLength(2); - const [firstResult, secondResult] = results as Array>; - expect(firstResult.test_id).toBe('case-alpha'); - expect(secondResult.test_id).toBe('case-beta'); - - const diagnostics = await readDiagnostics(fixture); - expect(diagnostics).toMatchObject({ - target: 'file-target', - agentTimeoutMs: null, - envSample: 'from-dotenv', - resultCount: 2, - }); + try { + const { stdout } = await runCli(fixture, ['eval', fixture.testFilePath, '--verbose']); + + // Don't check stderr - it may contain stack traces or other diagnostics + expect(stdout).toContain('Using target (test-file): file-target [provider=mock]'); + expect(stdout).toContain('Mean score: 0.750'); + // Std deviation is an implementation detail - don't check it + + const outputPath = extractOutputPath(stdout); + expect(outputPath).toContain(`${path.sep}.agentv${path.sep}results${path.sep}`); + + const results = await readJsonLines(outputPath); + expect(results).toHaveLength(2); + const [firstResult, secondResult] = results as Array>; + expect(firstResult.test_id).toBe('case-alpha'); + expect(secondResult.test_id).toBe('case-beta'); + + const diagnostics = await readDiagnostics(fixture); + expect(diagnostics).toMatchObject({ + target: 'file-target', + agentTimeoutMs: null, + envSample: 'from-dotenv', + resultCount: 2, + }); + + // Prompt dump feature has been removed, so we no longer check for it + } finally { + await rm(fixture.baseDir, { recursive: true, force: true }); + } + }); - // Prompt dump feature has been removed, so we no longer check for it + it('loads the nearest .env first and uses parent .env only for missing keys', async () => { + const fixture = await createNestedEnvFixture(); + try { + await runCli(fixture, ['eval', fixture.testFilePath, '--verbose']); + + const diagnostics = await readDiagnostics(fixture); + expect(diagnostics).toMatchObject({ + envSample: 'from-local', + envRootOnly: 'from-root', + envLocalOnly: 'from-local', + resultCount: 2, + }); + } finally { + await rm(fixture.baseDir, { recursive: true, force: true }); + } }); }); diff --git a/apps/cli/test/fixtures/mock-run-evaluation.ts b/apps/cli/test/fixtures/mock-run-evaluation.ts index bc44f83a..cf0fac43 100644 --- a/apps/cli/test/fixtures/mock-run-evaluation.ts +++ b/apps/cli/test/fixtures/mock-run-evaluation.ts @@ -78,6 +78,8 @@ async function maybeWriteDiagnostics( testId: options.testId ?? null, useCache: options.useCache ?? false, envSample: process.env.CLI_ENV_SAMPLE ?? null, + envRootOnly: process.env.CLI_ENV_ROOT_ONLY ?? null, + envLocalOnly: process.env.CLI_ENV_LOCAL_ONLY ?? null, resultCount: results.length, } satisfies Record; diff --git a/packages/core/src/evaluation/evaluate.ts b/packages/core/src/evaluation/evaluate.ts index 2ddee007..871b2e0c 100644 --- a/packages/core/src/evaluation/evaluate.ts +++ b/packages/core/src/evaluation/evaluate.ts @@ -238,8 +238,13 @@ export async function evaluate(config: EvalConfig): Promise { const gitRoot = await findGitRoot(process.cwd()); const repoRoot = gitRoot ?? process.cwd(); - // Load .env files from hierarchy (closest to cwd first) - await loadEnvHierarchy(repoRoot); + const testFilePath = config.specFile + ? path.resolve(config.specFile) + : path.join(process.cwd(), '__programmatic__.yaml'); + + // Load .env files from the eval file hierarchy so nested eval-local .env + // files participate even when the command is launched from a parent folder. + await loadEnvHierarchy(repoRoot, testFilePath); let resolvedTarget: ResolvedTarget; let taskProvider: ReturnType | undefined; @@ -263,18 +268,15 @@ export async function evaluate(config: EvalConfig): Promise { } let evalCases: readonly EvalTest[] | EvalTest[]; - let testFilePath: string; if (config.specFile) { // File-based mode: load from YAML - testFilePath = path.resolve(config.specFile); evalCases = await loadTests(testFilePath, repoRoot, { verbose: config.verbose, filter: config.filter, }); } else { // Inline mode: convert EvalTestInput[] to EvalTest[] - testFilePath = path.join(process.cwd(), '__programmatic__.yaml'); evalCases = (config.tests ?? []).map((test): EvalTest => { const input = typeof test.input === 'string' @@ -432,13 +434,13 @@ async function discoverDefaultTarget(repoRoot: string): Promise { +async function loadEnvHierarchy(repoRoot: string, startPath: string): Promise { const { readFileSync } = await import('node:fs'); - const cwd = process.cwd(); - const chain = buildDirectoryChain(path.join(cwd, '_placeholder'), repoRoot); + const chain = buildDirectoryChain(startPath, repoRoot); // Collect .env files from closest to root const envFiles: string[] = []; @@ -447,8 +449,10 @@ async function loadEnvHierarchy(repoRoot: string): Promise { if (existsSync(envPath)) envFiles.push(envPath); } - // Load from root to child so child values take precedence - for (let i = envFiles.length - 1; i >= 0; i--) { + // buildDirectoryChain returns directories from closest to farthest. Loading in + // that same order means nearer .env files set shared keys first, while parent + // .env files loaded afterward only backfill keys that are still missing. + for (let i = 0; i < envFiles.length; i++) { try { const content = readFileSync(envFiles[i], 'utf8'); for (const line of content.split('\n')) {