microsoft · richardpark-msft · Mar 21, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
@@ -897,6 +897,57 @@ config:
   max_attempts: 3  # Retry failed graders up to 3 times (default: 1, no retries)
 ```
 
+### Git Resources
+
+Task inputs can reference git repositories as resources, checked out at a specific commit. This is useful for testing against real codebases without manually preparing fixture directories.
+
+```yaml
+# Task YAML
+inputs:
+  prompt: "Fix the bug in server.go"
+  workdir: my-repo                  # agent starts inside this subdirectory
+  files:
+    # Existing resource types still work:
+    - path: helpers/utils.js              # file from context_dir
+    - content: "package main\n..."        # inline content
+
+  repos:
+    # Git resource — checkout a commit from a local repo
+    - type: worktree                 # required (currently only worktree is supported)
+      source: /path/to/local/repo    # required for worktree strategy
+      commit: abc123def              # optional: SHA, branch, or tag (defaults to HEAD)
+      dest: my-repo                  # optional: subdirectory in workspace
+```
+
+**`workdir`** (optional): A relative path within the workspace to use as the agent's working directory. When a git resource is checked out into a subdirectory via `dest`, set `workdir` to that subdirectory so the agent starts inside the repo. Must not escape the workspace root.
+
+**Strategy support:**
+
+| Strategy | Use Case | Mechanism |
+|---|---|---|
+| `worktree` | Source repository already exists on local disk; very cheap, no network | `git worktree add` |
+
+**Fields:**
+
+| Field | Required | Description |
+|---|---|---|
+| `type` | Yes | Checkout strategy. Currently only `worktree` is supported |
+| `source` | Yes | Local folder where the git repository resides |
+| `commit` | No | Commit SHA, branch, or tag. Defaults to HEAD |
+| `dest` | No | Subdirectory name in workspace. Omit to use workspace root |
+
+**Examples:**
+
+```yaml
+# Worktree strategy — cheap checkout from local repo
+- type: worktree
+  source: /path/to/local/repo
+  commit: feature-branch
+  dest: feature
+```
+
+Worktrees are cleaned up automatically (via `git worktree remove`) when the engine shuts down, before the temporary workspace directories are deleted.
+
 When a grader fails, waza will retry the task execution up to `max_attempts` times. The evaluation outcome includes an `attempts` field showing how many executions were needed to pass. This is useful for handling transient failures in external services or non-deterministic grader behavior.
 
 **Output:** JSON results include `attempts` per task showing the number of executions performed.

@@ -272,6 +272,7 @@ func TestNewTaskFromPromptCommand_EndToEndCreatesTaskFile(t *testing.T) {
 	require.NoError(t, err)
 
 	expected := &models.TestCase{
+		Path:        outputPath,
 		DisplayName: "auto-generated",
 		TestID:      "auto-generated",
 		Tags:        []string{"auto-generated"},

@@ -25,8 +25,14 @@ type CopilotEngine struct {
 
 	startOnce sync.Once
 
-	workspacesMu sync.Mutex
-	workspaces   []string // workspaces to clean up at Shutdown
+	// resourcesMu protects workspaces and gitResources
+	resourcesMu sync.Mutex
+	// workspaces are temp folders - each test run gets a unique one, and it's removed at Shutdown.
+	workspaces []string
+	// gitResources are cleaned up at Shutdown.
+	// NOTE: some resources carry bookkeeping information (e.g. git worktrees), so their cleanup
+	// must run before the workspace is deleted.
+	gitResources []GitResource
 
 	// sessions maps session IDs to copilotSessions
 	sessions   map[string]CopilotSession
@@ -137,7 +143,7 @@ func (e *CopilotEngine) Execute(ctx context.Context, req *ExecutionRequest) (*Ex
 
 	start := time.Now()
 
-	workspaceDir, err := e.setupWorkspace(req.Resources)
+	workspaceDir, err := e.setupWorkspace(ctx, req.Resources, req.GitResources)
 
 	if err != nil {
 		return nil, err
@@ -289,23 +295,32 @@ func (e *CopilotEngine) doShutdown(ctx context.Context) error {
 		return fmt.Errorf("failed to stop client: %w", err)
 	}
 
-	// remove the workspace folders - should be safe now that all the copilot sessions are shut down
-	// and the tests are complete.
-	workspaces := func() []string {
-		e.workspacesMu.Lock()
-		defer e.workspacesMu.Unlock()
+	workspaces, gitResources := func() ([]string, []GitResource) {
+		e.resourcesMu.Lock()
+		defer e.resourcesMu.Unlock()
+		worktrees := e.gitResources
+		e.gitResources = nil
+
 		workspaces := e.workspaces
 		e.workspaces = nil
-		return workspaces
+
+		return workspaces, worktrees
 	}()
 
+	// Clean up worktrees before removing workspaces (worktrees may be inside workspace dirs)
+	for _, wt := range gitResources {
+		if err := wt.Cleanup(ctx); err != nil {
+			slog.Warn("failed to cleanup git resource", "error", err)
+		}
+	}
+
+	// remove the workspace folders - should be safe now that all the copilot sessions are shut down
+	// and the tests are complete.
 	for _, ws := range workspaces {
-		if ws != "" {
-			if err := os.RemoveAll(ws); err != nil {
-				// errors here probably indicate some issue with our code continuing to lock files
-				// even after tests have completed...
-				slog.Warn("failed to cleanup stale workspace", "path", ws, "error", err)
-			}
+		if err := os.RemoveAll(ws); err != nil {
+			// errors here probably indicate some issue with our code continuing to lock files
+			// even after tests have completed...
+			slog.Warn("failed to cleanup stale workspace", "path", ws, "error", err)
 		}
 	}
 
@@ -376,22 +391,32 @@ func (*CopilotEngine) getSkillDirs(cwd string, req *ExecutionRequest) []string {
 	return skillDirs
 }
 
-func (e *CopilotEngine) setupWorkspace(resources []ResourceFile) (string, error) {
+func (e *CopilotEngine) setupWorkspace(ctx context.Context, resources []ResourceFile, gitResources []models.GitResource) (string, error) {
 	workspaceDir, err := os.MkdirTemp("", "waza-*")
 
 	if err != nil {
 		return "", fmt.Errorf("failed to create temp workspace: %w", err)
 	}
 
-	e.workspacesMu.Lock()
+	e.resourcesMu.Lock()
 	e.workspaces = append(e.workspaces, workspaceDir)
-	e.workspacesMu.Unlock()
+	e.resourcesMu.Unlock()
 
 	// Write resource files to workspace
 	if err := setupWorkspaceResources(workspaceDir, resources); err != nil {
 		return "", fmt.Errorf("failed to setup resources at workspace %s: %w", workspaceDir, err)
 	}
 
+	wts, err := CloneGitResources(ctx, gitResources, workspaceDir)
+	if err != nil {
+		return "", err
+	}
+	if len(wts) > 0 {
+		e.resourcesMu.Lock()
+		e.gitResources = append(e.gitResources, wts...)
+		e.resourcesMu.Unlock()
+	}
+
 	return workspaceDir, nil
 }
 

@@ -2,6 +2,8 @@ package execution
 
 import (
 	"context"
+	"fmt"
+	"path/filepath"
 	"strings"
 	"time"
 
@@ -30,10 +32,11 @@ type AgentEngine interface {
 
 // ExecutionRequest represents a test execution request
 type ExecutionRequest struct {
-	ModelID   string
-	Message   string
-	Context   map[string]any
-	Resources []ResourceFile
+	ModelID      string
+	Message      string
+	Context      map[string]any
+	Resources    []ResourceFile
+	GitResources []models.GitResource
 
 	SessionID string
 	SkillName string
@@ -54,6 +57,24 @@ type ResourceFile struct {
 	Content []byte
 }
 
// ResolveWorkDir returns the effective working directory for the agent session.
//
// If workDir is empty, workspaceDir is returned unchanged. Otherwise workDir is
// joined onto workspaceDir and the cleaned result is returned, provided it
// still lies inside the workspace. An error is returned when workDir would
// escape the workspace via path traversal (e.g. "../../etc").
//
// The containment check is purely lexical: it operates on the cleaned path and
// does not resolve symlinks.
func ResolveWorkDir(workspaceDir, workDir string) (string, error) {
	if workDir == "" {
		return workspaceDir, nil
	}

	resolved := filepath.Join(workspaceDir, workDir)

	rel, err := filepath.Rel(workspaceDir, resolved)
	if err != nil {
		return "", fmt.Errorf("workdir %q escapes the workspace", workDir)
	}

	// Only a leading ".." path element means the path left the workspace.
	// A plain prefix test would also reject legitimate names that merely
	// begin with two dots, such as "..cache".
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("workdir %q escapes the workspace", workDir)
	}

	return resolved, nil
}
+
 type SkillInvocation struct {
 	// Name of the invoked skill
 	Name string

@@ -9,6 +9,7 @@ import (
 	"github.com/microsoft/waza/internal/models"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	gomock "go.uber.org/mock/gomock"
 )
 
 // SpyEngine wraps an AgentEngine and tracks Shutdown calls.
@@ -172,21 +173,28 @@ func TestCopilotEngine_Shutdown_Idempotent(t *testing.T) {
 }
 
 func TestCopilotEngine_Shutdown_CleansWorkspace(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	gr := NewMockGitResource(ctrl)
+	gr.EXPECT().Cleanup(gomock.Any())
+
 	engine := NewCopilotEngineBuilder("test-model", nil).Build()
 
 	// Simulate a workspace existing (without running the full SDK)
-	tmpDir := t.TempDir()
-	engine.workspacesMu.Lock()
-	engine.workspaces = append(engine.workspaces, tmpDir)
-	engine.workspacesMu.Unlock()
+	tmpWorkspaceDir := t.TempDir()
+
+	engine.resourcesMu.Lock()
+	engine.workspaces = append(engine.workspaces, tmpWorkspaceDir)
+	engine.gitResources = append(engine.gitResources, gr)
+	engine.resourcesMu.Unlock()
 
 	err := engine.Shutdown(context.Background())
 	require.NoError(t, err)
 
 	// After shutdown, workspace should be cleared
-	engine.workspacesMu.Lock()
-	defer engine.workspacesMu.Unlock()
+	engine.resourcesMu.Lock()
+	defer engine.resourcesMu.Unlock()
 	require.Empty(t, engine.workspaces)
+	require.Empty(t, engine.gitResources)
 }
 
 func TestCopilotEngine_Shutdown_WithCancelledContext(t *testing.T) {

@@ -1,3 +1,4 @@
 package execution
 
 //go:generate go tool mockgen -package execution -destination copilot_client_wrapper_mocks_test.go . CopilotSession,CopilotClient
+//go:generate go tool mockgen -package execution -destination execution_mocks.go . GitResource