From d3eb11e555313a862732c0afdc648b0db70cae87 Mon Sep 17 00:00:00 2001 From: Shayne Boyer Date: Thu, 5 Mar 2026 11:00:12 -0500 Subject: [PATCH 1/9] feat: add eval coverage grid generator #82 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 12 + cmd/waza/cmd_coverage.go | 384 ++++++++++++++++++++++++ cmd/waza/cmd_coverage_test.go | 125 ++++++++ cmd/waza/root.go | 1 + site/src/content/docs/reference/cli.mdx | 30 ++ 5 files changed, 552 insertions(+) create mode 100644 cmd/waza/cmd_coverage.go create mode 100644 cmd/waza/cmd_coverage_test.go diff --git a/README.md b/README.md index af494dc1..2d108e82 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,9 @@ waza run examples/code-explainer/eval.yaml --context-dir examples/code-explainer # Compare results across models waza compare results-gpt4.json results-sonnet.json +# Generate eval coverage grid +waza coverage --format markdown + # Count tokens in skill files waza tokens count skills/ @@ -301,6 +304,15 @@ Compare results from multiple evaluation runs side by side — per-task score de |------|-------|-------------| | `--format ` | `-f` | Output format: `table` or `json` (default: `table`) | +### `waza coverage [root]` + +Generate a skill-to-eval coverage grid showing which skills are fully covered, partially covered, or missing evals. + +| Flag | Short | Description | +|------|-------|-------------| +| `--format ` | `-f` | Output format: `text`, `markdown`, or `json` (default: `text`) | +| `--discover ` | | Additional directory to scan for skills/evals (repeatable) | + ### `waza cache clear` Clear all cached evaluation results to force re-execution on the next run. diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go new file mode 100644 index 00000000..914dcf78 --- /dev/null +++ b/cmd/waza/cmd_coverage.go @@ -0,0 +1,384 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "text/tabwriter" + + "github.com/microsoft/waza/internal/models" + "github.com/microsoft/waza/internal/skill" + "github.com/spf13/cobra" + "gopkg.in/yaml.v3" +) + +type coverageSkillRow struct { + Skill string `json:"skill"` + Tasks int `json:"tasks"` + Graders []string `json:"graders"` + Coverage string `json:"coverage"` +} + +type coverageReport struct { + TotalSkills int `json:"total_skills"` + Covered int `json:"covered"` + Partial int `json:"partial"` + Uncovered int `json:"uncovered"` + CoveragePct float64 `json:"coverage_pct"` + Skills []coverageSkillRow `json:"skills"` +} + +type evalSpecLite struct { + Skill string `yaml:"skill"` + Tasks []string `yaml:"tasks"` + Graders []models.GraderConfig `yaml:"graders"` +} + +func newCoverageCommand() *cobra.Command { + var outputFormat string + var discoverPaths []string + + cmd := &cobra.Command{ + Use: "coverage [root]", + Short: "Generate an eval coverage grid for discovered skills", + Long: `Generate an eval coverage grid showing which skills have eval coverage. + +By default, this command scans: + - skills/ and .github/skills for SKILL.md files + - evals/ and skill directories for eval.yaml files + +Use --discover to add additional directories to scan for eval and skill files.`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root := "." + if len(args) > 0 { + root = args[0] + } + + report, err := buildCoverageReport(root, discoverPaths) + if err != nil { + return err + } + + switch outputFormat { + case "text": + renderCoverageText(cmd.OutOrStdout(), report) + case "markdown": + renderCoverageMarkdown(cmd.OutOrStdout(), report) + case "json": + if err := renderCoverageJSON(cmd.OutOrStdout(), report); err != nil { + return err + } + default: + return fmt.Errorf("unsupported format %q: must be text, markdown, or json", outputFormat) + } + return nil + }, + } + + cmd.Flags().StringVarP(&outputFormat, "format", "f", "text", "Output format: text, markdown, or json") + cmd.Flags().StringArrayVar(&discoverPaths, "discover", nil, "Additional directories to scan for skills/evals (repeatable)") + return cmd +} + +func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, error) { + absRoot, err := filepath.Abs(root) + if err != nil { + return nil, fmt.Errorf("resolving root path: %w", err) + } + if _, err := os.Stat(absRoot); err != nil { + return nil, fmt.Errorf("invalid root path %q: %w", root, err) + } + + skillPaths, err := discoverSkillFiles(absRoot, discoverPaths) + if err != nil { + return nil, err + } + if len(skillPaths) == 0 { + return nil, fmt.Errorf("no SKILL.md files found under %s", absRoot) + } + + evalBySkill := make(map[string][]string) + tasksBySkill := make(map[string]int) + gradersBySkill := make(map[string]map[string]struct{}) + + evalPaths, err := discoverEvalFiles(absRoot, skillPaths, discoverPaths) + if err != nil { + return nil, err + } + + for _, evalPath := range evalPaths { + spec, parseErr := parseEvalSpec(evalPath) + if parseErr != nil { + continue + } + skillName := strings.TrimSpace(spec.Skill) + if skillName == "" { + skillName = inferSkillNameFromEvalPath(evalPath) + } + if skillName == "" { + continue + } + evalBySkill[skillName] = append(evalBySkill[skillName], evalPath) + tasksBySkill[skillName] += len(spec.Tasks) + if _, ok := gradersBySkill[skillName]; !ok { + gradersBySkill[skillName] = make(map[string]struct{}) + } + for _, g := range spec.Graders { + kind := strings.TrimSpace(string(g.Kind)) + if kind != "" { + gradersBySkill[skillName][kind] = struct{}{} + } + } + } + + skillNames := make([]string, 0, len(skillPaths)) + for name := range skillPaths { + skillNames = append(skillNames, name) + } + sort.Strings(skillNames) + + report := &coverageReport{ + TotalSkills: len(skillNames), + Skills: make([]coverageSkillRow, 0, len(skillNames)), + } + + for _, name := range skillNames { + graderSet := gradersBySkill[name] + graders := sortedKeys(graderSet) + tasks := tasksBySkill[name] + hasEval := len(evalBySkill[name]) > 0 + + coverage := "❌ None" + switch { + case !hasEval: + report.Uncovered++ + case tasks > 0 && len(graders) >= 2: + coverage = "✅ Full" + report.Covered++ + default: + coverage = "⚠️ Partial" + report.Partial++ + } + + report.Skills = append(report.Skills, coverageSkillRow{ + Skill: name, + Tasks: tasks, + Graders: graders, + Coverage: coverage, + }) + } + + if report.TotalSkills > 0 { + report.CoveragePct = float64(report.Covered+report.Partial) * 100 / float64(report.TotalSkills) + } + return report, nil +} + +func discoverSkillFiles(root string, discoverPaths []string) (map[string]string, error) { + searchRoots := []string{ + filepath.Join(root, "skills"), + filepath.Join(root, ".github", "skills"), + } + for _, p := range discoverPaths { + searchRoots = append(searchRoots, resolvePath(root, p)) + } + + found := make(map[string]string) + seenPaths := make(map[string]struct{}) + + for _, sr := range searchRoots { + if !isDir(sr) { + continue + } + err := filepath.WalkDir(sr, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + name := d.Name() + if strings.HasPrefix(name, ".") || name == "node_modules" || name == "vendor" { + return fs.SkipDir + } + return nil + } + if d.Name() != "SKILL.md" { + return nil + } + absPath, _ := filepath.Abs(path) + if _, ok := seenPaths[absPath]; ok { + return nil + } + seenPaths[absPath] = struct{}{} + skillName := parseSkillName(absPath) + if skillName == "" { + skillName = filepath.Base(filepath.Dir(absPath)) + } + if _, exists := found[skillName]; !exists { + found[skillName] = absPath + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("walking skill directory %s: %w", sr, err) + } + } + + return found, nil +} + +func discoverEvalFiles(root string, skillPaths map[string]string, discoverPaths []string) ([]string, error) { + searchRoots := []string{filepath.Join(root, "evals")} + for _, p := range discoverPaths { + searchRoots = append(searchRoots, resolvePath(root, p)) + } + + candidates := make(map[string]struct{}) + + for _, evalRoot := range searchRoots { + if !isDir(evalRoot) { + continue + } + if err := filepath.WalkDir(evalRoot, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + name := d.Name() + if strings.HasPrefix(name, ".") || name == "node_modules" || name == "vendor" { + return fs.SkipDir + } + return nil + } + if d.Name() == "eval.yaml" { + absPath, _ := filepath.Abs(path) + candidates[absPath] = struct{}{} + } + return nil + }); err != nil { + return nil, fmt.Errorf("walking eval directory %s: %w", evalRoot, err) + } + } + + for _, skillPath := range skillPaths { + skillDir := filepath.Dir(skillPath) + for _, rel := range []string{"eval.yaml", filepath.Join("evals", "eval.yaml"), filepath.Join("tests", "eval.yaml")} { + p := filepath.Join(skillDir, rel) + if isFile(p) { + absPath, _ := filepath.Abs(p) + candidates[absPath] = struct{}{} + } + } + } + + evalPaths := make([]string, 0, len(candidates)) + for path := range candidates { + evalPaths = append(evalPaths, path) + } + sort.Strings(evalPaths) + return evalPaths, nil +} + +func parseEvalSpec(evalPath string) (*evalSpecLite, error) { + data, err := os.ReadFile(evalPath) + if err != nil { + return nil, err + } + var spec evalSpecLite + if err := yaml.Unmarshal(data, &spec); err != nil { + return nil, err + } + return &spec, nil +} + +func parseSkillName(path string) string { + data, err := os.ReadFile(path) + if err != nil { + return "" + } + var sk skill.Skill + if err := sk.UnmarshalText(data); err != nil { + return "" + } + return strings.TrimSpace(sk.Frontmatter.Name) +} + +func inferSkillNameFromEvalPath(evalPath string) string { + parent := filepath.Base(filepath.Dir(evalPath)) + switch parent { + case "evals", "tests": + return filepath.Base(filepath.Dir(filepath.Dir(evalPath))) + default: + return parent + } +} + +func renderCoverageText(w io.Writer, report *coverageReport) { + fmt.Fprintln(w, "📊 Eval Coverage Grid") //nolint:errcheck + fmt.Fprintf(w, "Coverage: %.1f%% (%d/%d)\n\n", report.CoveragePct, report.Covered+report.Partial, report.TotalSkills) //nolint:errcheck + + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "Skill\tTasks\tGraders\tCoverage") //nolint:errcheck + fmt.Fprintln(tw, "-----\t-----\t-------\t--------") //nolint:errcheck + for _, row := range report.Skills { + graders := "—" + if len(row.Graders) > 0 { + graders = strings.Join(row.Graders, ", ") + } + fmt.Fprintf(tw, "%s\t%d\t%s\t%s\n", row.Skill, row.Tasks, graders, row.Coverage) //nolint:errcheck + } + _ = tw.Flush() +} + +func renderCoverageMarkdown(w io.Writer, report *coverageReport) { + fmt.Fprintln(w, "📊 Eval Coverage Grid") //nolint:errcheck + fmt.Fprintln(w, "| Skill | Tasks | Graders | Coverage |") //nolint:errcheck + fmt.Fprintln(w, "|-------|-------|---------|----------|") //nolint:errcheck + for _, row := range report.Skills { + graders := "—" + if len(row.Graders) > 0 { + graders = strings.Join(row.Graders, ", ") + } + fmt.Fprintf(w, "| %s | %d | %s | %s |\n", row.Skill, row.Tasks, graders, row.Coverage) //nolint:errcheck + } +} + +func renderCoverageJSON(w io.Writer, report *coverageReport) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(report) +} + +func sortedKeys(set map[string]struct{}) []string { + if len(set) == 0 { + return nil + } + keys := make([]string, 0, len(set)) + for k := range set { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + +func resolvePath(root, p string) string { + if filepath.IsAbs(p) { + return p + } + return filepath.Join(root, p) +} + +func isDir(path string) bool { + info, err := os.Stat(path) + return err == nil && info.IsDir() +} + +func isFile(path string) bool { + info, err := os.Stat(path) + return err == nil && !info.IsDir() +} diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go new file mode 100644 index 00000000..cbf5ade2 --- /dev/null +++ b/cmd/waza/cmd_coverage_test.go @@ -0,0 +1,125 @@ +package main + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuildCoverageReport_NoEvals(t *testing.T) { + root := t.TempDir() + writeSkill(t, root, filepath.Join("skills", "alpha"), "alpha") + writeSkill(t, root, filepath.Join("skills", "beta"), "beta") + + report, err := buildCoverageReport(root, nil) + require.NoError(t, err) + + assert.Equal(t, 2, report.TotalSkills) + assert.Equal(t, 0, report.Covered) + assert.Equal(t, 0, report.Partial) + assert.Equal(t, 2, report.Uncovered) + assert.Equal(t, "❌ None", report.Skills[0].Coverage) + assert.Equal(t, "❌ None", report.Skills[1].Coverage) +} + +func TestBuildCoverageReport_PartialAndFull(t *testing.T) { + root := t.TempDir() + writeSkill(t, root, filepath.Join("skills", "partial-skill"), "partial-skill") + writeSkill(t, root, filepath.Join(".github", "skills", "full-skill"), "full-skill") + + writeEval(t, root, filepath.Join("evals", "partial-skill", "eval.yaml"), ` +skill: partial-skill +tasks: + - tasks/*.yaml +graders: + - type: prompt + name: judge +`) + writeEval(t, root, filepath.Join("custom", "full-skill", "eval.yaml"), ` +skill: full-skill +tasks: + - tasks/a.yaml + - tasks/b.yaml +graders: + - type: prompt + name: judge + - type: file + name: files +`) + + report, err := buildCoverageReport(root, []string{"custom"}) + require.NoError(t, err) + + assert.Equal(t, 2, report.TotalSkills) + assert.Equal(t, 1, report.Covered) + assert.Equal(t, 1, report.Partial) + assert.Equal(t, 0, report.Uncovered) + assert.InDelta(t, 100.0, report.CoveragePct, 0.1) + + rows := map[string]coverageSkillRow{} + for _, row := range report.Skills { + rows[row.Skill] = row + } + + assert.Equal(t, "⚠️ Partial", rows["partial-skill"].Coverage) + assert.Equal(t, 1, rows["partial-skill"].Tasks) + assert.Equal(t, []string{"prompt"}, rows["partial-skill"].Graders) + + assert.Equal(t, "✅ Full", rows["full-skill"].Coverage) + assert.Equal(t, 2, rows["full-skill"].Tasks) + assert.Equal(t, []string{"file", "prompt"}, rows["full-skill"].Graders) +} + +func TestRenderCoverageMarkdown(t *testing.T) { + report := &coverageReport{ + TotalSkills: 2, + Skills: []coverageSkillRow{ + {Skill: "alpha", Tasks: 1, Graders: []string{"prompt"}, Coverage: "⚠️ Partial"}, + {Skill: "beta", Tasks: 2, Graders: []string{"file", "prompt"}, Coverage: "✅ Full"}, + }, + } + + var buf bytes.Buffer + renderCoverageMarkdown(&buf, report) + out := buf.String() + + assert.Contains(t, out, "📊 Eval Coverage Grid") + assert.Contains(t, out, "| Skill | Tasks | Graders | Coverage |") + assert.Contains(t, out, "| alpha | 1 | prompt | ⚠️ Partial |") + assert.Contains(t, out, "| beta | 2 | file, prompt | ✅ Full |") +} + +func TestRootCommand_HasCoverageSubcommand(t *testing.T) { + root := newRootCommand() + found := false + for _, c := range root.Commands() { + if c.Name() == "coverage" { + found = true + break + } + } + assert.True(t, found, "root command should have 'coverage' subcommand") +} + +func writeSkill(t *testing.T, root, relDir, skillName string) { + t.Helper() + dir := filepath.Join(root, relDir) + require.NoError(t, os.MkdirAll(dir, 0o755)) + content := `--- +name: ` + skillName + ` +description: "test skill" +--- +` + require.NoError(t, os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte(content), 0o644)) +} + +func writeEval(t *testing.T, root, relPath, content string) { + t.Helper() + absPath := filepath.Join(root, relPath) + require.NoError(t, os.MkdirAll(filepath.Dir(absPath), 0o755)) + require.NoError(t, os.WriteFile(absPath, []byte(content), 0o644)) +} diff --git a/cmd/waza/root.go b/cmd/waza/root.go index a53d1475..ac2b6e29 100644 --- a/cmd/waza/root.go +++ b/cmd/waza/root.go @@ -40,6 +40,7 @@ performance against predefined test cases.`, cmd.AddCommand(newInitCommand()) cmd.AddCommand(tokens.NewCommand()) cmd.AddCommand(newCompareCommand()) + cmd.AddCommand(newCoverageCommand()) cmd.AddCommand(dev.NewCommand()) cmd.AddCommand(newMetadataCommand(cmd)) cmd.AddCommand(newCheckCommand()) diff --git a/site/src/content/docs/reference/cli.mdx b/site/src/content/docs/reference/cli.mdx index eb348951..6a473128 100644 --- a/site/src/content/docs/reference/cli.mdx +++ b/site/src/content/docs/reference/cli.mdx @@ -295,6 +295,36 @@ waza compare gpt4.json sonnet.json opus.json waza compare results-*.json --format json ``` +## waza coverage + +Generate an eval coverage grid for discovered skills. + +```bash +waza coverage [root] +``` + +### Arguments + +| Argument | Description | +|----------|-------------| +| `[root]` | Root directory to scan (default: current directory) | + +### Flags + +| Flag | Description | +|------|-------------| +| `--format` | Output format: `text` (default), `markdown`, `json` | +| `--discover` | Additional directories to scan for skills/evals (repeatable) | + +### Examples + +```bash +waza coverage +waza coverage --format markdown +waza coverage --format json +waza coverage --discover custom-evals --discover plugins +``` + ## waza suggest Generate suggested eval artifacts from a skill's `SKILL.md` using an LLM. From db26aaf327c6a70ea6a4cd7008dde4e7595efa69 Mon Sep 17 00:00:00 2001 From: Shayne Boyer Date: Thu, 5 Mar 2026 12:42:52 -0500 Subject: [PATCH 2/9] fix: address review feedback on PR #92 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/waza/cmd_coverage.go | 22 +++++++++++++----- cmd/waza/cmd_coverage_test.go | 30 +++++++++++++++++++++++++ site/src/content/docs/reference/cli.mdx | 4 ++-- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index 914dcf78..8dd52038 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -41,7 +41,7 @@ type evalSpecLite struct { func newCoverageCommand() *cobra.Command { var outputFormat string - var discoverPaths []string + var searchPaths []string cmd := &cobra.Command{ Use: "coverage [root]", @@ -52,7 +52,7 @@ By default, this command scans: - skills/ and .github/skills for SKILL.md files - evals/ and skill directories for eval.yaml files -Use --discover to add additional directories to scan for eval and skill files.`, +Use --path to add additional directories to scan for eval and skill files.`, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { root := "." @@ -60,7 +60,7 @@ Use --discover to add additional directories to scan for eval and skill files.`, root = args[0] } - report, err := buildCoverageReport(root, discoverPaths) + report, err := buildCoverageReport(root, searchPaths) if err != nil { return err } @@ -82,7 +82,7 @@ Use --discover to add additional directories to scan for eval and skill files.`, } cmd.Flags().StringVarP(&outputFormat, "format", "f", "text", "Output format: text, markdown, or json") - cmd.Flags().StringArrayVar(&discoverPaths, "discover", nil, "Additional directories to scan for skills/evals (repeatable)") + cmd.Flags().StringArrayVar(&searchPaths, "path", nil, "Additional directories to scan for skills/evals (repeatable)") return cmd } @@ -106,6 +106,7 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, evalBySkill := make(map[string][]string) tasksBySkill := make(map[string]int) gradersBySkill := make(map[string]map[string]struct{}) + var parseFailures []string evalPaths, err := discoverEvalFiles(absRoot, skillPaths, discoverPaths) if err != nil { @@ -115,6 +116,7 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, for _, evalPath := range evalPaths { spec, parseErr := parseEvalSpec(evalPath) if parseErr != nil { + parseFailures = append(parseFailures, fmt.Sprintf("%s (%v)", evalPath, parseErr)) continue } skillName := strings.TrimSpace(spec.Skill) @@ -136,6 +138,10 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, } } } + if len(parseFailures) > 0 { + sort.Strings(parseFailures) + return nil, fmt.Errorf("failed to parse %d eval files: %s", len(parseFailures), strings.Join(parseFailures, "; ")) + } skillNames := make([]string, 0, len(skillPaths)) for name := range skillPaths { @@ -255,7 +261,7 @@ func discoverEvalFiles(root string, skillPaths map[string]string, discoverPaths } return nil } - if d.Name() == "eval.yaml" { + if d.Name() == "eval.yaml" || d.Name() == "eval.yml" { absPath, _ := filepath.Abs(path) candidates[absPath] = struct{}{} } @@ -267,7 +273,11 @@ func discoverEvalFiles(root string, skillPaths map[string]string, discoverPaths for _, skillPath := range skillPaths { skillDir := filepath.Dir(skillPath) - for _, rel := range []string{"eval.yaml", filepath.Join("evals", "eval.yaml"), filepath.Join("tests", "eval.yaml")} { + for _, rel := range []string{ + "eval.yaml", "eval.yml", + filepath.Join("evals", "eval.yaml"), filepath.Join("evals", "eval.yml"), + filepath.Join("tests", "eval.yaml"), filepath.Join("tests", "eval.yml"), + } { p := filepath.Join(skillDir, rel) if isFile(p) { absPath, _ := filepath.Abs(p) diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go index cbf5ade2..df71e075 100644 --- a/cmd/waza/cmd_coverage_test.go +++ b/cmd/waza/cmd_coverage_test.go @@ -74,6 +74,36 @@ graders: assert.Equal(t, []string{"file", "prompt"}, rows["full-skill"].Graders) } +func TestBuildCoverageReport_IncludesEvalYML(t *testing.T) { + root := t.TempDir() + writeSkill(t, root, filepath.Join("skills", "alpha"), "alpha") + writeEval(t, root, filepath.Join("evals", "alpha", "eval.yml"), ` +skill: alpha +tasks: + - tasks/*.yaml +graders: + - type: prompt + name: judge + - type: file + name: files +`) + + report, err := buildCoverageReport(root, nil) + require.NoError(t, err) + require.Len(t, report.Skills, 1) + assert.Equal(t, "✅ Full", report.Skills[0].Coverage) +} + +func TestBuildCoverageReport_ReturnsParseErrors(t *testing.T) { + root := t.TempDir() + writeSkill(t, root, filepath.Join("skills", "alpha"), "alpha") + writeEval(t, root, filepath.Join("evals", "alpha", "eval.yaml"), "skill: [bad") + + _, err := buildCoverageReport(root, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to parse 1 eval files") +} + func TestRenderCoverageMarkdown(t *testing.T) { report := &coverageReport{ TotalSkills: 2, diff --git a/site/src/content/docs/reference/cli.mdx b/site/src/content/docs/reference/cli.mdx index 6a473128..6ff3969e 100644 --- a/site/src/content/docs/reference/cli.mdx +++ b/site/src/content/docs/reference/cli.mdx @@ -314,7 +314,7 @@ waza coverage [root] | Flag | Description | |------|-------------| | `--format` | Output format: `text` (default), `markdown`, `json` | -| `--discover` | Additional directories to scan for skills/evals (repeatable) | +| `--path` | Additional directories to scan for skills/evals (repeatable) | ### Examples @@ -322,7 +322,7 @@ waza coverage [root] waza coverage waza coverage --format markdown waza coverage --format json -waza coverage --discover custom-evals --discover plugins +waza coverage --path custom-evals --path plugins ``` ## waza suggest From 82427c19de02012fe223d05d62c3d21714d93de2 Mon Sep 17 00:00:00 2001 From: Shayne Boyer Date: Thu, 5 Mar 2026 15:32:02 -0500 Subject: [PATCH 3/9] fix: address PR #92 coverage command review comments Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 2 +- cmd/waza/cmd_coverage.go | 19 +++++++++---- cmd/waza/cmd_coverage_test.go | 36 +++++++++++++++++++++++++ site/src/content/docs/reference/cli.mdx | 2 +- 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 2d108e82..bd10c885 100644 --- a/README.md +++ b/README.md @@ -311,7 +311,7 @@ Generate a skill-to-eval coverage grid showing which skills are fully covered, p | Flag | Short | Description | |------|-------|-------------| | `--format ` | `-f` | Output format: `text`, `markdown`, or `json` (default: `text`) | -| `--discover ` | | Additional directory to scan for skills/evals (repeatable) | +| `--path ` | | Additional directory to scan for skills/evals (repeatable) | ### `waza cache clear` diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index 8dd52038..1eb8ddcd 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -204,7 +204,7 @@ func discoverSkillFiles(root string, discoverPaths []string) (map[string]string, } err := filepath.WalkDir(sr, func(path string, d fs.DirEntry, err error) error { if err != nil { - return nil + return fmt.Errorf("error walking %s: %w", path, err) } if d.IsDir() { name := d.Name() @@ -216,7 +216,10 @@ func discoverSkillFiles(root string, discoverPaths []string) (map[string]string, if d.Name() != "SKILL.md" { return nil } - absPath, _ := filepath.Abs(path) + absPath, absErr := filepath.Abs(path) + if absErr != nil { + absPath = filepath.Clean(path) + } if _, ok := seenPaths[absPath]; ok { return nil } @@ -252,7 +255,7 @@ func discoverEvalFiles(root string, skillPaths map[string]string, discoverPaths } if err := filepath.WalkDir(evalRoot, func(path string, d fs.DirEntry, err error) error { if err != nil { - return nil + return fmt.Errorf("error walking %s: %w", path, err) } if d.IsDir() { name := d.Name() @@ -262,7 +265,10 @@ func discoverEvalFiles(root string, skillPaths map[string]string, discoverPaths return nil } if d.Name() == "eval.yaml" || d.Name() == "eval.yml" { - absPath, _ := filepath.Abs(path) + absPath, absErr := filepath.Abs(path) + if absErr != nil { + absPath = filepath.Clean(path) + } candidates[absPath] = struct{}{} } return nil @@ -280,7 +286,10 @@ func discoverEvalFiles(root string, skillPaths map[string]string, discoverPaths } { p := filepath.Join(skillDir, rel) if isFile(p) { - absPath, _ := filepath.Abs(p) + absPath, absErr := filepath.Abs(p) + if absErr != nil { + absPath = filepath.Clean(p) + } candidates[absPath] = struct{}{} } } diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go index df71e075..d1ad0c2a 100644 --- a/cmd/waza/cmd_coverage_test.go +++ b/cmd/waza/cmd_coverage_test.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "encoding/json" "os" "path/filepath" "testing" @@ -123,6 +124,41 @@ func TestRenderCoverageMarkdown(t *testing.T) { assert.Contains(t, out, "| beta | 2 | file, prompt | ✅ Full |") } +func TestRenderCoverageJSON(t *testing.T) { + report := &coverageReport{ + TotalSkills: 1, + Covered: 1, + Partial: 0, + Uncovered: 0, + CoveragePct: 100, + Skills: []coverageSkillRow{ + {Skill: "alpha", Tasks: 2, Graders: []string{"file", "prompt"}, Coverage: "✅ Full"}, + }, + } + + var buf bytes.Buffer + require.NoError(t, renderCoverageJSON(&buf, report)) + + var decoded map[string]any + require.NoError(t, json.Unmarshal(buf.Bytes(), &decoded)) + assert.Equal(t, float64(1), decoded["total_skills"]) + assert.Contains(t, buf.String(), "\n \"total_skills\"") +} + +func TestCoverageCommand_UnsupportedFormat(t *testing.T) { + root := t.TempDir() + writeSkill(t, root, filepath.Join("skills", "alpha"), "alpha") + + cmd := newCoverageCommand() + cmd.SetOut(new(bytes.Buffer)) + cmd.SetErr(new(bytes.Buffer)) + cmd.SetArgs([]string{root, "--format", "xml"}) + + err := cmd.Execute() + require.Error(t, err) + assert.Contains(t, err.Error(), `unsupported format "xml"`) +} + func TestRootCommand_HasCoverageSubcommand(t *testing.T) { root := newRootCommand() found := false diff --git a/site/src/content/docs/reference/cli.mdx b/site/src/content/docs/reference/cli.mdx index 6ff3969e..7e0e4277 100644 --- a/site/src/content/docs/reference/cli.mdx +++ b/site/src/content/docs/reference/cli.mdx @@ -313,7 +313,7 @@ waza coverage [root] | Flag | Description | |------|-------------| -| `--format` | Output format: `text` (default), `markdown`, `json` | +| `-f, --format` | Output format: `text` (default), `markdown`, `json` | | `--path` | Additional directories to scan for skills/evals (repeatable) | ### Examples From 54d872e78806ab338746fee9475d78e4ddb4ddba Mon Sep 17 00:00:00 2001 From: Shayne Boyer Date: Mon, 9 Mar 2026 17:59:08 -0400 Subject: [PATCH 4/9] fix: coverage pct counts only fully covered skills, document threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CoveragePct now counts only 'Full' (≥2 grader types) skills, not Partial - Add comment clarifying that Full requires tasks + multiple grader types - Update summary line to say 'fully covered' Addresses wbreza review feedback on PR #92. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/waza/cmd_coverage.go | 5 +++-- cmd/waza/cmd_coverage_test.go | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index 1eb8ddcd..4ae7bc86 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -165,6 +165,7 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, case !hasEval: report.Uncovered++ case tasks > 0 && len(graders) >= 2: + // Full: eval spec has tasks and multiple grader types coverage = "✅ Full" report.Covered++ default: @@ -181,7 +182,7 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, } if report.TotalSkills > 0 { - report.CoveragePct = float64(report.Covered+report.Partial) * 100 / float64(report.TotalSkills) + report.CoveragePct = float64(report.Covered) * 100 / float64(report.TotalSkills) } return report, nil } @@ -339,7 +340,7 @@ func inferSkillNameFromEvalPath(evalPath string) string { func renderCoverageText(w io.Writer, report *coverageReport) { fmt.Fprintln(w, "📊 Eval Coverage Grid") //nolint:errcheck - fmt.Fprintf(w, "Coverage: %.1f%% (%d/%d)\n\n", report.CoveragePct, report.Covered+report.Partial, report.TotalSkills) //nolint:errcheck + fmt.Fprintf(w, "Coverage: %.1f%% (%d/%d fully covered)\n\n", report.CoveragePct, report.Covered, report.TotalSkills) //nolint:errcheck tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) fmt.Fprintln(tw, "Skill\tTasks\tGraders\tCoverage") //nolint:errcheck diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go index d1ad0c2a..5fea24de 100644 --- a/cmd/waza/cmd_coverage_test.go +++ b/cmd/waza/cmd_coverage_test.go @@ -59,7 +59,7 @@ graders: assert.Equal(t, 1, report.Covered) assert.Equal(t, 1, report.Partial) assert.Equal(t, 0, report.Uncovered) - assert.InDelta(t, 100.0, report.CoveragePct, 0.1) + assert.InDelta(t, 50.0, report.CoveragePct, 0.1) rows := map[string]coverageSkillRow{} for _, row := range report.Skills { From 972722c2e9bdf763efcd2dd7f7fe4df884d0ad2c Mon Sep 17 00:00:00 2001 From: Shayne Boyer Date: Mon, 9 Mar 2026 18:19:36 -0400 Subject: [PATCH 5/9] docs: Document waza coverage levels and percentage calculation --- README.md | 2 ++ site/src/content/docs/reference/cli.mdx | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/README.md b/README.md index bd10c885..8948ef8c 100644 --- a/README.md +++ b/README.md @@ -308,6 +308,8 @@ Compare results from multiple evaluation runs side by side — per-task score de Generate a skill-to-eval coverage grid showing which skills are fully covered, partially covered, or missing evals. +**Note**: Full coverage requires tasks and 2+ grader types. The coverage percentage reflects only fully covered skills. + | Flag | Short | Description | |------|-------|-------------| | `--format ` | `-f` | Output format: `text`, `markdown`, or `json` (default: `text`) | diff --git a/site/src/content/docs/reference/cli.mdx b/site/src/content/docs/reference/cli.mdx index 7e0e4277..e1405bc9 100644 --- a/site/src/content/docs/reference/cli.mdx +++ b/site/src/content/docs/reference/cli.mdx @@ -316,6 +316,14 @@ waza coverage [root] | `-f, --format` | Output format: `text` (default), `markdown`, `json` | | `--path` | Additional directories to scan for skills/evals (repeatable) | +### Coverage Levels + +- **Full**: Skill has an `eval.yaml` with tasks and at least 2 distinct grader types. +- **Partial**: Skill has an `eval.yaml` but fewer than 2 grader types or no tasks. +- **Missing**: No `eval.yaml` found for the skill. + +**Note**: The reported coverage percentage reflects only fully covered skills (`Fully Covered / Total Skills`). + ### Examples ```bash From 51f1878cdec8b51b5e6f2e1aef3c573cfd1efb4d Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:23:52 -0400 Subject: [PATCH 6/9] fix: gofmt formatting in cmd_coverage.go Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/waza/cmd_coverage.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index 4ae7bc86..feaba6be 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -339,7 +339,7 @@ func inferSkillNameFromEvalPath(evalPath string) string { } func renderCoverageText(w io.Writer, report *coverageReport) { - fmt.Fprintln(w, "📊 Eval Coverage Grid") //nolint:errcheck + fmt.Fprintln(w, "📊 Eval Coverage Grid") //nolint:errcheck fmt.Fprintf(w, "Coverage: %.1f%% (%d/%d fully covered)\n\n", report.CoveragePct, report.Covered, report.TotalSkills) //nolint:errcheck tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) From 4a781a8a706aa211a79a8751121e8a03fb24578b Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:42:44 -0400 Subject: [PATCH 7/9] fix: address Copilot review feedback on PR #92 - Validate root path is a directory, not just exists - Update help text to mention both eval.yaml and eval.yml - Update CLI docs to reference eval.yaml/eval.yml consistently - Add test for file-path rejection Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/waza/cmd_coverage.go | 8 ++++++-- cmd/waza/cmd_coverage_test.go | 9 +++++++++ site/src/content/docs/reference/cli.mdx | 6 +++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index feaba6be..6f69d144 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -50,7 +50,7 @@ func newCoverageCommand() *cobra.Command { By default, this command scans: - skills/ and .github/skills for SKILL.md files - - evals/ and skill directories for eval.yaml files + - evals/ and skill directories for eval.yaml/eval.yml files Use --path to add additional directories to scan for eval and skill files.`, Args: cobra.MaximumNArgs(1), @@ -91,9 +91,13 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, if err != nil { return nil, fmt.Errorf("resolving root path: %w", err) } - if _, err := os.Stat(absRoot); err != nil { + info, err := os.Stat(absRoot) + if err != nil { return nil, fmt.Errorf("invalid root path %q: %w", root, err) } + if !info.IsDir() { + return nil, fmt.Errorf("root path %q is not a directory", root) + } skillPaths, err := discoverSkillFiles(absRoot, discoverPaths) if err != nil { diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go index 5fea24de..8932da3d 100644 --- a/cmd/waza/cmd_coverage_test.go +++ b/cmd/waza/cmd_coverage_test.go @@ -171,6 +171,15 @@ func TestRootCommand_HasCoverageSubcommand(t *testing.T) { assert.True(t, found, "root command should have 'coverage' subcommand") } +func TestBuildCoverageReport_RejectsFilePath(t *testing.T) { + f := filepath.Join(t.TempDir(), "notadir.txt") + require.NoError(t, os.WriteFile(f, []byte("hello"), 0o644)) + + _, err := buildCoverageReport(f, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "is not a directory") +} + func writeSkill(t *testing.T, root, relDir, skillName string) { t.Helper() dir := filepath.Join(root, relDir) diff --git a/site/src/content/docs/reference/cli.mdx b/site/src/content/docs/reference/cli.mdx index e1405bc9..74d13423 100644 --- a/site/src/content/docs/reference/cli.mdx +++ b/site/src/content/docs/reference/cli.mdx @@ -318,9 +318,9 @@ waza coverage [root] ### Coverage Levels -- **Full**: Skill has an `eval.yaml` with tasks and at least 2 distinct grader types. -- **Partial**: Skill has an `eval.yaml` but fewer than 2 grader types or no tasks. -- **Missing**: No `eval.yaml` found for the skill. +- **Full**: Skill has an `eval.yaml`/`eval.yml` with tasks and at least 2 distinct grader types. +- **Partial**: Skill has an `eval.yaml`/`eval.yml` but fewer than 2 grader types or no tasks. +- **Missing**: No `eval.yaml`/`eval.yml` found for the skill. **Note**: The reported coverage percentage reflects only fully covered skills (`Fully Covered / Total Skills`). From 26867b1cc78115cae8fcbf21b42bc9a4e842ac2e Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 11:17:36 -0700 Subject: [PATCH 8/9] fix: handle tasks_from in eval coverage classification Eval specs using tasks_from instead of inline tasks were misclassified as Partial coverage. Now tasks_from is parsed and treated as having tasks for coverage purposes. Updated docs to clarify both forms qualify. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 2 +- cmd/waza/cmd_coverage.go | 13 +++++++++---- cmd/waza/cmd_coverage_test.go | 20 ++++++++++++++++++++ site/src/content/docs/reference/cli.mdx | 2 +- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8948ef8c..c10d84a7 100644 --- a/README.md +++ b/README.md @@ -308,7 +308,7 @@ Compare results from multiple evaluation runs side by side — per-task score de Generate a skill-to-eval coverage grid showing which skills are fully covered, partially covered, or missing evals. -**Note**: Full coverage requires tasks and 2+ grader types. The coverage percentage reflects only fully covered skills. +**Note**: Full coverage requires tasks (via `tasks:` or `tasks_from:`) and 2+ grader types. The coverage percentage reflects only fully covered skills. | Flag | Short | Description | |------|-------|-------------| diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index 6f69d144..6c0816f1 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -34,9 +34,10 @@ type coverageReport struct { } type evalSpecLite struct { - Skill string `yaml:"skill"` - Tasks []string `yaml:"tasks"` - Graders []models.GraderConfig `yaml:"graders"` + Skill string `yaml:"skill"` + Tasks []string `yaml:"tasks"` + TasksFrom string `yaml:"tasks_from,omitempty"` + Graders []models.GraderConfig `yaml:"graders"` } func newCoverageCommand() *cobra.Command { @@ -131,7 +132,11 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, continue } evalBySkill[skillName] = append(evalBySkill[skillName], evalPath) - tasksBySkill[skillName] += len(spec.Tasks) + taskCount := len(spec.Tasks) + if taskCount == 0 && spec.TasksFrom != "" { + taskCount = 1 // tasks_from references an external file; count as having tasks + } + tasksBySkill[skillName] += taskCount if _, ok := gradersBySkill[skillName]; !ok { gradersBySkill[skillName] = make(map[string]struct{}) } diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go index 8932da3d..c26b8821 100644 --- a/cmd/waza/cmd_coverage_test.go +++ b/cmd/waza/cmd_coverage_test.go @@ -180,6 +180,26 @@ func TestBuildCoverageReport_RejectsFilePath(t *testing.T) { assert.Contains(t, err.Error(), "is not a directory") } +func TestBuildCoverageReport_TasksFromCountsAsTasks(t *testing.T) { + root := t.TempDir() + writeSkill(t, root, filepath.Join("skills", "from-skill"), "from-skill") + writeEval(t, root, filepath.Join("evals", "from-skill", "eval.yaml"), ` +skill: from-skill +tasks_from: tasks/ +graders: + - type: prompt + name: judge + - type: diff + name: snapshot +`) + + report, err := buildCoverageReport(root, nil) + require.NoError(t, err) + require.Len(t, report.Skills, 1) + assert.Equal(t, "✅ Full", report.Skills[0].Coverage) + assert.Equal(t, 1, report.Skills[0].Tasks) +} + func writeSkill(t *testing.T, root, relDir, skillName string) { t.Helper() dir := filepath.Join(root, relDir) diff --git a/site/src/content/docs/reference/cli.mdx b/site/src/content/docs/reference/cli.mdx index 74d13423..5fd81a83 100644 --- a/site/src/content/docs/reference/cli.mdx +++ b/site/src/content/docs/reference/cli.mdx @@ -318,7 +318,7 @@ waza coverage [root] ### Coverage Levels -- **Full**: Skill has an `eval.yaml`/`eval.yml` with tasks and at least 2 distinct grader types. +- **Full**: Skill has an `eval.yaml`/`eval.yml` with tasks (via `tasks:` or `tasks_from:`) and at least 2 distinct grader types. - **Partial**: Skill has an `eval.yaml`/`eval.yml` but fewer than 2 grader types or no tasks. - **Missing**: No `eval.yaml`/`eval.yml` found for the skill. From 10f3a73f5ee6808583bbb4dd90926ac008f3d3b7 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 06:34:52 -0700 Subject: [PATCH 9/9] fix: address review feedback for coverage command - Parse failures now warn to stderr instead of aborting the report, making waza coverage usable in repos with broken eval files. - Use tabwriter placeholders for emoji to fix column alignment. - Updated test to match new warn-not-error behavior. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmd/waza/cmd_coverage.go | 18 +++++++++++++++--- cmd/waza/cmd_coverage_test.go | 10 ++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/cmd/waza/cmd_coverage.go b/cmd/waza/cmd_coverage.go index 6c0816f1..9ec2c387 100644 --- a/cmd/waza/cmd_coverage.go +++ b/cmd/waza/cmd_coverage.go @@ -149,7 +149,7 @@ func buildCoverageReport(root string, discoverPaths []string) (*coverageReport, } if len(parseFailures) > 0 { sort.Strings(parseFailures) - return nil, fmt.Errorf("failed to parse %d eval files: %s", len(parseFailures), strings.Join(parseFailures, "; ")) + fmt.Fprintf(os.Stderr, "warning: failed to parse %d eval file(s): %s\n", len(parseFailures), strings.Join(parseFailures, "; ")) } skillNames := make([]string, 0, len(skillPaths)) @@ -351,7 +351,9 @@ func renderCoverageText(w io.Writer, report *coverageReport) { fmt.Fprintln(w, "📊 Eval Coverage Grid") //nolint:errcheck fmt.Fprintf(w, "Coverage: %.1f%% (%d/%d fully covered)\n\n", report.CoveragePct, report.Covered, report.TotalSkills) //nolint:errcheck - tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + // Use placeholders for emoji to avoid tabwriter alignment issues + var buf strings.Builder + tw := tabwriter.NewWriter(&buf, 0, 0, 2, ' ', 0) fmt.Fprintln(tw, "Skill\tTasks\tGraders\tCoverage") //nolint:errcheck fmt.Fprintln(tw, "-----\t-----\t-------\t--------") //nolint:errcheck for _, row := range report.Skills { @@ -359,9 +361,19 @@ func renderCoverageText(w io.Writer, report *coverageReport) { if len(row.Graders) > 0 { graders = strings.Join(row.Graders, ", ") } - fmt.Fprintf(tw, "%s\t%d\t%s\t%s\n", row.Skill, row.Tasks, graders, row.Coverage) //nolint:errcheck + coverage := row.Coverage + coverage = strings.Replace(coverage, "✅", "{CHECK}", 1) + coverage = strings.Replace(coverage, "⚠️", "{WARN}", 1) + coverage = strings.Replace(coverage, "❌", "{CROSS}", 1) + fmt.Fprintf(tw, "%s\t%d\t%s\t%s\n", row.Skill, row.Tasks, graders, coverage) //nolint:errcheck } _ = tw.Flush() + + result := buf.String() + result = strings.ReplaceAll(result, "{CHECK}", "✅") + result = strings.ReplaceAll(result, "{WARN}", "⚠️") + result = strings.ReplaceAll(result, "{CROSS}", "❌") + fmt.Fprint(w, result) //nolint:errcheck } func renderCoverageMarkdown(w io.Writer, report *coverageReport) { diff --git a/cmd/waza/cmd_coverage_test.go b/cmd/waza/cmd_coverage_test.go index c26b8821..bfa8c732 100644 --- a/cmd/waza/cmd_coverage_test.go +++ b/cmd/waza/cmd_coverage_test.go @@ -95,14 +95,16 @@ graders: assert.Equal(t, "✅ Full", report.Skills[0].Coverage) } -func TestBuildCoverageReport_ReturnsParseErrors(t *testing.T) { +func TestBuildCoverageReport_WarnsOnParseErrors(t *testing.T) { root := t.TempDir() writeSkill(t, root, filepath.Join("skills", "alpha"), "alpha") writeEval(t, root, filepath.Join("evals", "alpha", "eval.yaml"), "skill: [bad") - _, err := buildCoverageReport(root, nil) - require.Error(t, err) - assert.Contains(t, err.Error(), "failed to parse 1 eval files") + report, err := buildCoverageReport(root, nil) + require.NoError(t, err, "parse failures should warn, not error") + require.NotNil(t, report) + assert.Equal(t, 1, report.TotalSkills) + assert.Equal(t, 0, report.Covered) } func TestRenderCoverageMarkdown(t *testing.T) {