diff --git a/cmd/build.go b/cmd/build.go index 52dc677..ff3c12c 100644 --- a/cmd/build.go +++ b/cmd/build.go @@ -216,20 +216,6 @@ func runBuild(cmd *cobra.Command, args []string) error { return nil } -// exitError pairs an error with a specific exit code for the CLI. -type exitError struct { - code int - err error -} - -func (e *exitError) Error() string { - return e.err.Error() -} - -func (e *exitError) Unwrap() error { - return e.err -} - // guidelinesConfigHasNonDefaults returns true if the guidelines config has any // explicitly set value (Enabled pointer, section title, order list, or exclude list). func guidelinesConfigHasNonDefaults(cfg config.GuidelinesConfig) bool { diff --git a/cmd/generate.go b/cmd/generate.go new file mode 100644 index 0000000..b4c121f --- /dev/null +++ b/cmd/generate.go @@ -0,0 +1,119 @@ +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/armstrongl/rulebound/internal/mdgen" + "github.com/spf13/cobra" +) + +// Generate flags. +var ( + generateOutput string + generateForce bool +) + +// generateCmd defines the `rulebound generate` sub-command. +var generateCmd = &cobra.Command{ + Use: "generate ", + Short: "Generate a Vale YAML rule file from a structured Markdown file", + Long: `generate reads a structured Markdown file with YAML frontmatter and vale-* +fenced code blocks, and emits a Vale-compatible YAML rule file. + +The input .md file uses frontmatter for rule metadata and vale-swap, +vale-tokens, or vale-exceptions fenced blocks for rule data. + +Supported rule types: substitution, existence, occurrence, capitalization.`, + Args: cobra.ExactArgs(1), + RunE: runGenerate, +} + +func init() { + generateCmd.Flags().StringVarP(&generateOutput, "output", "o", "", "Output path (default: input stem + .yml); use '-' for stdout") + generateCmd.Flags().BoolVar(&generateForce, "force", false, "Overwrite output file if it already exists") +} + +func runGenerate(cmd *cobra.Command, args []string) error { + inputPath := args[0] + + // Validate input is a regular file. + info, err := os.Stat(inputPath) + if err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("input file does not exist: %s", inputPath) + } + return fmt.Errorf("checking input file: %w", err) + } + if info.IsDir() { + return fmt.Errorf("input path is a directory, expected a file: %s", inputPath) + } + + // Read input. + data, err := os.ReadFile(inputPath) + if err != nil { + return fmt.Errorf("reading input file: %w", err) + } + + // Parse. + src, warnings, err := mdgen.ParseMarkdown(data) + if err != nil { + return &exitError{code: ExitGeneral, err: fmt.Errorf("parsing %s: %w", inputPath, err)} + } + + for _, w := range warnings { + fmt.Fprintf(os.Stderr, "Warning: %s\n", w.Message) + } + + // Emit YAML. + yamlBytes, emitWarnings, err := mdgen.EmitYAML(src) + if err != nil { + return &exitError{code: ExitGeneral, err: fmt.Errorf("generating YAML: %w", err)} + } + + for _, w := range emitWarnings { + fmt.Fprintf(os.Stderr, "Warning: %s\n", w.Message) + } + + // Resolve output path. + outputPath := generateOutput + if outputPath == "" { + // Default: input stem + .yml in the same directory. + ext := filepath.Ext(inputPath) + outputPath = strings.TrimSuffix(inputPath, ext) + ".yml" + } + + // Write output. + if outputPath == "-" { + // stdout + _, err = os.Stdout.Write(yamlBytes) + if err != nil { + return fmt.Errorf("writing to stdout: %w", err) + } + return nil + } + + // Check if output file exists. + if !generateForce { + if _, err := os.Stat(outputPath); err == nil { + return &exitError{ + code: ExitGeneral, + err: fmt.Errorf("output file already exists: %s (use --force to overwrite)", outputPath), + } + } + } + + if err := os.WriteFile(outputPath, yamlBytes, 0644); err != nil { + return fmt.Errorf("writing output file: %w", err) + } + + if Verbose { + fmt.Printf("Input: %s\n", inputPath) + fmt.Printf("Extends: %s\n", src.Extends) + } + fmt.Printf("Generated: %s\n", outputPath) + + return nil +} diff --git a/cmd/generate_test.go b/cmd/generate_test.go new file mode 100644 index 0000000..76ac4f9 --- /dev/null +++ b/cmd/generate_test.go @@ -0,0 +1,172 @@ +package cmd + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +// resetGenerateFlags resets package-level flag variables to their defaults. +// Cobra flag values persist between rootCmd.Execute() calls in the same process. +func resetGenerateFlags(t *testing.T) { + t.Helper() + generateOutput = "" + generateForce = false +} + +// writeTestMD writes a minimal substitution .md to the given directory. +func writeTestMD(t *testing.T, dir, name string) string { + t.Helper() + content := `--- +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning +--- + +# Test + +` + "```vale-swap\nleverage: use\n```\n" + + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("writing test md: %v", err) + } + return path +} + +func TestGenerate_DefaultOutput(t *testing.T) { + resetGenerateFlags(t) + dir := t.TempDir() + mdPath := writeTestMD(t, dir, "Jargon.md") + + rootCmd.SetArgs([]string{"generate", mdPath}) + if err := rootCmd.Execute(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + ymlPath := filepath.Join(dir, "Jargon.yml") + data, err := os.ReadFile(ymlPath) + if err != nil { + t.Fatalf("reading output: %v", err) + } + if !strings.Contains(string(data), "extends: substitution") { + t.Errorf("output should contain extends: substitution") + } + if !strings.Contains(string(data), "leverage: use") { + t.Errorf("output should contain swap entry") + } +} + +func TestGenerate_ExplicitOutput(t *testing.T) { + resetGenerateFlags(t) + dir := t.TempDir() + mdPath := writeTestMD(t, dir, "Test.md") + outPath := filepath.Join(dir, "custom.yml") + + rootCmd.SetArgs([]string{"generate", mdPath, "--output", outPath}) + if err := rootCmd.Execute(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, err := os.Stat(outPath); err != nil { + t.Errorf("output file not created at %s", outPath) + } +} + +func TestGenerate_Stdout(t *testing.T) { + resetGenerateFlags(t) + dir := t.TempDir() + mdPath := writeTestMD(t, dir, "Test.md") + + // Capture stdout. + old := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + rootCmd.SetArgs([]string{"generate", mdPath, "--output", "-"}) + err := rootCmd.Execute() + + w.Close() + os.Stdout = old + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var buf bytes.Buffer + buf.ReadFrom(r) + output := buf.String() + + if !strings.Contains(output, "extends: substitution") { + t.Errorf("stdout should contain YAML output, got: %s", output) + } +} + +func TestGenerate_ForceOverwrite(t *testing.T) { + resetGenerateFlags(t) + dir := t.TempDir() + mdPath := writeTestMD(t, dir, "Test.md") + ymlPath := filepath.Join(dir, "Test.yml") + + // Create existing file. + if err := os.WriteFile(ymlPath, []byte("old content"), 0644); err != nil { + t.Fatal(err) + } + + // Without --force should fail. + rootCmd.SetArgs([]string{"generate", mdPath}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error when output exists without --force") + } + + // With --force should succeed. + rootCmd.SetArgs([]string{"generate", mdPath, "--force"}) + if err := rootCmd.Execute(); err != nil { + t.Fatalf("unexpected error with --force: %v", err) + } + + data, _ := os.ReadFile(ymlPath) + if strings.Contains(string(data), "old content") { + t.Error("file should have been overwritten") + } +} + +func TestGenerate_FileNotExist(t *testing.T) { + resetGenerateFlags(t) + rootCmd.SetArgs([]string{"generate", "/nonexistent/file.md"}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error for nonexistent file") + } +} + +func TestGenerate_InputIsDirectory(t *testing.T) { + resetGenerateFlags(t) + dir := t.TempDir() + + rootCmd.SetArgs([]string{"generate", dir}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error when input is a directory") + } + if !strings.Contains(err.Error(), "directory") { + t.Errorf("error should mention directory: %v", err) + } +} + +func TestGenerate_MissingRequiredField(t *testing.T) { + resetGenerateFlags(t) + dir := t.TempDir() + content := "---\nextends: substitution\n---\n\n# No message\n\n```vale-swap\nfoo: bar\n```\n" + mdPath := filepath.Join(dir, "bad.md") + os.WriteFile(mdPath, []byte(content), 0644) + + rootCmd.SetArgs([]string{"generate", mdPath}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error for missing message") + } +} diff --git a/cmd/root.go b/cmd/root.go index afe1987..70cc8a7 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -38,6 +38,20 @@ Usage example: SilenceErrors: true, } +// exitError pairs an error with a specific exit code for the CLI. +type exitError struct { + code int + err error +} + +func (e *exitError) Error() string { + return e.err.Error() +} + +func (e *exitError) Unwrap() error { + return e.err +} + // Execute runs the root command and exits on failure. // An *exitError carries a specific exit code; other errors exit with ExitGeneral. func Execute() { @@ -56,4 +70,6 @@ func init() { // Add sub-commands. rootCmd.AddCommand(buildCmd) + rootCmd.AddCommand(generateCmd) + rootCmd.AddCommand(validateCmd) } diff --git a/cmd/validate.go b/cmd/validate.go new file mode 100644 index 0000000..39a28e4 --- /dev/null +++ b/cmd/validate.go @@ -0,0 +1,78 @@ +package cmd + +import ( + "fmt" + "io" + "os" + + "github.com/armstrongl/rulebound/internal/parser" + "github.com/spf13/cobra" +) + +// validateCmd defines the `rulebound validate` sub-command. +var validateCmd = &cobra.Command{ + Use: "validate [file2.yml ...] | validate -", + Short: "Validate Vale YAML rule files for structural errors", + Long: `validate reads one or more Vale rule YAML files and reports structural +errors: missing required fields, invalid extends values, and rule-type-specific +field errors — without invoking Hugo. + +Use '-' to read YAML from stdin (e.g. rulebound generate foo.md -o - | rulebound validate -). + +Exit code 0 if all files are valid, 1 if any file has errors.`, + Args: cobra.MinimumNArgs(1), + RunE: runValidate, +} + +func runValidate(cmd *cobra.Command, args []string) error { + var totalErrors int + validCount := 0 + + for _, path := range args { + if Verbose { + fmt.Printf("Validating: %s\n", path) + } + + var errs []parser.ValidationError + var err error + + if path == "-" { + data, readErr := io.ReadAll(os.Stdin) + if readErr != nil { + fmt.Fprintf(os.Stderr, ": %v\n", readErr) + totalErrors++ + continue + } + errs, err = parser.ValidateRuleBytes(data) + path = "" + } else { + errs, err = parser.ValidateRule(path) + } + + if err != nil { + fmt.Fprintf(os.Stderr, "%s: %v\n", path, err) + totalErrors++ + continue + } + + if len(errs) == 0 { + validCount++ + continue + } + + totalErrors += len(errs) + for _, ve := range errs { + fmt.Fprintf(os.Stderr, "%s: %s: %s\n", path, ve.Field, ve.Message) + } + } + + fileCount := len(args) + invalidCount := fileCount - validCount + fmt.Printf("Validated %d file(s): %d valid, %d with errors\n", fileCount, validCount, invalidCount) + + if totalErrors > 0 { + return &exitError{code: ExitGeneral, err: fmt.Errorf("%d validation error(s) found", totalErrors)} + } + + return nil +} diff --git a/cmd/validate_test.go b/cmd/validate_test.go new file mode 100644 index 0000000..cb591a0 --- /dev/null +++ b/cmd/validate_test.go @@ -0,0 +1,124 @@ +package cmd + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func writeTestYML(t *testing.T, dir, name, content string) string { + t.Helper() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatalf("writing test yml: %v", err) + } + return path +} + +func TestValidate_ValidFile(t *testing.T) { + dir := t.TempDir() + path := writeTestYML(t, dir, "valid.yml", `extends: substitution +message: "test" +level: warning +swap: + foo: bar +`) + + rootCmd.SetArgs([]string{"validate", path}) + err := rootCmd.Execute() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_MultipleValidFiles(t *testing.T) { + dir := t.TempDir() + path1 := writeTestYML(t, dir, "a.yml", `extends: existence +message: "test" +level: warning +tokens: + - foo +`) + path2 := writeTestYML(t, dir, "b.yml", `extends: occurrence +message: "test" +level: warning +max: 10 +token: '\S+' +`) + + rootCmd.SetArgs([]string{"validate", path1, path2}) + err := rootCmd.Execute() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_InvalidFile(t *testing.T) { + dir := t.TempDir() + path := writeTestYML(t, dir, "bad.yml", `extends: substitution +level: warning +`) + + rootCmd.SetArgs([]string{"validate", path}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error for invalid file") + } +} + +func TestValidate_MixedFiles(t *testing.T) { + dir := t.TempDir() + good := writeTestYML(t, dir, "good.yml", `extends: existence +message: "test" +level: warning +tokens: + - foo +`) + bad := writeTestYML(t, dir, "bad.yml", `extends: substitution +level: warning +`) + + rootCmd.SetArgs([]string{"validate", good, bad}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error when one file is invalid") + } +} + +func TestValidate_FileNotExist(t *testing.T) { + rootCmd.SetArgs([]string{"validate", "/nonexistent/file.yml"}) + err := rootCmd.Execute() + if err == nil { + t.Fatal("expected error for nonexistent file") + } +} + +func TestValidate_GenerateThenValidate(t *testing.T) { + // Integration: generate YAML from .md, then validate the output. + resetGenerateFlags(t) + dir := t.TempDir() + mdPath := writeTestMD(t, dir, "Integration.md") + ymlPath := filepath.Join(dir, "Integration.yml") + + // Generate. + rootCmd.SetArgs([]string{"generate", mdPath}) + if err := rootCmd.Execute(); err != nil { + t.Fatalf("generate failed: %v", err) + } + + // Validate the generated output. + rootCmd.SetArgs([]string{"validate", ymlPath}) + if err := rootCmd.Execute(); err != nil { + t.Fatalf("validate failed on generated file: %v", err) + } + + // Verify the generated file contains expected content. + data, err := os.ReadFile(ymlPath) + if err != nil { + t.Fatalf("reading generated file: %v", err) + } + if !strings.Contains(string(data), "extends: substitution") { + t.Error("generated file should contain extends: substitution") + } +} diff --git a/internal/mdgen/emit.go b/internal/mdgen/emit.go new file mode 100644 index 0000000..09f86ad --- /dev/null +++ b/internal/mdgen/emit.go @@ -0,0 +1,125 @@ +package mdgen + +import ( + "fmt" + "sort" + + "go.yaml.in/yaml/v3" +) + +// EmitYAML converts a RuleSource into Vale-compatible YAML bytes. +// Field ordering: extends, message, level, then remaining scalars alphabetically, +// then type-specific compound fields (swap, tokens, exceptions). +func EmitYAML(src *RuleSource) ([]byte, []Warning, error) { + var warnings []Warning + + root := &yaml.Node{Kind: yaml.MappingNode} + + // Required fields in fixed order. + addScalar(root, "extends", src.Extends) + addScalar(root, "message", src.Message) + addScalar(root, "level", src.Level) + + // Remaining scalar fields, sorted alphabetically. + // Separate known compound/type-specific keys to emit last. + compoundKeys := map[string]bool{ + "swap": true, "tokens": true, "exceptions": true, + } + var scalarKeys []string + for k := range src.Fields { + if !compoundKeys[k] { + scalarKeys = append(scalarKeys, k) + } + } + sort.Strings(scalarKeys) + + for _, k := range scalarKeys { + v := src.Fields[k] + node, err := valueToNode(v) + if err != nil { + warnings = append(warnings, Warning{ + Message: fmt.Sprintf("skipping field %q: %v", k, err), + }) + continue + } + root.Content = append(root.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: k}, + node, + ) + } + + // Type-specific compound fields. + switch src.Extends { + case "substitution": + if len(src.Swap) > 0 { + swapNode := &yaml.Node{Kind: yaml.MappingNode} + for _, pair := range src.Swap { + swapNode.Content = append(swapNode.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: pair.Key}, + &yaml.Node{Kind: yaml.ScalarNode, Value: pair.Value}, + ) + } + root.Content = append(root.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: "swap"}, + swapNode, + ) + } + + case "existence": + if len(src.Tokens) > 0 { + root.Content = append(root.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: "tokens"}, + stringsToSeqNode(src.Tokens), + ) + } + + case "capitalization": + if len(src.Exceptions) > 0 { + root.Content = append(root.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: "exceptions"}, + stringsToSeqNode(src.Exceptions), + ) + } + } + // occurrence: all fields are scalars, already emitted above. + + doc := &yaml.Node{ + Kind: yaml.DocumentNode, + Content: []*yaml.Node{root}, + } + + out, err := yaml.Marshal(doc) + if err != nil { + return nil, nil, fmt.Errorf("marshaling YAML: %w", err) + } + + return out, warnings, nil +} + +// addScalar appends a key-value pair with string values to a mapping node. +func addScalar(mapping *yaml.Node, key, value string) { + mapping.Content = append(mapping.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: key}, + &yaml.Node{Kind: yaml.ScalarNode, Value: value}, + ) +} + +// stringsToSeqNode builds a YAML sequence node from a string slice. +func stringsToSeqNode(items []string) *yaml.Node { + seq := &yaml.Node{Kind: yaml.SequenceNode} + for _, item := range items { + seq.Content = append(seq.Content, + &yaml.Node{Kind: yaml.ScalarNode, Value: item}, + ) + } + return seq +} + +// valueToNode converts a Go value to an appropriate yaml.Node. +func valueToNode(v interface{}) (*yaml.Node, error) { + var node yaml.Node + if err := node.Encode(v); err != nil { + return nil, err + } + return &node, nil +} diff --git a/internal/mdgen/emit_test.go b/internal/mdgen/emit_test.go new file mode 100644 index 0000000..1f35628 --- /dev/null +++ b/internal/mdgen/emit_test.go @@ -0,0 +1,362 @@ +package mdgen_test + +import ( + "os" + "strings" + "testing" + + "github.com/armstrongl/rulebound/internal/mdgen" + "github.com/armstrongl/rulebound/internal/parser" + "go.yaml.in/yaml/v3" +) + +// ── Semantic tests ───────────────────────────────────────────────────────── + +func TestEmitYAML_Substitution_Semantic(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsSubstitution, + Message: "Prefer '%s' over '%s'.", + Level: "warning", + Fields: map[string]interface{}{"ignorecase": true}, + Swap: []mdgen.SwapPair{ + {Key: "leverage", Value: "use"}, + {Key: "utilize", Value: "use"}, + }, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var m map[string]interface{} + if err := yaml.Unmarshal(out, &m); err != nil { + t.Fatalf("unmarshal output: %v", err) + } + + if m["extends"] != "substitution" { + t.Errorf("extends: got %v", m["extends"]) + } + if m["message"] != "Prefer '%s' over '%s'." { + t.Errorf("message: got %v", m["message"]) + } + if m["level"] != "warning" { + t.Errorf("level: got %v", m["level"]) + } + if m["ignorecase"] != true { + t.Errorf("ignorecase: got %v", m["ignorecase"]) + } + + // Verify swap is present and has correct values. + swapRaw, ok := m["swap"].(map[string]interface{}) + if !ok { + t.Fatalf("swap: expected map, got %T", m["swap"]) + } + if swapRaw["leverage"] != "use" { + t.Errorf("swap[leverage]: got %v", swapRaw["leverage"]) + } +} + +func TestEmitYAML_Existence_Semantic(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsExistence, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{}, + Tokens: []string{"foo", "bar"}, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var m map[string]interface{} + if err := yaml.Unmarshal(out, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + tokens, ok := m["tokens"].([]interface{}) + if !ok { + t.Fatalf("tokens: expected slice, got %T", m["tokens"]) + } + if len(tokens) != 2 || tokens[0] != "foo" || tokens[1] != "bar" { + t.Errorf("tokens: got %v", tokens) + } +} + +func TestEmitYAML_Occurrence_Semantic(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsOccurrence, + Message: "test", + Level: "suggestion", + Fields: map[string]interface{}{ + "max": 30, + "token": `[^\s]+`, + "scope": "sentence", + }, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var m map[string]interface{} + if err := yaml.Unmarshal(out, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if m["max"] != 30 { + t.Errorf("max: got %v", m["max"]) + } + if m["token"] != `[^\s]+` { + t.Errorf("token: got %v", m["token"]) + } + if m["scope"] != "sentence" { + t.Errorf("scope: got %v", m["scope"]) + } +} + +func TestEmitYAML_Capitalization_Semantic(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsCapitalization, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{"match": "$sentence"}, + Exceptions: []string{"iOS", "API"}, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var m map[string]interface{} + if err := yaml.Unmarshal(out, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if m["match"] != "$sentence" { + t.Errorf("match: got %v", m["match"]) + } + + exceptions, ok := m["exceptions"].([]interface{}) + if !ok { + t.Fatalf("exceptions: expected slice, got %T", m["exceptions"]) + } + if len(exceptions) != 2 || exceptions[0] != "iOS" || exceptions[1] != "API" { + t.Errorf("exceptions: got %v", exceptions) + } +} + +func TestEmitYAML_UnknownField_Passthrough(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsExistence, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{"reviewed_by": "Jane Smith"}, + Tokens: []string{"foo"}, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var m map[string]interface{} + if err := yaml.Unmarshal(out, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if m["reviewed_by"] != "Jane Smith" { + t.Errorf("reviewed_by: got %v", m["reviewed_by"]) + } +} + +func TestEmitYAML_DefaultLevel_Emitted(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsExistence, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{}, + Tokens: []string{"foo"}, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(string(out), "level: warning") { + t.Error("default level 'warning' should still be emitted in output") + } +} + +func TestEmitYAML_EmptyExceptions_Omitted(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsCapitalization, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{"match": "$sentence"}, + Exceptions: nil, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if strings.Contains(string(out), "exceptions") { + t.Error("empty exceptions should be omitted from output") + } +} + +func TestEmitYAML_ScopeField(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsExistence, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{"scope": "heading"}, + Tokens: []string{"foo"}, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.Contains(string(out), "scope: heading") { + t.Error("scope field should appear in output") + } +} + +func TestEmitYAML_NoMetaInOutput(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsExistence, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{}, + Tokens: []string{"foo"}, + Meta: map[string]interface{}{"author": "Jane"}, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if strings.Contains(string(out), "meta") || strings.Contains(string(out), "author") { + t.Error("meta fields should not appear in generated YAML") + } +} + +// ── Golden-file tests ────────────────────────────────────────────────────── + +func TestEmitYAML_GoldenFile_Substitution(t *testing.T) { + goldenTest(t, "substitution.md", "expected/substitution.yml") +} + +func TestEmitYAML_GoldenFile_Existence(t *testing.T) { + goldenTest(t, "existence.md", "expected/existence.yml") +} + +func TestEmitYAML_GoldenFile_Occurrence(t *testing.T) { + goldenTest(t, "occurrence.md", "expected/occurrence.yml") +} + +func TestEmitYAML_GoldenFile_Capitalization(t *testing.T) { + goldenTest(t, "capitalization.md", "expected/capitalization.yml") +} + +// ── Round-trip integration ───────────────────────────────────────────────── + +func TestEmitYAML_RoundTrip_Substitution(t *testing.T) { + data := readTestdata(t, "substitution.md") + src, _, err := mdgen.ParseMarkdown(data) + if err != nil { + t.Fatalf("parse: %v", err) + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("emit: %v", err) + } + + // Unmarshal and verify key fields. + var m map[string]interface{} + if err := yaml.Unmarshal(out, &m); err != nil { + t.Fatalf("unmarshal: %v", err) + } + + if m["extends"] != "substitution" { + t.Errorf("extends: got %v", m["extends"]) + } + swapRaw, ok := m["swap"].(map[string]interface{}) + if !ok { + t.Fatalf("swap: expected map, got %T", m["swap"]) + } + if len(swapRaw) != 5 { + t.Errorf("swap entries: got %d, want 5", len(swapRaw)) + } +} + +// ── Swap order verification ──────────────────────────────────────────────── + +func TestEmitYAML_SwapOrder(t *testing.T) { + src := &mdgen.RuleSource{ + Extends: parser.ExtendsSubstitution, + Message: "test", + Level: "warning", + Fields: map[string]interface{}{}, + Swap: []mdgen.SwapPair{ + {Key: "zebra", Value: "z"}, + {Key: "apple", Value: "a"}, + {Key: "mango", Value: "m"}, + }, + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Verify insertion order preserved (zebra before apple before mango). + outStr := string(out) + zebraIdx := strings.Index(outStr, "zebra") + appleIdx := strings.Index(outStr, "apple") + mangoIdx := strings.Index(outStr, "mango") + + if zebraIdx == -1 || appleIdx == -1 || mangoIdx == -1 { + t.Fatalf("missing swap keys in output:\n%s", outStr) + } + if zebraIdx >= appleIdx || appleIdx >= mangoIdx { + t.Errorf("swap order not preserved: zebra@%d, apple@%d, mango@%d", zebraIdx, appleIdx, mangoIdx) + } +} + +// ── Helpers ──────────────────────────────────────────────────────────────── + +func goldenTest(t *testing.T, inputFile, goldenFile string) { + t.Helper() + + data := readTestdata(t, inputFile) + src, _, err := mdgen.ParseMarkdown(data) + if err != nil { + t.Fatalf("parse %s: %v", inputFile, err) + } + + out, _, err := mdgen.EmitYAML(src) + if err != nil { + t.Fatalf("emit: %v", err) + } + + expected, err := os.ReadFile(testdata(goldenFile)) + if err != nil { + t.Fatalf("reading golden file %s: %v", goldenFile, err) + } + + if string(out) != string(expected) { + t.Errorf("output differs from golden file %s:\n--- got ---\n%s\n--- want ---\n%s", goldenFile, out, expected) + } +} diff --git a/internal/mdgen/parse.go b/internal/mdgen/parse.go new file mode 100644 index 0000000..f004d71 --- /dev/null +++ b/internal/mdgen/parse.go @@ -0,0 +1,327 @@ +// Package mdgen converts structured Markdown files into Vale-compatible YAML +// rule files. It handles the .md → YAML direction; the parser package handles +// YAML → ValeRule for downstream content generation. +package mdgen + +import ( + "fmt" + "strings" + + "github.com/armstrongl/rulebound/internal/parser" + "go.yaml.in/yaml/v3" +) + +// Supported extends types for v1 generation. +var supportedTypes = map[string]bool{ + parser.ExtendsSubstitution: true, + parser.ExtendsExistence: true, + parser.ExtendsOccurrence: true, + parser.ExtendsCapitalization: true, +} + +// SwapPair holds a single key-value entry from a vale-swap block, +// preserving the original file order. +type SwapPair struct { + Key string + Value string +} + +// Warning records a non-fatal issue encountered during parsing. +type Warning struct { + Message string +} + +// RuleSource is the intermediate representation of a structured Markdown file, +// ready for YAML emission. +type RuleSource struct { + // Required fields. + Extends string + Message string + Level string + + // Pass-through scalar fields (scope, ignorecase, nonword, link, etc.). + Fields map[string]interface{} + + // Type-specific data. + Swap []SwapPair // substitution + Tokens []string // existence + Exceptions []string // capitalization + + // Occurrence scalars are in Fields (max, min, token). + + // Meta holds rulebound-only metadata stripped before emission. + Meta map[string]interface{} +} + +// ParseMarkdown parses a structured Markdown file into a RuleSource. +// It extracts YAML frontmatter and vale-* fenced code blocks. +func ParseMarkdown(data []byte) (*RuleSource, []Warning, error) { + fmBytes, body, err := parser.ExtractFrontmatter(data) + if err != nil { + return nil, nil, fmt.Errorf("frontmatter: %w", err) + } + + // Parse frontmatter into generic map. + var fm map[string]interface{} + if err := yaml.Unmarshal(fmBytes, &fm); err != nil { + return nil, nil, fmt.Errorf("parsing frontmatter YAML: %w", err) + } + + var warnings []Warning + + // Extract and validate required fields. + extends, err := requireString(fm, "extends") + if err != nil { + return nil, nil, err + } + if !supportedTypes[extends] { + supported := []string{ + parser.ExtendsSubstitution, + parser.ExtendsExistence, + parser.ExtendsOccurrence, + parser.ExtendsCapitalization, + } + return nil, nil, fmt.Errorf("unsupported extends type %q (supported: %s)", extends, strings.Join(supported, ", ")) + } + + message, err := requireString(fm, "message") + if err != nil { + return nil, nil, err + } + + // Level defaults to "warning" with advisory. + level, _ := fm["level"].(string) + if level == "" { + level = "warning" + warnings = append(warnings, Warning{Message: "level not specified, defaulting to 'warning'"}) + } + + // Strip meta block silently. + var meta map[string]interface{} + if m, ok := fm["meta"]; ok { + if mmap, ok := m.(map[string]interface{}); ok { + meta = mmap + } + } + + // Build pass-through fields (everything except extends, message, level, meta). + fields := make(map[string]interface{}) + skipKeys := map[string]bool{"extends": true, "message": true, "level": true, "meta": true} + for k, v := range fm { + if skipKeys[k] { + continue + } + if !parser.IsKnownField(k) { + warnings = append(warnings, Warning{ + Message: fmt.Sprintf("unknown frontmatter field %q (not a known Vale field); passing through to YAML", k), + }) + } + fields[k] = v + } + + // Parse fenced blocks from body. + blocks, blockWarnings := parseFencedBlocks(body) + warnings = append(warnings, blockWarnings...) + + src := &RuleSource{ + Extends: extends, + Message: message, + Level: level, + Fields: fields, + Meta: meta, + } + + // Type-specific extraction and validation. + switch extends { + case parser.ExtendsSubstitution: + swapBlock, ok := blocks["vale-swap"] + if !ok { + return nil, nil, fmt.Errorf("substitution rule requires a ```vale-swap``` fenced block") + } + pairs, swapWarnings, err := parseSwapBlock(swapBlock) + if err != nil { + return nil, nil, fmt.Errorf("parsing vale-swap block: %w", err) + } + warnings = append(warnings, swapWarnings...) + if len(pairs) == 0 { + return nil, nil, fmt.Errorf("substitution rule requires at least one entry in the ```vale-swap``` block") + } + src.Swap = pairs + + case parser.ExtendsExistence: + tokenBlock, ok := blocks["vale-tokens"] + if !ok { + return nil, nil, fmt.Errorf("existence rule requires a ```vale-tokens``` fenced block") + } + tokens := parseLineBlock(tokenBlock) + if len(tokens) == 0 { + return nil, nil, fmt.Errorf("existence rule requires at least one token in the ```vale-tokens``` block") + } + src.Tokens = tokens + + case parser.ExtendsOccurrence: + // max, min, token come from frontmatter (already in Fields). + hasMax := hasField(fm, "max") + hasMin := hasField(fm, "min") + hasToken := hasField(fm, "token") + if !hasMax && !hasMin { + return nil, nil, fmt.Errorf("occurrence rule requires at least one of 'max' or 'min' in frontmatter") + } + if !hasToken { + return nil, nil, fmt.Errorf("occurrence rule requires 'token' in frontmatter") + } + + case parser.ExtendsCapitalization: + if !hasField(fm, "match") { + return nil, nil, fmt.Errorf("capitalization rule requires 'match' in frontmatter") + } + if exBlock, ok := blocks["vale-exceptions"]; ok { + src.Exceptions = parseLineBlock(exBlock) + } + } + + return src, warnings, nil +} + +// requireString extracts a non-empty string field from the frontmatter map. +func requireString(fm map[string]interface{}, key string) (string, error) { + v, ok := fm[key] + if !ok { + return "", fmt.Errorf("missing required field '%s' in frontmatter", key) + } + s, ok := v.(string) + if !ok || s == "" { + return "", fmt.Errorf("missing required field '%s' in frontmatter (empty or non-string value)", key) + } + return s, nil +} + +// hasField reports whether key exists in the frontmatter map. +func hasField(fm map[string]interface{}, key string) bool { + _, ok := fm[key] + return ok +} + +// fencedBlock holds the raw content of a vale-* fenced block. +type fencedBlock struct { + lang string + content string +} + +// parseFencedBlocks extracts vale-* fenced code blocks from the Markdown body. +// It returns a map from language tag to block content, and warnings for +// duplicate blocks. +func parseFencedBlocks(body []byte) (map[string]fencedBlock, []Warning) { + blocks := make(map[string]fencedBlock) + var warnings []Warning + + lines := strings.Split(string(body), "\n") + var inBlock bool + var currentLang string + var currentLines []string + + for _, line := range lines { + trimmed := strings.TrimSpace(line) + + if inBlock { + if trimmed == "```" { + // Closing fence. Only store non-discarded blocks. + if currentLang != "" { + blocks[currentLang] = fencedBlock{ + lang: currentLang, + content: strings.Join(currentLines, "\n"), + } + } + inBlock = false + currentLang = "" + currentLines = nil + continue + } + currentLines = append(currentLines, line) + continue + } + + // Check for opening fence with vale-* language. + if strings.HasPrefix(trimmed, "```vale-") { + lang := strings.TrimPrefix(trimmed, "```") + // Strip any trailing content after the language tag. + if idx := strings.IndexByte(lang, ' '); idx != -1 { + lang = lang[:idx] + } + + if _, exists := blocks[lang]; exists { + warnings = append(warnings, Warning{ + Message: fmt.Sprintf("duplicate %s block; only the first is used", lang), + }) + // Skip this duplicate by consuming until closing fence. + inBlock = true + currentLang = "" // empty lang means we discard + currentLines = nil + continue + } + + inBlock = true + currentLang = lang + currentLines = nil + } + } + + return blocks, warnings +} + +// parseSwapBlock parses a vale-swap block's YAML content into ordered SwapPairs. +// It uses yaml.Node to preserve key insertion order. +func parseSwapBlock(block fencedBlock) ([]SwapPair, []Warning, error) { + if strings.TrimSpace(block.content) == "" { + return nil, nil, nil + } + + var node yaml.Node + if err := yaml.Unmarshal([]byte(block.content), &node); err != nil { + return nil, nil, fmt.Errorf("invalid YAML in vale-swap block: %w", err) + } + + // The document node wraps the actual mapping. + if node.Kind != yaml.DocumentNode || len(node.Content) == 0 { + return nil, nil, nil + } + + mapping := node.Content[0] + if mapping.Kind != yaml.MappingNode { + return nil, []Warning{{Message: "vale-swap block is not a YAML mapping; skipping"}}, nil + } + + var pairs []SwapPair + var warnings []Warning + + for i := 0; i+1 < len(mapping.Content); i += 2 { + keyNode := mapping.Content[i] + valNode := mapping.Content[i+1] + + if keyNode.Kind != yaml.ScalarNode || valNode.Kind != yaml.ScalarNode { + warnings = append(warnings, Warning{ + Message: fmt.Sprintf("vale-swap: skipping non-scalar entry at line %d", keyNode.Line), + }) + continue + } + + pairs = append(pairs, SwapPair{ + Key: keyNode.Value, + Value: valNode.Value, + }) + } + + return pairs, warnings, nil +} + +// parseLineBlock splits a fenced block's content into non-empty lines. +func parseLineBlock(block fencedBlock) []string { + var lines []string + for _, line := range strings.Split(block.content, "\n") { + trimmed := strings.TrimSpace(line) + if trimmed != "" { + lines = append(lines, trimmed) + } + } + return lines +} diff --git a/internal/mdgen/parse_test.go b/internal/mdgen/parse_test.go new file mode 100644 index 0000000..694611b --- /dev/null +++ b/internal/mdgen/parse_test.go @@ -0,0 +1,363 @@ +package mdgen_test + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/armstrongl/rulebound/internal/mdgen" + "github.com/armstrongl/rulebound/internal/parser" +) + +func testdata(parts ...string) string { + _, file, _, _ := runtime.Caller(0) + dir := filepath.Join(filepath.Dir(file), "testdata") + return filepath.Join(append([]string{dir}, parts...)...) +} + +func readTestdata(t *testing.T, name string) []byte { + t.Helper() + data, err := os.ReadFile(testdata(name)) + if err != nil { + t.Fatalf("reading testdata %s: %v", name, err) + } + return data +} + +// ── Happy paths ──────────────────────────────────────────────────────────── + +func TestParseMarkdown_Substitution(t *testing.T) { + src, warnings, err := mdgen.ParseMarkdown(readTestdata(t, "substitution.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Extends != parser.ExtendsSubstitution { + t.Errorf("Extends: got %q, want %q", src.Extends, parser.ExtendsSubstitution) + } + if src.Message != "Prefer '%s' over '%s'." { + t.Errorf("Message: got %q", src.Message) + } + if src.Level != "warning" { + t.Errorf("Level: got %q, want %q", src.Level, "warning") + } + + // Check ignorecase passed through. + if v, ok := src.Fields["ignorecase"]; !ok || v != true { + t.Errorf("Fields[ignorecase]: got %v", src.Fields["ignorecase"]) + } + + // Verify swap pairs in file order. + expectedSwap := []mdgen.SwapPair{ + {Key: "leverage", Value: "use"}, + {Key: "utilize", Value: "use"}, + {Key: "functionality", Value: "feature"}, + {Key: "in order to", Value: "to"}, + {Key: "at this point in time", Value: "now"}, + } + if len(src.Swap) != len(expectedSwap) { + t.Fatalf("Swap: got %d pairs, want %d", len(src.Swap), len(expectedSwap)) + } + for i, want := range expectedSwap { + got := src.Swap[i] + if got.Key != want.Key || got.Value != want.Value { + t.Errorf("Swap[%d]: got %q:%q, want %q:%q", i, got.Key, got.Value, want.Key, want.Value) + } + } + + // No unexpected warnings. + for _, w := range warnings { + t.Logf("warning: %s", w.Message) + } +} + +func TestParseMarkdown_Existence(t *testing.T) { + src, _, err := mdgen.ParseMarkdown(readTestdata(t, "existence.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Extends != parser.ExtendsExistence { + t.Errorf("Extends: got %q", src.Extends) + } + + expectedTokens := []string{ + "was created", "was deleted", "was found", + "was made", "is expected", "are required", + } + if len(src.Tokens) != len(expectedTokens) { + t.Fatalf("Tokens: got %d, want %d", len(src.Tokens), len(expectedTokens)) + } + for i, want := range expectedTokens { + if src.Tokens[i] != want { + t.Errorf("Tokens[%d]: got %q, want %q", i, src.Tokens[i], want) + } + } +} + +func TestParseMarkdown_Occurrence(t *testing.T) { + src, _, err := mdgen.ParseMarkdown(readTestdata(t, "occurrence.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Extends != parser.ExtendsOccurrence { + t.Errorf("Extends: got %q", src.Extends) + } + + if v, ok := src.Fields["max"]; !ok || v != 30 { + t.Errorf("Fields[max]: got %v", src.Fields["max"]) + } + if v, ok := src.Fields["token"]; !ok || v != "[^\\s]+" { + t.Errorf("Fields[token]: got %v", src.Fields["token"]) + } + if v, ok := src.Fields["scope"]; !ok || v != "sentence" { + t.Errorf("Fields[scope]: got %v", src.Fields["scope"]) + } +} + +func TestParseMarkdown_Capitalization(t *testing.T) { + src, _, err := mdgen.ParseMarkdown(readTestdata(t, "capitalization.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Extends != parser.ExtendsCapitalization { + t.Errorf("Extends: got %q", src.Extends) + } + if v, ok := src.Fields["match"]; !ok || v != "$sentence" { + t.Errorf("Fields[match]: got %v", src.Fields["match"]) + } + + expectedExceptions := []string{"iOS", "macOS", "API", "UI", "GraphQL"} + if len(src.Exceptions) != len(expectedExceptions) { + t.Fatalf("Exceptions: got %d, want %d", len(src.Exceptions), len(expectedExceptions)) + } + for i, want := range expectedExceptions { + if src.Exceptions[i] != want { + t.Errorf("Exceptions[%d]: got %q, want %q", i, src.Exceptions[i], want) + } + } +} + +func TestParseMarkdown_MetaStripped(t *testing.T) { + src, _, err := mdgen.ParseMarkdown(readTestdata(t, "meta-block.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Meta == nil { + t.Fatal("Meta: expected non-nil") + } + if src.Meta["author"] != "Jane Smith" { + t.Errorf("Meta[author]: got %v", src.Meta["author"]) + } + + // Meta should not appear in Fields. + if _, ok := src.Fields["meta"]; ok { + t.Error("Fields should not contain 'meta' key") + } +} + +func TestParseMarkdown_UnknownFieldWarning(t *testing.T) { + src, warnings, err := mdgen.ParseMarkdown(readTestdata(t, "unknown-field.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Unknown field should be preserved in Fields. + if v, ok := src.Fields["reviewed_by"]; !ok || v != "Jane Smith" { + t.Errorf("Fields[reviewed_by]: got %v", src.Fields["reviewed_by"]) + } + + // Should have a warning about the unknown field. + found := false + for _, w := range warnings { + if strings.Contains(w.Message, "reviewed_by") && strings.Contains(w.Message, "unknown") { + found = true + break + } + } + if !found { + t.Error("expected warning about unknown field 'reviewed_by'") + } +} + +func TestParseMarkdown_DefaultLevel(t *testing.T) { + src, warnings, err := mdgen.ParseMarkdown(readTestdata(t, "no-level.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Level != "warning" { + t.Errorf("Level: got %q, want %q", src.Level, "warning") + } + + found := false + for _, w := range warnings { + if strings.Contains(w.Message, "level") && strings.Contains(w.Message, "warning") { + found = true + break + } + } + if !found { + t.Error("expected advisory warning about default level") + } +} + +// ── CRLF ─────────────────────────────────────────────────────────────────── + +func TestParseMarkdown_CRLF(t *testing.T) { + src, _, err := mdgen.ParseMarkdown(readTestdata(t, "crlf.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if src.Extends != parser.ExtendsExistence { + t.Errorf("Extends: got %q", src.Extends) + } + if len(src.Tokens) != 2 { + t.Errorf("Tokens: got %d, want 2", len(src.Tokens)) + } +} + +// ── Edge cases ───────────────────────────────────────────────────────────── + +func TestParseMarkdown_EmptySwapBlock(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "empty-swap.md")) + if err == nil { + t.Fatal("expected error for empty swap block") + } + if !strings.Contains(err.Error(), "at least one entry") { + t.Errorf("error should mention empty swap block: %v", err) + } +} + +func TestParseMarkdown_QuotedSwapKeys(t *testing.T) { + src, _, err := mdgen.ParseMarkdown(readTestdata(t, "quoted-swap-keys.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(src.Swap) != 2 { + t.Fatalf("Swap: got %d pairs, want 2", len(src.Swap)) + } + if src.Swap[0].Key != "e.g." || src.Swap[0].Value != "for example" { + t.Errorf("Swap[0]: got %q:%q", src.Swap[0].Key, src.Swap[0].Value) + } + if src.Swap[1].Key != "i.e." || src.Swap[1].Value != "that is" { + t.Errorf("Swap[1]: got %q:%q", src.Swap[1].Key, src.Swap[1].Value) + } +} + +func TestParseMarkdown_DuplicateSwapBlocks(t *testing.T) { + _, warnings, err := mdgen.ParseMarkdown(readTestdata(t, "duplicate-swap-blocks.md")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, w := range warnings { + if strings.Contains(w.Message, "duplicate") { + found = true + break + } + } + if !found { + t.Error("expected warning about duplicate vale-swap block") + } +} + +// ── Error paths ──────────────────────────────────────────────────────────── + +func TestParseMarkdown_MissingExtends(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "missing-extends.md")) + if err == nil { + t.Fatal("expected error for missing extends") + } + if !strings.Contains(err.Error(), "extends") { + t.Errorf("error should name 'extends' field: %v", err) + } +} + +func TestParseMarkdown_MissingMessage(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "missing-message.md")) + if err == nil { + t.Fatal("expected error for missing message") + } + if !strings.Contains(err.Error(), "message") { + t.Errorf("error should name 'message' field: %v", err) + } +} + +func TestParseMarkdown_EmptyMessage(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "empty-message.md")) + if err == nil { + t.Fatal("expected error for empty message") + } + if !strings.Contains(err.Error(), "message") { + t.Errorf("error should name 'message' field: %v", err) + } +} + +func TestParseMarkdown_UnsupportedExtends(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "unsupported-extends.md")) + if err == nil { + t.Fatal("expected error for unsupported extends") + } + if !strings.Contains(err.Error(), "script") { + t.Errorf("error should name the unsupported type: %v", err) + } + if !strings.Contains(err.Error(), "substitution") { + t.Errorf("error should list supported types: %v", err) + } +} + +func TestParseMarkdown_MissingSwapBlock(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "missing-swap.md")) + if err == nil { + t.Fatal("expected error for missing swap block") + } + if !strings.Contains(err.Error(), "vale-swap") { + t.Errorf("error should name expected block: %v", err) + } +} + +func TestParseMarkdown_MissingTokensBlock(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "missing-tokens.md")) + if err == nil { + t.Fatal("expected error for missing tokens block") + } + if !strings.Contains(err.Error(), "vale-tokens") { + t.Errorf("error should name expected block: %v", err) + } +} + +func TestParseMarkdown_OccurrenceMissingToken(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "occurrence-missing-token.md")) + if err == nil { + t.Fatal("expected error for missing token field") + } + if !strings.Contains(err.Error(), "token") { + t.Errorf("error should name 'token' field: %v", err) + } +} + +func TestParseMarkdown_CapitalizationMissingMatch(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "capitalization-missing-match.md")) + if err == nil { + t.Fatal("expected error for missing match field") + } + if !strings.Contains(err.Error(), "match") { + t.Errorf("error should name 'match' field: %v", err) + } +} + +func TestParseMarkdown_NoFrontmatter(t *testing.T) { + _, _, err := mdgen.ParseMarkdown(readTestdata(t, "no-frontmatter.md")) + if err == nil { + t.Fatal("expected error for no frontmatter") + } +} diff --git a/internal/mdgen/testdata/capitalization-missing-match.md b/internal/mdgen/testdata/capitalization-missing-match.md new file mode 100644 index 0000000..231bbf2 --- /dev/null +++ b/internal/mdgen/testdata/capitalization-missing-match.md @@ -0,0 +1,9 @@ +--- +extends: capitalization +message: "'%s' should be sentence-cased." +level: warning +--- + +# Missing Match + +Capitalization rule without match field. diff --git a/internal/mdgen/testdata/capitalization.md b/internal/mdgen/testdata/capitalization.md new file mode 100644 index 0000000..e8ec38d --- /dev/null +++ b/internal/mdgen/testdata/capitalization.md @@ -0,0 +1,18 @@ +--- +extends: capitalization +level: warning +message: "'%s' should be sentence-cased." +match: $sentence +--- + +# Capitalization + +Sentence case is the standard for this style guide. + +```vale-exceptions +iOS +macOS +API +UI +GraphQL +``` diff --git a/internal/mdgen/testdata/crlf.md b/internal/mdgen/testdata/crlf.md new file mode 100644 index 0000000..20587db --- /dev/null +++ b/internal/mdgen/testdata/crlf.md @@ -0,0 +1,12 @@ +--- +extends: existence +message: "'%s' is deprecated." +level: warning +--- + +# CRLF Test + +```vale-tokens +leverage +utilize +``` diff --git a/internal/mdgen/testdata/duplicate-swap-blocks.md b/internal/mdgen/testdata/duplicate-swap-blocks.md new file mode 100644 index 0000000..2f49b1a --- /dev/null +++ b/internal/mdgen/testdata/duplicate-swap-blocks.md @@ -0,0 +1,17 @@ +--- +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning +--- + +# Duplicate Swap Blocks + +```vale-swap +leverage: use +``` + +Second block: + +```vale-swap +utilize: use +``` diff --git a/internal/mdgen/testdata/empty-message.md b/internal/mdgen/testdata/empty-message.md new file mode 100644 index 0000000..f4daee3 --- /dev/null +++ b/internal/mdgen/testdata/empty-message.md @@ -0,0 +1,11 @@ +--- +extends: substitution +message: "" +level: warning +--- + +# Empty Message + +```vale-swap +leverage: use +``` diff --git a/internal/mdgen/testdata/empty-swap.md b/internal/mdgen/testdata/empty-swap.md new file mode 100644 index 0000000..69196fd --- /dev/null +++ b/internal/mdgen/testdata/empty-swap.md @@ -0,0 +1,12 @@ +--- +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning +--- + +# Empty Swap + +```vale-swap +``` + +No swap entries. diff --git a/internal/mdgen/testdata/existence.md b/internal/mdgen/testdata/existence.md new file mode 100644 index 0000000..f7f1053 --- /dev/null +++ b/internal/mdgen/testdata/existence.md @@ -0,0 +1,20 @@ +--- +extends: existence +level: warning +message: "'%s' looks like passive voice. Use active voice instead." +--- + +# Passive Voice + +Active voice makes documentation clearer. + +```vale-tokens +was created +was deleted +was found +was made +is expected +are required +``` + +Passive voice is acceptable when the actor is genuinely unknown. diff --git a/internal/mdgen/testdata/expected/capitalization.yml b/internal/mdgen/testdata/expected/capitalization.yml new file mode 100644 index 0000000..e40bd79 --- /dev/null +++ b/internal/mdgen/testdata/expected/capitalization.yml @@ -0,0 +1,10 @@ +extends: capitalization +message: '''%s'' should be sentence-cased.' +level: warning +match: $sentence +exceptions: + - iOS + - macOS + - API + - UI + - GraphQL diff --git a/internal/mdgen/testdata/expected/existence.yml b/internal/mdgen/testdata/expected/existence.yml new file mode 100644 index 0000000..0f5f266 --- /dev/null +++ b/internal/mdgen/testdata/expected/existence.yml @@ -0,0 +1,10 @@ +extends: existence +message: '''%s'' looks like passive voice. Use active voice instead.' +level: warning +tokens: + - was created + - was deleted + - was found + - was made + - is expected + - are required diff --git a/internal/mdgen/testdata/expected/occurrence.yml b/internal/mdgen/testdata/expected/occurrence.yml new file mode 100644 index 0000000..b15bda0 --- /dev/null +++ b/internal/mdgen/testdata/expected/occurrence.yml @@ -0,0 +1,6 @@ +extends: occurrence +message: This sentence has %s words. Try to keep sentences under 30 words. +level: suggestion +max: 30 +scope: sentence +token: '[^\s]+' diff --git a/internal/mdgen/testdata/expected/substitution.yml b/internal/mdgen/testdata/expected/substitution.yml new file mode 100644 index 0000000..fb2f010 --- /dev/null +++ b/internal/mdgen/testdata/expected/substitution.yml @@ -0,0 +1,10 @@ +extends: substitution +message: Prefer '%s' over '%s'. +level: warning +ignorecase: true +swap: + leverage: use + utilize: use + functionality: feature + in order to: to + at this point in time: now diff --git a/internal/mdgen/testdata/meta-block.md b/internal/mdgen/testdata/meta-block.md new file mode 100644 index 0000000..cfbd269 --- /dev/null +++ b/internal/mdgen/testdata/meta-block.md @@ -0,0 +1,16 @@ +--- +extends: existence +message: "'%s' is deprecated terminology." +level: warning +meta: + author: Jane Smith + reviewed: 2026-04-11 + ticket: STYLE-42 +--- + +# Deprecated Terms + +```vale-tokens +leverage +synergy +``` diff --git a/internal/mdgen/testdata/missing-extends.md b/internal/mdgen/testdata/missing-extends.md new file mode 100644 index 0000000..9e0ee26 --- /dev/null +++ b/internal/mdgen/testdata/missing-extends.md @@ -0,0 +1,8 @@ +--- +message: "This rule has no extends field." +level: warning +--- + +# Missing Extends + +This file is missing the required extends field. diff --git a/internal/mdgen/testdata/missing-message.md b/internal/mdgen/testdata/missing-message.md new file mode 100644 index 0000000..99157ae --- /dev/null +++ b/internal/mdgen/testdata/missing-message.md @@ -0,0 +1,10 @@ +--- +extends: substitution +level: warning +--- + +# Missing Message + +```vale-swap +leverage: use +``` diff --git a/internal/mdgen/testdata/missing-swap.md b/internal/mdgen/testdata/missing-swap.md new file mode 100644 index 0000000..a1919f1 --- /dev/null +++ b/internal/mdgen/testdata/missing-swap.md @@ -0,0 +1,9 @@ +--- +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning +--- + +# Missing Swap Block + +This substitution rule has no vale-swap block. diff --git a/internal/mdgen/testdata/missing-tokens.md b/internal/mdgen/testdata/missing-tokens.md new file mode 100644 index 0000000..6d97573 --- /dev/null +++ b/internal/mdgen/testdata/missing-tokens.md @@ -0,0 +1,9 @@ +--- +extends: existence +message: "'%s' is deprecated." +level: warning +--- + +# Missing Tokens Block + +This existence rule has no vale-tokens block. diff --git a/internal/mdgen/testdata/no-frontmatter.md b/internal/mdgen/testdata/no-frontmatter.md new file mode 100644 index 0000000..fdfb5f6 --- /dev/null +++ b/internal/mdgen/testdata/no-frontmatter.md @@ -0,0 +1,3 @@ +# No Frontmatter + +This file has no YAML frontmatter at all. diff --git a/internal/mdgen/testdata/no-level.md b/internal/mdgen/testdata/no-level.md new file mode 100644 index 0000000..f223667 --- /dev/null +++ b/internal/mdgen/testdata/no-level.md @@ -0,0 +1,10 @@ +--- +extends: existence +message: "'%s' is deprecated." +--- + +# No Level + +```vale-tokens +leverage +``` diff --git a/internal/mdgen/testdata/occurrence-missing-token.md b/internal/mdgen/testdata/occurrence-missing-token.md new file mode 100644 index 0000000..c4e07c2 --- /dev/null +++ b/internal/mdgen/testdata/occurrence-missing-token.md @@ -0,0 +1,10 @@ +--- +extends: occurrence +message: "test" +level: warning +max: 30 +--- + +# Missing Token + +Occurrence rule with max but no token field. diff --git a/internal/mdgen/testdata/occurrence.md b/internal/mdgen/testdata/occurrence.md new file mode 100644 index 0000000..e6c90fe --- /dev/null +++ b/internal/mdgen/testdata/occurrence.md @@ -0,0 +1,12 @@ +--- +extends: occurrence +level: suggestion +message: "This sentence has %s words. Try to keep sentences under 30 words." +scope: sentence +max: 30 +token: '[^\s]+' +--- + +# Sentence Length + +Shorter sentences improve readability and reduce cognitive load. diff --git a/internal/mdgen/testdata/quoted-swap-keys.md b/internal/mdgen/testdata/quoted-swap-keys.md new file mode 100644 index 0000000..33f29fd --- /dev/null +++ b/internal/mdgen/testdata/quoted-swap-keys.md @@ -0,0 +1,12 @@ +--- +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning +--- + +# Quoted Keys + +```vale-swap +"e.g.": "for example" +"i.e.": "that is" +``` diff --git a/internal/mdgen/testdata/substitution.md b/internal/mdgen/testdata/substitution.md new file mode 100644 index 0000000..5b1cc51 --- /dev/null +++ b/internal/mdgen/testdata/substitution.md @@ -0,0 +1,22 @@ +--- +extends: substitution +level: warning +message: "Prefer '%s' over '%s'." +ignorecase: true +--- + +# Jargon + +Replace jargon with plain language alternatives. + +## Word substitutions + +```vale-swap +leverage: use +utilize: use +functionality: feature +in order to: to +at this point in time: now +``` + +Add entries conservatively. diff --git a/internal/mdgen/testdata/unknown-field.md b/internal/mdgen/testdata/unknown-field.md new file mode 100644 index 0000000..05cc002 --- /dev/null +++ b/internal/mdgen/testdata/unknown-field.md @@ -0,0 +1,12 @@ +--- +extends: existence +message: "'%s' is deprecated." +level: warning +reviewed_by: "Jane Smith" +--- + +# Unknown Field Test + +```vale-tokens +leverage +``` diff --git a/internal/mdgen/testdata/unsupported-extends.md b/internal/mdgen/testdata/unsupported-extends.md new file mode 100644 index 0000000..9999ec0 --- /dev/null +++ b/internal/mdgen/testdata/unsupported-extends.md @@ -0,0 +1,9 @@ +--- +extends: script +message: "Scripts are not supported in v1." +level: warning +--- + +# Script Rule + +This uses an unsupported extends type. diff --git a/internal/parser/frontmatter.go b/internal/parser/frontmatter.go new file mode 100644 index 0000000..110190d --- /dev/null +++ b/internal/parser/frontmatter.go @@ -0,0 +1,48 @@ +package parser + +import ( + "fmt" + "strings" +) + +// ExtractFrontmatter splits a Markdown document into its YAML frontmatter and +// body. It normalises CRLF to LF before processing. The returned frontmatter +// bytes do not include the --- delimiters. If no valid frontmatter is found, +// ExtractFrontmatter returns an error. +func ExtractFrontmatter(data []byte) (frontmatter []byte, body []byte, err error) { + const fence = "---" + + content := strings.ReplaceAll(string(data), "\r\n", "\n") + + if !strings.HasPrefix(content, fence) { + return nil, nil, fmt.Errorf("no frontmatter found") + } + + rest := content[len(fence):] + if len(rest) == 0 || rest[0] != '\n' { + return nil, nil, fmt.Errorf("no frontmatter found") + } + rest = rest[1:] // consume newline after opening --- + + // Handle empty frontmatter where closing fence is at start of rest. + var idx int + if strings.HasPrefix(rest, fence) { + idx = 0 + } else { + i := strings.Index(rest, "\n"+fence) + if i == -1 { + return nil, nil, fmt.Errorf("no closing frontmatter fence") + } + idx = i + 1 // skip the leading \n to point at --- + } + + fmRaw := rest[:idx] + bodyStr := rest[idx+len(fence):] + + // Consume optional trailing newline after the closing fence. + if len(bodyStr) > 0 && bodyStr[0] == '\n' { + bodyStr = bodyStr[1:] + } + + return []byte(fmRaw), []byte(bodyStr), nil +} diff --git a/internal/parser/parser.go b/internal/parser/parser.go index e8329bc..a9ae9a0 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -46,6 +46,11 @@ var knownFields = map[string]bool{ "filters": true, } +// IsKnownField reports whether name is a recognised Vale rule YAML key. +func IsKnownField(name string) bool { + return knownFields[name] +} + // rawRule is the intermediate YAML representation used during parsing. // Several fields use custom unmarshalers to handle Vale's polymorphic YAML // syntax (action, swap, scope, tokens). diff --git a/internal/parser/testdata/validate/invalid-extends.yml b/internal/parser/testdata/validate/invalid-extends.yml new file mode 100644 index 0000000..f35fd9a --- /dev/null +++ b/internal/parser/testdata/validate/invalid-extends.yml @@ -0,0 +1,3 @@ +extends: script +message: "test" +level: warning diff --git a/internal/parser/testdata/validate/invalid-level.yml b/internal/parser/testdata/validate/invalid-level.yml new file mode 100644 index 0000000..76ca9dd --- /dev/null +++ b/internal/parser/testdata/validate/invalid-level.yml @@ -0,0 +1,5 @@ +extends: existence +message: "test" +level: critical +tokens: + - foo diff --git a/internal/parser/testdata/validate/missing-message.yml b/internal/parser/testdata/validate/missing-message.yml new file mode 100644 index 0000000..5d87342 --- /dev/null +++ b/internal/parser/testdata/validate/missing-message.yml @@ -0,0 +1,4 @@ +extends: substitution +level: warning +swap: + leverage: use diff --git a/internal/parser/testdata/validate/missing-swap.yml b/internal/parser/testdata/validate/missing-swap.yml new file mode 100644 index 0000000..2f2014c --- /dev/null +++ b/internal/parser/testdata/validate/missing-swap.yml @@ -0,0 +1,3 @@ +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning diff --git a/internal/parser/testdata/validate/multiple-errors.yml b/internal/parser/testdata/validate/multiple-errors.yml new file mode 100644 index 0000000..d27e719 --- /dev/null +++ b/internal/parser/testdata/validate/multiple-errors.yml @@ -0,0 +1,2 @@ +extends: substitution +level: warning diff --git a/internal/parser/testdata/validate/occurrence-max-only.yml b/internal/parser/testdata/validate/occurrence-max-only.yml new file mode 100644 index 0000000..66c3869 --- /dev/null +++ b/internal/parser/testdata/validate/occurrence-max-only.yml @@ -0,0 +1,5 @@ +extends: occurrence +message: "test" +level: warning +max: 10 +token: '\S+' diff --git a/internal/parser/testdata/validate/occurrence-min-zero.yml b/internal/parser/testdata/validate/occurrence-min-zero.yml new file mode 100644 index 0000000..144db1a --- /dev/null +++ b/internal/parser/testdata/validate/occurrence-min-zero.yml @@ -0,0 +1,5 @@ +extends: occurrence +message: "test" +level: warning +min: 0 +token: '\S+' diff --git a/internal/parser/testdata/validate/valid-capitalization.yml b/internal/parser/testdata/validate/valid-capitalization.yml new file mode 100644 index 0000000..1f693cd --- /dev/null +++ b/internal/parser/testdata/validate/valid-capitalization.yml @@ -0,0 +1,4 @@ +extends: capitalization +message: "'%s' should be sentence-cased." +level: warning +match: $sentence diff --git a/internal/parser/testdata/validate/valid-existence.yml b/internal/parser/testdata/validate/valid-existence.yml new file mode 100644 index 0000000..a574995 --- /dev/null +++ b/internal/parser/testdata/validate/valid-existence.yml @@ -0,0 +1,6 @@ +extends: existence +message: "'%s' is deprecated." +level: warning +tokens: + - leverage + - utilize diff --git a/internal/parser/testdata/validate/valid-occurrence.yml b/internal/parser/testdata/validate/valid-occurrence.yml new file mode 100644 index 0000000..bc82b9a --- /dev/null +++ b/internal/parser/testdata/validate/valid-occurrence.yml @@ -0,0 +1,5 @@ +extends: occurrence +message: "Too many words." +level: suggestion +max: 30 +token: '\S+' diff --git a/internal/parser/testdata/validate/valid-substitution.yml b/internal/parser/testdata/validate/valid-substitution.yml new file mode 100644 index 0000000..b6d680a --- /dev/null +++ b/internal/parser/testdata/validate/valid-substitution.yml @@ -0,0 +1,6 @@ +extends: substitution +message: "Prefer '%s' over '%s'." +level: warning +swap: + leverage: use + utilize: use diff --git a/internal/parser/validate.go b/internal/parser/validate.go new file mode 100644 index 0000000..568bec8 --- /dev/null +++ b/internal/parser/validate.go @@ -0,0 +1,307 @@ +package parser + +import ( + "fmt" + "os" + "strings" + + "go.yaml.in/yaml/v3" +) + +// ValidationError records a single structural issue found in a Vale rule file. +type ValidationError struct { + Field string // The YAML field that caused the error (e.g. "extends", "message"). + Message string // Human-readable description of the problem. + Severity string // "error" or "warning". +} + +// supportedExtendsV1 lists the extends types that ValidateRule performs +// type-specific checks for. +var supportedExtendsV1 = map[string]bool{ + ExtendsSubstitution: true, + ExtendsExistence: true, + ExtendsOccurrence: true, + ExtendsCapitalization: true, +} + +// validLevels lists the accepted values for the level field. +var validLevels = map[string]bool{ + "suggestion": true, + "warning": true, + "error": true, +} + +// ValidateRule reads a Vale YAML rule file and performs structural validation. +// It returns a slice of all validation errors found (empty when the rule is +// valid) and an error only for I/O or YAML-parse failures. +// +// Validation is performed against a map[string]interface{} rather than a typed +// struct so that key presence can be distinguished from Go zero values. +func ValidateRule(path string) ([]ValidationError, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading %s: %w", path, err) + } + return ValidateRuleBytes(data) +} + +// ValidateRuleBytes validates raw YAML bytes as a Vale rule. It performs the +// same structural checks as ValidateRule but without reading from disk. +func ValidateRuleBytes(data []byte) ([]ValidationError, error) { + var m map[string]interface{} + if err := yaml.Unmarshal(data, &m); err != nil { + return nil, fmt.Errorf("parsing YAML: %w", err) + } + + var errs []ValidationError + + // ── extends ───────────────────────────────────────────────────────────── + extendsVal, hasExtends := m["extends"] + if !hasExtends { + errs = append(errs, ValidationError{ + Field: "extends", + Message: "missing required field 'extends'", + Severity: "error", + }) + return errs, nil + } + + extendsStr, ok := extendsVal.(string) + if !ok || strings.TrimSpace(extendsStr) == "" { + errs = append(errs, ValidationError{ + Field: "extends", + Message: "field 'extends' must be a non-empty string", + Severity: "error", + }) + return errs, nil + } + + // ── message ───────────────────────────────────────────────────────────── + if msgVal, hasMsg := m["message"]; !hasMsg { + errs = append(errs, ValidationError{ + Field: "message", + Message: "missing required field 'message'", + Severity: "error", + }) + } else if msgStr, ok := msgVal.(string); !ok || strings.TrimSpace(msgStr) == "" { + errs = append(errs, ValidationError{ + Field: "message", + Message: "field 'message' must be a non-empty string", + Severity: "error", + }) + } + + // ── level ─────────────────────────────────────────────────────────────── + if levelVal, hasLevel := m["level"]; hasLevel { + levelStr, ok := levelVal.(string) + if !ok || !validLevels[levelStr] { + errs = append(errs, ValidationError{ + Field: "level", + Message: fmt.Sprintf("invalid level %q; must be one of: suggestion, warning, error", levelVal), + Severity: "error", + }) + } + } + + // ── unsupported extends ───────────────────────────────────────────────── + if !supportedExtendsV1[extendsStr] { + supported := []string{ + ExtendsSubstitution, + ExtendsExistence, + ExtendsOccurrence, + ExtendsCapitalization, + } + errs = append(errs, ValidationError{ + Field: "extends", + Message: fmt.Sprintf("unsupported extends type %q; supported types: %s", extendsStr, strings.Join(supported, ", ")), + Severity: "error", + }) + return errs, nil + } + + // ── type-specific checks ──────────────────────────────────────────────── + switch extendsStr { + case ExtendsSubstitution: + errs = append(errs, validateSubstitution(m)...) + case ExtendsExistence: + errs = append(errs, validateExistence(m)...) + case ExtendsOccurrence: + errs = append(errs, validateOccurrence(m)...) + case ExtendsCapitalization: + errs = append(errs, validateCapitalization(m)...) + } + + return errs, nil +} + +// validateSubstitution checks substitution-specific fields. +func validateSubstitution(m map[string]interface{}) []ValidationError { + var errs []ValidationError + + swapVal, hasSwap := m["swap"] + if !hasSwap { + errs = append(errs, ValidationError{ + Field: "swap", + Message: "substitution rule requires a non-empty 'swap' map", + Severity: "error", + }) + return errs + } + + switch v := swapVal.(type) { + case map[string]interface{}: + if len(v) == 0 { + errs = append(errs, ValidationError{ + Field: "swap", + Message: "substitution rule requires a non-empty 'swap' map", + Severity: "error", + }) + } else { + for key, val := range v { + if _, ok := val.(string); !ok { + errs = append(errs, ValidationError{ + Field: "swap", + Message: fmt.Sprintf("swap value for key %q must be a string, got %T", key, val), + Severity: "error", + }) + } + } + } + case []interface{}: + if len(v) == 0 { + errs = append(errs, ValidationError{ + Field: "swap", + Message: "substitution rule requires a non-empty 'swap' map", + Severity: "error", + }) + } else { + for i, item := range v { + m, ok := item.(map[string]interface{}) + if !ok { + errs = append(errs, ValidationError{ + Field: "swap", + Message: fmt.Sprintf("swap item [%d] must be a mapping, got %T", i, item), + Severity: "error", + }) + continue + } + for key, val := range m { + if _, ok := val.(string); !ok { + errs = append(errs, ValidationError{ + Field: "swap", + Message: fmt.Sprintf("swap value for key %q must be a string, got %T", key, val), + Severity: "error", + }) + } + } + } + } + default: + errs = append(errs, ValidationError{ + Field: "swap", + Message: fmt.Sprintf("'swap' must be a mapping, got %T", swapVal), + Severity: "error", + }) + } + + return errs +} + +// validateExistence checks existence-specific fields. +func validateExistence(m map[string]interface{}) []ValidationError { + var errs []ValidationError + + tokensVal, hasTokens := m["tokens"] + if !hasTokens { + errs = append(errs, ValidationError{ + Field: "tokens", + Message: "existence rule requires a non-empty 'tokens' list", + Severity: "error", + }) + return errs + } + + switch v := tokensVal.(type) { + case []interface{}: + if len(v) == 0 { + errs = append(errs, ValidationError{ + Field: "tokens", + Message: "existence rule requires a non-empty 'tokens' list", + Severity: "error", + }) + } + default: + errs = append(errs, ValidationError{ + Field: "tokens", + Message: fmt.Sprintf("'tokens' must be a list, got %T", tokensVal), + Severity: "error", + }) + } + + return errs +} + +// validateOccurrence checks occurrence-specific fields. +func validateOccurrence(m map[string]interface{}) []ValidationError { + var errs []ValidationError + + maxVal, hasMax := m["max"] + minVal, hasMin := m["min"] + if !hasMax && !hasMin { + errs = append(errs, ValidationError{ + Field: "max/min", + Message: "occurrence rule requires at least one of 'max' or 'min'", + Severity: "error", + }) + } + if hasMax { + if _, ok := maxVal.(int); !ok { + errs = append(errs, ValidationError{ + Field: "max", + Message: fmt.Sprintf("'max' must be an integer, got %T", maxVal), + Severity: "error", + }) + } + } + if hasMin { + if _, ok := minVal.(int); !ok { + errs = append(errs, ValidationError{ + Field: "min", + Message: fmt.Sprintf("'min' must be an integer, got %T", minVal), + Severity: "error", + }) + } + } + + tokenVal, hasToken := m["token"] + if !hasToken { + errs = append(errs, ValidationError{ + Field: "token", + Message: "occurrence rule requires a 'token' field", + Severity: "error", + }) + } else if tokenStr, ok := tokenVal.(string); !ok || strings.TrimSpace(tokenStr) == "" { + errs = append(errs, ValidationError{ + Field: "token", + Message: "field 'token' must be a non-empty string", + Severity: "error", + }) + } + + return errs +} + +// validateCapitalization checks capitalization-specific fields. +func validateCapitalization(m map[string]interface{}) []ValidationError { + var errs []ValidationError + + if _, hasMatch := m["match"]; !hasMatch { + errs = append(errs, ValidationError{ + Field: "match", + Message: "capitalization rule requires a 'match' field", + Severity: "error", + }) + } + + return errs +} diff --git a/internal/parser/validate_test.go b/internal/parser/validate_test.go new file mode 100644 index 0000000..003bcd9 --- /dev/null +++ b/internal/parser/validate_test.go @@ -0,0 +1,403 @@ +package parser_test + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/armstrongl/rulebound/internal/parser" +) + +// validateTestdata returns the absolute path to the validate testdata directory. +func validateTestdata(parts ...string) string { + _, file, _, _ := runtime.Caller(0) + dir := filepath.Join(filepath.Dir(file), "testdata", "validate") + return filepath.Join(append([]string{dir}, parts...)...) +} + +func TestValidateRule(t *testing.T) { + tests := []struct { + name string + file string + wantCount int // expected number of validation errors + wantField string // if non-empty, at least one error must reference this field + wantSubstr string // if non-empty, at least one error message must contain this + }{ + { + name: "valid substitution", + file: "valid-substitution.yml", + wantCount: 0, + }, + { + name: "valid existence", + file: "valid-existence.yml", + wantCount: 0, + }, + { + name: "valid occurrence", + file: "valid-occurrence.yml", + wantCount: 0, + }, + { + name: "valid capitalization", + file: "valid-capitalization.yml", + wantCount: 0, + }, + { + name: "missing message", + file: "missing-message.yml", + wantCount: 1, + wantField: "message", + wantSubstr: "message", + }, + { + name: "invalid extends (script)", + file: "invalid-extends.yml", + wantCount: 1, + wantField: "extends", + wantSubstr: "supported types", + }, + { + name: "substitution missing swap", + file: "missing-swap.yml", + wantCount: 1, + wantField: "swap", + wantSubstr: "swap", + }, + { + name: "invalid level", + file: "invalid-level.yml", + wantCount: 1, + wantField: "level", + wantSubstr: "critical", + }, + { + name: "occurrence with min zero is valid", + file: "occurrence-min-zero.yml", + wantCount: 0, + }, + { + name: "occurrence with only max is valid", + file: "occurrence-max-only.yml", + wantCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + errs, err := parser.ValidateRule(validateTestdata(tt.file)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(errs) != tt.wantCount { + t.Errorf("got %d validation errors, want %d: %+v", len(errs), tt.wantCount, errs) + } + + if tt.wantField != "" { + found := false + for _, ve := range errs { + if ve.Field == tt.wantField { + found = true + break + } + } + if !found { + t.Errorf("expected error referencing field %q, got: %+v", tt.wantField, errs) + } + } + + if tt.wantSubstr != "" { + found := false + for _, ve := range errs { + if strings.Contains(ve.Message, tt.wantSubstr) { + found = true + break + } + } + if !found { + t.Errorf("expected error message containing %q, got: %+v", tt.wantSubstr, errs) + } + } + }) + } +} + +func TestValidateRule_MultipleErrors(t *testing.T) { + errs, err := parser.ValidateRule(validateTestdata("multiple-errors.yml")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // multiple-errors.yml is missing both message and swap + if len(errs) < 2 { + t.Errorf("expected at least 2 validation errors, got %d: %+v", len(errs), errs) + } + + fields := make(map[string]bool) + for _, ve := range errs { + fields[ve.Field] = true + } + if !fields["message"] { + t.Errorf("expected error for field 'message', got fields: %v", fields) + } + if !fields["swap"] { + t.Errorf("expected error for field 'swap', got fields: %v", fields) + } +} + +func TestValidateRule_MissingExtends(t *testing.T) { + // Create a temp file with no extends field + dir := t.TempDir() + path := filepath.Join(dir, "no-extends.yml") + if err := writeTestFile(t, path, "message: test\nlevel: warning\n"); err != nil { + t.Fatalf("setup: %v", err) + } + + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(errs) != 1 { + t.Fatalf("expected 1 error, got %d: %+v", len(errs), errs) + } + if errs[0].Field != "extends" { + t.Errorf("expected error for 'extends', got %q", errs[0].Field) + } +} + +func TestValidateRule_ExistenceEmptyTokens(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "empty-tokens.yml") + content := "extends: existence\nmessage: test\nlevel: warning\ntokens: []\n" + if err := writeTestFile(t, path, content); err != nil { + t.Fatalf("setup: %v", err) + } + + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "tokens" { + found = true + break + } + } + if !found { + t.Errorf("expected error for empty tokens, got: %+v", errs) + } +} + +func TestValidateRule_OccurrenceMissingBothMaxMin(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "no-max-min.yml") + content := "extends: occurrence\nmessage: test\nlevel: warning\ntoken: '\\S+'\n" + if err := writeTestFile(t, path, content); err != nil { + t.Fatalf("setup: %v", err) + } + + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "max/min" { + found = true + break + } + } + if !found { + t.Errorf("expected error for missing max/min, got: %+v", errs) + } +} + +func TestValidateRule_CapitalizationMissingMatch(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "no-match.yml") + content := "extends: capitalization\nmessage: test\nlevel: warning\n" + if err := writeTestFile(t, path, content); err != nil { + t.Fatalf("setup: %v", err) + } + + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "match" { + found = true + break + } + } + if !found { + t.Errorf("expected error for missing match, got: %+v", errs) + } +} + +func TestValidateRule_ExtraFieldsNoError(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "extras.yml") + content := "extends: existence\nmessage: test\nlevel: warning\ntokens:\n - foo\ncustom_field: bar\n" + if err := writeTestFile(t, path, content); err != nil { + t.Fatalf("setup: %v", err) + } + + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(errs) != 0 { + t.Errorf("extra unknown fields should not cause errors, got: %+v", errs) + } +} + +func TestValidateRule_ValidLevelError(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "level-error.yml") + content := "extends: existence\nmessage: test\nlevel: error\ntokens:\n - foo\n" + if err := writeTestFile(t, path, content); err != nil { + t.Fatalf("setup: %v", err) + } + + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(errs) != 0 { + t.Errorf("level 'error' should be valid, got: %+v", errs) + } +} + +// ── Validate real Microsoft rules ─────────────────────────────────────────── + +func TestValidateRule_RealRules(t *testing.T) { + tests := []struct { + name string + file string + }{ + {"Avoid (existence)", "Avoid.yml"}, + {"Headings (capitalization)", "Headings.yml"}, + {"SentenceLength (occurrence)", "SentenceLength.yml"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path := testdata(tt.file) + errs, err := parser.ValidateRule(path) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(errs) != 0 { + t.Errorf("expected 0 validation errors for %s, got %d: %+v", tt.file, len(errs), errs) + } + }) + } +} + +func TestValidateRuleBytes_SwapNonStringValues(t *testing.T) { + content := "extends: substitution\nmessage: test\nlevel: warning\nswap:\n foo: 123\n" + errs, err := parser.ValidateRuleBytes([]byte(content)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "swap" && strings.Contains(ve.Message, "string") { + found = true + break + } + } + if !found { + t.Errorf("expected error for non-string swap value, got: %+v", errs) + } +} + +func TestValidateRuleBytes_SwapSequenceNonMapping(t *testing.T) { + content := "extends: substitution\nmessage: test\nlevel: warning\nswap:\n - notamap\n" + errs, err := parser.ValidateRuleBytes([]byte(content)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "swap" && strings.Contains(ve.Message, "mapping") { + found = true + break + } + } + if !found { + t.Errorf("expected error for non-mapping swap item, got: %+v", errs) + } +} + +func TestValidateRuleBytes_OccurrenceNonIntMax(t *testing.T) { + content := "extends: occurrence\nmessage: test\nlevel: warning\nmax: foo\ntoken: '\\S+'\n" + errs, err := parser.ValidateRuleBytes([]byte(content)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "max" && strings.Contains(ve.Message, "integer") { + found = true + break + } + } + if !found { + t.Errorf("expected error for non-integer max, got: %+v", errs) + } +} + +func TestValidateRuleBytes_OccurrenceNonStringToken(t *testing.T) { + content := "extends: occurrence\nmessage: test\nlevel: warning\nmax: 10\ntoken: 123\n" + errs, err := parser.ValidateRuleBytes([]byte(content)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + found := false + for _, ve := range errs { + if ve.Field == "token" && strings.Contains(ve.Message, "non-empty string") { + found = true + break + } + } + if !found { + t.Errorf("expected error for non-string token, got: %+v", errs) + } +} + +func TestExtractFrontmatter_EmptyFrontmatter(t *testing.T) { + data := []byte("---\n---\nBody here.\n") + fm, body, err := parser.ExtractFrontmatter(data) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(fm) != 0 { + t.Errorf("expected empty frontmatter, got %q", fm) + } + if string(body) != "Body here.\n" { + t.Errorf("body: got %q", body) + } +} + +// writeTestFile is a helper that writes content to a file, returning any error. +func writeTestFile(t *testing.T, path, content string) error { + t.Helper() + return os.WriteFile(path, []byte(content), 0o644) +}