From 1ff0f59fac783bbb4b5e2c276ed6aff0f296383f Mon Sep 17 00:00:00 2001
From: Ronald Tse
Date: Thu, 18 Jun 2026 17:09:57 +0800
Subject: [PATCH] feat(schema): accept optional categories field (issue #241)

Adds structured font classification to formula YAML:

    categories:
      style: sans-serif      # serif | sans-serif | monospace | display | script | handwriting | decorative
      script: [latin, cjk]   # string or array: latin | cjk | arabic | cyrillic | hebrew | devanagari | thai | other
      variable: false        # optional boolean (inferred from resources.format if omitted)
      use_case: body         # body | heading | code | ui | decorative | caption

Why: the docs site's browse filters and formula detail page badges need
structured data to enable filtering by 'show me monospace fonts' or
'show me CJK fonts'. Currently formulas have no way to express what
they ARE.

Validation behavior:
- Field is optional (warn-if-missing is NOT enforced)
- If present, must be a mapping (hash)
- Values outside the controlled vocabulary emit a WARNING, not error,
  so the vocabulary can evolve without breaking existing formulas
- script accepts string or array (CJK fonts often support multiple)
- variable is validated as boolean if present

This is the schema layer only (Layer 1 of 3 in TODO.clean/09):
- Layer 2 (data migration): apply categories to ~10 well-known fonts
  as proof of concept; full migration is ongoing community effort
- Layer 3 (UX): add category facet to FormulaBrowser filter sidebar
  and category badges to formula detail page (after redesigns land)

Documented in docs/guide/formula-structure.md (public) and CLAUDE.md
(local). Schema follows the proposal in issue #241.
---
 .github/scripts/validate_schema.rb | 60 ++++++++++++++++++++++++++++++
 docs/guide/formula-structure.md    | 23 ++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/.github/scripts/validate_schema.rb b/.github/scripts/validate_schema.rb
index 05f69e287..cd6e1eaa3 100755
--- a/.github/scripts/validate_schema.rb
+++ b/.github/scripts/validate_schema.rb
@@ -17,6 +17,10 @@ class ValidateSchema
   # But we should warn if it's missing for clarity
   SCHEMA_V5_OPTIONAL_BUT_RECOMMENDED = %w[name description].freeze
 
+  CATEGORY_STYLES = %w[serif sans-serif monospace display script handwriting decorative].freeze
+  CATEGORY_SCRIPTS = %w[latin cjk arabic cyrillic hebrew devanagari thai other].freeze
+  CATEGORY_USE_CASES = %w[body heading code ui decorative caption].freeze
+
   def initialize(args)
     OptionParser.new do |opts|
       opts.banner = "Usage: ruby validate_schema.rb [options]"
@@ -77,6 +81,8 @@ def validate_file(file)
 
     validate_fonts_or_collections(file, content)
 
+    validate_categories(file, content)
+
     # Common validations
     validate_fonts(file, content)
     validate_naming(file, content)
@@ -116,6 +122,60 @@ def validate_v5_schema(file, content)
     end
   end
 
+  # Validates the optional top-level "categories" mapping.
+  #
+  # A missing value is skipped. A non-Hash value or a non-boolean "variable"
+  # is recorded as an error; values outside the controlled vocabularies only
+  # produce warnings.
+  def validate_categories(file, content)
+    categories = content["categories"]
+    return unless categories
+
+    unless categories.is_a?(Hash)
+      add_error(file, "categories must be a mapping (hash), got #{categories.class}")
+      return
+    end
+
+    validate_category_enum(file, categories, "style", CATEGORY_STYLES, array: false)
+    validate_category_enum(file, categories, "script", CATEGORY_SCRIPTS, array: true)
+    validate_category_enum(file, categories, "use_case", CATEGORY_USE_CASES, array: false)
+
+    return unless categories.key?("variable") &&
+                  ![true, false].include?(categories["variable"])
+
+    add_error(file, "categories.variable must be boolean, " \
+                    "got #{categories["variable"].inspect}")
+  end
+
+  # Warns about each value of categories[key] that is not listed in allowed.
+  # With array: true the value may be a string or an array of strings;
+  # otherwise it is checked as a single value.
+  def validate_category_enum(file, categories, key, allowed, array:)
+    return unless categories.key?(key)
+
+    value = categories[key]
+    values = array ? normalize_array(file, value, key) : [value]
+    return if values.nil? # type error already recorded by normalize_array
+
+    values.each do |v|
+      next if allowed.include?(v)
+
+      add_warning(file, "categories.#{key} unknown value '#{v}' " \
+                        "— allowed: #{allowed.join(', ')}")
+    end
+  end
+
+  # Wraps a lone string in an array and returns arrays as-is. Anything that
+  # is not a string or an array of strings is recorded as an error and nil
+  # is returned so the caller skips the vocabulary check.
+  def normalize_array(file, value, key)
+    values = value.is_a?(Array) ? value : [value]
+    return values if values.all? { |v| v.is_a?(String) }
+
+    add_error(file, "categories.#{key} must be string or array")
+    nil
+  end
+
   def validate_v5_resource(file, name, resource)
     # Google-sourced resources
     if resource["source"] == "google"
diff --git a/docs/guide/formula-structure.md b/docs/guide/formula-structure.md
index 8b2f02d7b..047ec2e8c 100644
--- a/docs/guide/formula-structure.md
+++ b/docs/guide/formula-structure.md
@@ -147,6 +147,29 @@ extract:
   format: gzip  # For GZIP files
 ```
 
+## Categories
+
+Optional structured classification used by the docs site's browse filters and the formula detail page badges. All fields are optional — omit the ones you don't know.
+
+```yaml
+categories:
+  style: sans-serif       # Primary style classification
+  script: [latin, cjk]    # Supported scripts (string or array)
+  variable: false         # Whether the font is a variable font
+  use_case: body          # Intended primary use case
+```
+
+### Controlled Vocabulary
+
+| Field | Type | Allowed Values |
+|-------|------|----------------|
+| `style` | string | `serif`, `sans-serif`, `monospace`, `display`, `script`, `handwriting`, `decorative` |
+| `script` | string or array | `latin`, `cjk`, `arabic`, `cyrillic`, `hebrew`, `devanagari`, `thai`, `other` |
+| `variable` | boolean | `true`, `false` (inferred from `resources.format` if omitted) |
+| `use_case` | string | `body`, `heading`, `code`, `ui`, `decorative`, `caption` |
+
+Values outside this vocabulary emit a warning (not error) during schema validation, so the vocabulary can evolve as new fonts are added.
+
 ## Example: Complete Formula
 
 ```yaml