From b90f58e79d284671d5325401e2584eb17a74001e Mon Sep 17 00:00:00 2001 From: xinz Date: Wed, 13 May 2026 23:59:05 +0800 Subject: [PATCH 1/5] Add JSONSchex.Ref API --- CHANGELOG.md | 7 + guide/loader.md | 2 + guide/ref.md | 202 ++++++ lib/jsonschex.ex | 8 +- lib/jsonschex/compiler.ex | 15 +- lib/jsonschex/ref.ex | 968 ++++++++++++++++++++++++++ lib/jsonschex/scope_scanner.ex | 25 +- lib/jsonschex/validator/reference.ex | 159 ++++- mix.exs | 4 + test/ref_internal_regression_test.exs | 77 ++ test/ref_test.exs | 279 ++++++++ 11 files changed, 1715 insertions(+), 31 deletions(-) create mode 100644 guide/ref.md create mode 100644 lib/jsonschex/ref.ex create mode 100644 test/ref_internal_regression_test.exs create mode 100644 test/ref_test.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d51838..fc1cc2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## Unreleased + +### Bug Fixes and Improvements + + * Add a new public structural `$ref` API in `JSONSchex.Ref` for discovery (`scan/2`), single-step resolution (`resolve/3`), and transitive traversal with cycle reporting (`walk/2`) + * Improve internal scope scanning so `contentSchema` is traversed for nested `$id`, anchor, and local `$ref` discovery + ## v0.6.0 (2026-05-09) ### Bug Fixes and Improvements diff --git a/guide/loader.md b/guide/loader.md index 7bea420..88c2f2c 100644 --- a/guide/loader.md +++ b/guide/loader.md @@ -2,6 +2,8 @@ This guide explains how JSONSchex resolves remote references and how to supply an external loader when compiling schemas. +If you need low-level structural `$ref` discovery and traversal before compilation, see the [Structural `$ref` guide](ref.md) and `JSONSchex.Ref`. That API uses a related but distinct loader contract. 
+ ## Overview JSONSchex supports: diff --git a/guide/ref.md b/guide/ref.md new file mode 100644 index 0000000..d12c763 --- /dev/null +++ b/guide/ref.md @@ -0,0 +1,202 @@ +# Structural `$ref` Guide + +This guide explains the low-level reference discovery helpers in `JSONSchex.Ref`. + +Unlike `JSONSchex.compile/2`, this API is intentionally **policy-free**. It does not rewrite documents, merge sibling keywords, or apply OpenAPI-specific reference semantics. Instead, it exposes reusable mechanics for: + +- discovering `$ref` occurrences in nested maps and lists +- resolving local and external references +- tracking source and base URI context +- walking the transitive `$ref` graph +- detecting cycles during transitive traversal + +Use this API when you need to inspect or normalize documents **before** compilation, or when your application owns its own reference expansion policy. + +## Overview + +`JSONSchex.Ref` exposes three main entry points: + +- `scan/2` — discover structural `$ref` locations +- `resolve/3` — resolve one location or raw ref string +- `walk/2` — traverse reachable `$ref` targets transitively + +## `scan/2` + +`scan/2` walks nested maps and lists structurally and returns a list of `%JSONSchex.Ref.Location{}` values. 
+ +Each location includes: + +- `:raw_ref` — original `$ref` string +- `:path` — path to the `$ref` key within the scanned document +- `:source` — caller-supplied source identifier +- `:base_uri` — effective base URI at that location, honoring nested `$id` +- `:absolute_uri` — resolved target URI when it can be derived +- `:fragment` — target fragment without the leading `#` + +Example: + +```elixir +root = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "user" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "#/$defs/name"}, + "$defs" => %{ + "name" => %{"type" => "string"} + } + } + } +} + +[location] = JSONSchex.Ref.scan(root) + +location.raw_ref +#=> "#/$defs/name" + +location.path +#=> ["$defs", "user", "schema", "$ref"] + +location.base_uri +#=> "https://example.com/schemas/user.json" + +location.absolute_uri +#=> "https://example.com/schemas/user.json#/$defs/name" +``` + +## `resolve/3` + +`resolve/3` resolves one ref from a given document context. + +You can pass either: + +- a raw reference string +- a `%JSONSchex.Ref.Location{}` returned by `scan/2` + +Passing a `Location` is usually the better choice because it preserves nested `$id` scoping. + +### Loader contract + +External documents are loaded through `:loader` or `:external_loader`. 
+ +The loader receives a **document URI without the fragment** and may return one of: + +- `{:ok, document}` +- `{:ok, %{document: document, source: source}}` +- `{:error, term}` + +Example: + +```elixir +root = %{ + "user" => %{"$ref" => "schemas/common.json#/$defs/id"} +} + +loader = fn uri -> + case uri do + "specs/schemas/common.json" -> + {:ok, + %{ + document: %{ + "$defs" => %{ + "id" => %{"type" => "string"} + } + }, + source: uri + }} + + _ -> + {:error, :enoent} + end +end + +[location] = JSONSchex.Ref.scan(root, source: "specs/root.json") + +{:ok, resolution} = + JSONSchex.Ref.resolve(root, location, + source: "specs/root.json", + loader: loader + ) + +resolution.target_uri +#=> "specs/schemas/common.json#/$defs/id" + +resolution.target_pointer +#=> "#/$defs/id" + +resolution.target_value +#=> %{"type" => "string"} +``` + +### Built-in Draft 2020-12 resources + +Bundled Draft 2020-12 resources can be resolved without a custom loader. + +```elixir +root = %{ + "$ref" => "https://json-schema.org/draft/2020-12/meta/core#/$defs/uriString" +} + +[location] = JSONSchex.Ref.scan(root) +{:ok, resolution} = JSONSchex.Ref.resolve(root, location) + +resolution.target_value +#=> %{"type" => "string", "format" => "uri"} +``` + +## `walk/2` + +`walk/2` performs a depth-first transitive traversal over reachable `$ref` targets. + +It returns `{:ok, events}` where `events` is an ordered list of: + +- `%JSONSchex.Ref.Resolution{}` +- `%JSONSchex.Ref.Error{}` +- `%JSONSchex.Ref.Cycle{}` + +This makes `walk/2` inspection-oriented rather than fail-fast: you can see successful edges, missing targets, and cycles in one result. + +### Cycle handling + +When a resolved target would recurse into an already-active target, `walk/2` emits `%JSONSchex.Ref.Cycle{}` and stops expanding that branch. + +### External document caching + +Within a single `walk/2` call, externally loaded documents are cached internally by document URI. 
Repeated edges still emit their own resolution events, but loader calls are not repeated for the same external resource. + +Example: + +```elixir +root = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "a" => %{"$ref" => "#/$defs/b"}, + "b" => %{"$ref" => "#/$defs/a"} + }, + "start" => %{"$ref" => "#/$defs/a"} +} + +{:ok, events} = JSONSchex.Ref.walk(root, base_uri: "https://example.com/root.json") + +Enum.map(events, & &1.__struct__) +#=> [JSONSchex.Ref.Resolution, JSONSchex.Ref.Resolution, JSONSchex.Ref.Cycle, ...] +``` + +## Structured errors + +`resolve/3` and `walk/2` use `%JSONSchex.Ref.Error{}` for resolution failures. + +Current error kinds are: + +- `:invalid_ref` +- `:missing_document` +- `:missing_target` +- `:invalid_loader_response` + +These errors preserve the originating location and target URI when available, making them useful for downstream diagnostics. + +## Choosing between APIs + +Use `JSONSchex.compile/2` when you want validation-ready compiled schemas. + +Use `JSONSchex.Ref` when you want structural facts and traversal mechanics, but your application will decide what to do with those facts. 
diff --git a/lib/jsonschex.ex b/lib/jsonschex.ex index 6a5811c..0df304c 100644 --- a/lib/jsonschex.ex +++ b/lib/jsonschex.ex @@ -26,6 +26,11 @@ defmodule JSONSchex do iex> JSONSchex.format_error(error) ~s(Invalid email format: "not-an-email") + For structural `$ref` discovery and traversal before compilation, see: + + - `JSONSchex.Ref` + - the [Structural `$ref` guide](guide/ref.md) + For compile-time schema embedding, see: - `JSONSchex.Schema` for the `compile!/2` macro @@ -48,7 +53,8 @@ defmodule JSONSchex do - `:format_assertion` — Enable strict `format` validation (default: `false`) - `:content_assertion` — Enable strict content vocabulary validation (default: `false`) - See the [Loader guide](guide/loader.md) and + See the [Loader guide](guide/loader.md), + [Structural `$ref` guide](guide/ref.md), and [Content and Format guide](guide/content_and_format.md) for details. ## Examples diff --git a/lib/jsonschex/compiler.ex b/lib/jsonschex/compiler.ex index 3eb56d1..4c479e8 100644 --- a/lib/jsonschex/compiler.ex +++ b/lib/jsonschex/compiler.ex @@ -97,7 +97,13 @@ defmodule JSONSchex.Compiler do end) resolved_runtime_defs = - resolve_refs(raw_schema, MapSet.to_list(explicit_refs), root_vocabs, ctx) + resolve_refs( + raw_schema, + MapSet.to_list(explicit_refs), + root_compiled.source_id, + root_vocabs, + ctx + ) case merge_defs(full_defs, resolved_runtime_defs) do {:error, _} = error -> @@ -154,7 +160,9 @@ defmodule JSONSchex.Compiler do {:ok, current_vocabs} end - defp resolve_refs(raw_schema, refs, vocabs, ctx) do + defp resolve_refs(raw_schema, refs, _base_uri, vocabs, ctx) do + refs = Enum.filter(refs, &local_pointer_ref?/1) + ExJSONPointer.batch_resolve_reduce(raw_schema, refs, %{}, fn ref, result, acc -> case result do {:ok, fragment} -> @@ -172,6 +180,9 @@ defmodule JSONSchex.Compiler do end) end + defp local_pointer_ref?("#/" <> _), do: true + defp local_pointer_ref?(_), do: false + defp compile_schema_node(true, _id, _vocabs, ctx) do {:ok, %Schema{ diff --git 
a/lib/jsonschex/ref.ex b/lib/jsonschex/ref.ex new file mode 100644 index 0000000..d2cfc35 --- /dev/null +++ b/lib/jsonschex/ref.ex @@ -0,0 +1,968 @@ +defmodule JSONSchex.Ref do + @moduledoc """ + Structural `$ref` discovery and resolution helpers. + + This module exposes low-level, policy-free building blocks for downstream + tooling that needs to inspect or resolve references before schema + compilation. + + Unlike `JSONSchex.compile/2`, this API does **not** rewrite documents, + interpret OpenAPI-specific behavior, or apply merge policy. It focuses on the + mechanical side of references: + + - structurally scanning nested maps and lists for `$ref` entries + - tracking the effective base URI at each location, honoring nested `$id` + - resolving local JSON Pointer and anchor references + - resolving external references through a caller-provided loader + - traversing the transitive `$ref` graph with cycle detection + - preserving source metadata for downstream diagnostics + + See the [Structural `$ref` guide](guide/ref.md) for a longer walkthrough. + + ## Main entry points + + - `scan/2` returns `%Location{}` values for every structural `$ref` + - `resolve/3` resolves one location or raw ref string into a `%Resolution{}` + - `walk/2` performs a depth-first transitive traversal and returns ordered + `%Resolution{}`, `%Error{}`, and `%Cycle{}` events + + ## Options + + - `:source` — source identifier for the root document + - `:base_uri` — explicit starting base URI override + - `:loader` — `(document_uri -> {:ok, document} | {:ok, %{document: document, source: source}} | {:error, term()})` + - `:external_loader` — accepted as an alias for `:loader` + + When resolving a bare reference string, resolution starts from the root + document context. To preserve nested `$id` scope, prefer passing a scanned + `%Location{}` into `resolve/3`. 
+ + ## Example + + iex> document = %{ + ...> "$id" => "https://example.com/root.json", + ...> "$defs" => %{ + ...> "user" => %{ + ...> "$id" => "schemas/user.json", + ...> "$defs" => %{"name" => %{"type" => "string"}}, + ...> "schema" => %{"$ref" => "#/$defs/name"} + ...> } + ...> } + ...> } + iex> [location] = JSONSchex.Ref.scan(document) + iex> location.absolute_uri + "https://example.com/schemas/user.json#/$defs/name" + iex> {:ok, resolution} = JSONSchex.Ref.resolve(document, location, base_uri: "https://example.com/root.json") + iex> resolution.target_value + %{"type" => "string"} + """ + + alias JSONSchex.Draft202012.Schemas + alias JSONSchex.URIUtil + + @type path_segment :: String.t() | non_neg_integer() + @type path :: [path_segment] + @type source :: term() + @type document :: map() | list() | boolean() + + @typedoc "A document loader used for external reference resolution." + @type loader_result :: + {:ok, document()} + | {:ok, %{required(:document) => document(), optional(:source) => source()}} + | {:error, term()} + + @type loader :: (String.t() -> loader_result()) + + defmodule Location do + @moduledoc """ + A discovered `$ref` location. + + The `path` is reported from the root of the scanned document to the `$ref` + key itself. + """ + + @enforce_keys [:raw_ref, :path] + defstruct [ + :raw_ref, + :path, + :source, + :base_uri, + :absolute_uri, + :fragment + ] + + @type t :: %__MODULE__{ + raw_ref: String.t(), + path: JSONSchex.Ref.path(), + source: JSONSchex.Ref.source() | nil, + base_uri: String.t() | nil, + absolute_uri: String.t() | nil, + fragment: String.t() | nil + } + end + + defmodule Resolution do + @moduledoc """ + The result of resolving a single `$ref` location. + + `target_document` is the resolved target resource root. For embedded + resources introduced by nested `$id`, this is the local subschema/resource + rather than the original root document. 
+ """ + + @enforce_keys [:location, :target_source, :target_document, :target_value] + defstruct [ + :location, + :target_uri, + :target_source, + :target_document, + :target_value, + :target_pointer + ] + + @type t :: %__MODULE__{ + location: JSONSchex.Ref.Location.t(), + target_uri: String.t() | nil, + target_source: JSONSchex.Ref.source() | nil, + target_document: JSONSchex.Ref.document(), + target_value: term(), + target_pointer: String.t() | nil + } + end + + defmodule Error do + @moduledoc """ + Structured ref resolution error. + """ + + @enforce_keys [:kind] + defstruct [ + :kind, + :location, + :target_uri, + :details + ] + + @type kind :: :invalid_ref | :missing_document | :missing_target | :invalid_loader_response + + @type t :: %__MODULE__{ + kind: kind(), + location: JSONSchex.Ref.Location.t() | nil, + target_uri: String.t() | nil, + details: term() + } + end + + defmodule Cycle do + @moduledoc """ + A cycle detected while transitively walking `$ref` targets. + """ + + @enforce_keys [:location, :target_uri, :trail] + defstruct [ + :location, + :target_uri, + :trail + ] + + @type t :: %__MODULE__{ + location: JSONSchex.Ref.Location.t(), + target_uri: String.t(), + trail: [String.t()] + } + end + + @typedoc "Ordered event emitted by `walk/2`." + @type walk_event :: Resolution.t() | Error.t() | Cycle.t() + + @doc """ + Recursively scans a document for `$ref` locations. + + The traversal is structural: nested maps and lists are walked regardless of + keyword meaning. + + Nested `$id` values update the effective `base_uri` recorded on each returned + `%Location{}`. 
+ + ## Example + + iex> document = %{ + ...> "$id" => "https://example.com/root.json", + ...> "child" => %{ + ...> "$id" => "schemas/user.json", + ...> "schema" => %{"$ref" => "#/$defs/name"} + ...> } + ...> } + iex> [location] = JSONSchex.Ref.scan(document) + iex> location.path + ["child", "schema", "$ref"] + iex> location.base_uri + "https://example.com/schemas/user.json" + """ + @spec scan(document(), keyword()) :: [Location.t()] + def scan(document, opts \\ []) + when is_map(document) or is_list(document) or is_boolean(document) do + source = Keyword.get(opts, :source) + base_uri = initial_base_uri(opts, source) + + document + |> do_scan([], source, base_uri, []) + |> Enum.reverse() + end + + @doc """ + Resolves a single `$ref` from the given document context. + + Passing a scanned `%Location{}` preserves nested `$id` scope. Passing a raw + reference string resolves from the root document context derived from `opts`. + + External documents are loaded through `:loader` or `:external_loader`. The + loader receives the resolved document URI without the fragment and may return + either a document directly or `%{document: document, source: source}`. 
+ + ## Example + + iex> document = %{ + ...> "$defs" => %{"name" => %{"type" => "string"}}, + ...> "schema" => %{"$ref" => "#/$defs/name"} + ...> } + iex> [location] = JSONSchex.Ref.scan(document) + iex> {:ok, resolution} = JSONSchex.Ref.resolve(document, location) + iex> resolution.target_pointer + "#/$defs/name" + iex> resolution.target_value + %{"type" => "string"} + """ + @spec resolve(document(), String.t() | Location.t(), keyword()) :: + {:ok, Resolution.t()} | {:error, Error.t()} + def resolve(document, ref_or_location, opts \\ []) + + def resolve(document, %Location{} = location, opts) + when is_map(document) or is_list(document) or is_boolean(document) do + {result, _cache} = resolve_location(document, location, opts, %{}) + result + end + + def resolve(document, ref, opts) + when (is_map(document) or is_list(document) or is_boolean(document)) and is_binary(ref) do + source = Keyword.get(opts, :source) + root_base_uri = initial_base_uri(opts, source) + + location = + normalize_location( + %Location{raw_ref: ref, path: [], source: source, base_uri: root_base_uri}, + source, + root_base_uri + ) + + resolve(document, location, opts) + end + + def resolve(_document, _ref_or_location, _opts) do + {:error, %Error{kind: :invalid_ref, details: :expected_binary_ref_or_location}} + end + + @doc """ + Transitively walks reachable `$ref` targets in depth-first order. + + The returned event list contains: + + - `%Resolution{}` for each successfully resolved location + - `%Error{}` for each location that failed to resolve + - `%Cycle{}` when a resolved target would recurse into an already-active trail + + Shared targets are only expanded once, but every location still produces its + own `%Resolution{}` event. + + This function is inspection-oriented rather than fail-fast: successful edges, + missing targets, and cycles are all returned in the same ordered result. 
+ + ## Example + + iex> document = %{ + ...> "$id" => "https://example.com/root.json", + ...> "$defs" => %{ + ...> "a" => %{"$ref" => "#/$defs/b"}, + ...> "b" => %{"$ref" => "#/$defs/a"} + ...> }, + ...> "start" => %{"$ref" => "#/$defs/a"} + ...> } + iex> {:ok, events} = JSONSchex.Ref.walk(document, base_uri: "https://example.com/root.json") + iex> Enum.any?(events, &match?(%JSONSchex.Ref.Cycle{}, &1)) + true + """ + @spec walk(document(), keyword()) :: {:ok, [walk_event()]} + def walk(document, opts \\ []) + when is_map(document) or is_list(document) or is_boolean(document) do + source = Keyword.get(opts, :source) + base_uri = initial_base_uri(opts, source) + loader = loader_from_opts(opts) + + state = %{ + events: [], + active: MapSet.new(), + expanded: MapSet.new(), + seen_locations: MapSet.new(), + cache: %{} + } + + state = walk_document(document, document, source, base_uri, loader, state, [], []) + + {:ok, Enum.reverse(state.events)} + end + + defp resolve_location(document, %Location{} = location, opts, cache) do + source = location.source || Keyword.get(opts, :source) + root_base_uri = initial_base_uri(opts, source) + loader = loader_from_opts(opts) + + location = normalize_location(location, source, root_base_uri) + index = build_index(document, source, root_base_uri) + + case resolve_target(index, location, loader, cache) do + {:ok, target, updated_cache} -> + {{:ok, build_resolution(location, target)}, updated_cache} + + {:error, %Error{} = error, updated_cache} -> + {{:error, error}, updated_cache} + end + end + + defp walk_document( + scan_document, + resolve_document, + source, + base_uri, + loader, + state, + trail, + path_prefix + ) do + scan(scan_document, source: source, base_uri: base_uri) + |> Enum.reduce(state, fn location, acc_state -> + location = prefix_location_path(location, path_prefix) + seen? 
= seen_location?(acc_state, location) + acc_state = if seen?, do: acc_state, else: mark_seen_location(acc_state, location) + + opts = [source: source, base_uri: base_uri, loader: loader] + {result, cache} = resolve_location(resolve_document, location, opts, acc_state.cache) + acc_state = %{acc_state | cache: cache} + + case result do + {:error, %Error{} = error} -> + if seen?, do: acc_state, else: push_event(acc_state, error) + + {:ok, %Resolution{} = resolution} -> + acc_state = if seen?, do: acc_state, else: push_event(acc_state, resolution) + maybe_walk_resolution(acc_state, resolution, loader, trail) + end + end) + end + + defp maybe_walk_resolution(state, %Resolution{} = resolution, loader, trail) do + target_uri = resolution.target_uri + + cond do + not walkable_document?(resolution.target_value) -> + state + + not is_binary(target_uri) -> + state + + MapSet.member?(state.active, target_uri) -> + push_event(state, %Cycle{ + location: resolution.location, + target_uri: target_uri, + trail: Enum.reverse([target_uri | trail]) + }) + + same_source_resource_root?(resolution) -> + state + + MapSet.member?(state.expanded, target_uri) -> + state + + true -> + next_state = %{state | active: MapSet.put(state.active, target_uri)} + + next_state = + walk_document( + resolution.target_value, + resolution.target_document, + resolution.target_source, + next_base_uri(resolution), + loader, + next_state, + [target_uri | trail], + path_prefix_from_resolution(resolution) + ) + + %{ + next_state + | active: MapSet.delete(next_state.active, target_uri), + expanded: MapSet.put(next_state.expanded, target_uri) + } + end + end + + defp push_event(state, event) do + %{state | events: [event | state.events]} + end + + defp prefix_location_path(%Location{} = location, []), do: location + + defp prefix_location_path(%Location{} = location, prefix) when is_list(prefix) do + %{location | path: prefix ++ location.path} + end + + defp seen_location?(state, %Location{} = location) do + 
MapSet.member?(state.seen_locations, location_key(location)) + end + + defp mark_seen_location(state, %Location{} = location) do + %{state | seen_locations: MapSet.put(state.seen_locations, location_key(location))} + end + + defp location_key(%Location{} = location) do + {location.source, location.base_uri, location.path, location.absolute_uri} + end + + defp path_prefix_from_resolution(%Resolution{target_pointer: target_pointer}) do + pointer_to_path(target_pointer) + end + + defp same_source_resource_root?(%Resolution{} = resolution) do + resolution.target_value === resolution.target_document and + resolution.target_source == resolution.location.source + end + + defp next_base_uri(%Resolution{target_uri: target_uri, target_source: target_source}) do + cond do + is_binary(target_uri) -> + base_of(target_uri) + + is_binary(target_source) -> + target_source + + true -> + nil + end + end + + defp walkable_document?(value) when is_map(value) or is_list(value) or is_boolean(value), + do: true + + defp walkable_document?(_), do: false + + defp build_resolution(location, target) do + %Resolution{ + location: location, + target_uri: location.absolute_uri, + target_source: target.source, + target_document: target.document, + target_value: target.value, + target_pointer: target.pointer + } + end + + defp normalize_location(%Location{} = location, source, root_base_uri) do + base_uri = location.base_uri || root_base_uri + absolute_uri = location.absolute_uri || resolve_reference(base_uri, location.raw_ref) + + %Location{ + location + | source: location.source || source, + base_uri: base_uri, + absolute_uri: absolute_uri, + fragment: location.fragment || fragment_of(absolute_uri || location.raw_ref) + } + end + + defp do_scan(value, _path, _source, _base_uri, acc) + when is_boolean(value) or is_binary(value) or is_number(value) or is_nil(value), + do: acc + + defp do_scan(list, path, source, base_uri, acc) when is_list(list) do + list + |> Enum.with_index() + |> 
Enum.reduce(acc, fn {item, index}, inner_acc -> + do_scan(item, path ++ [index], source, base_uri, inner_acc) + end) + end + + defp do_scan(map, path, source, base_uri, acc) when is_map(map) do + effective_base_uri = effective_base_uri(base_uri, map) + + acc = + case Map.get(map, "$ref") do + ref when is_binary(ref) -> + absolute_uri = resolve_reference(effective_base_uri, ref) + + [ + %Location{ + raw_ref: ref, + path: path ++ ["$ref"], + source: source, + base_uri: effective_base_uri, + absolute_uri: absolute_uri, + fragment: fragment_of(absolute_uri || ref) + } + | acc + ] + + _ -> + acc + end + + map + |> Enum.sort_by(&sort_entry/1) + |> Enum.reduce(acc, fn {key, value}, inner_acc -> + do_scan(value, path ++ [key], source, effective_base_uri, inner_acc) + end) + end + + defp build_index(document, source, base_uri) do + index = %{resources: %{}, anchors: %{}} + + do_build_index(document, [], source, base_uri, document, index) + end + + defp do_build_index(value, path, source, base_uri, _resource_document, index) + when is_boolean(value) do + resource_key = resource_key(base_uri) + + if path == [] do + put_resource(index, resource_key, %{ + base_uri: resource_key, + document: value, + path: path, + source: source + }) + else + index + end + end + + defp do_build_index(value, _path, _source, _base_uri, _resource_document, index) + when is_binary(value) or is_number(value) or is_nil(value), + do: index + + defp do_build_index(list, path, source, base_uri, resource_document, index) + when is_list(list) do + resource_document = if path == [], do: list, else: resource_document + + index = + if path == [] do + put_resource(index, resource_key(base_uri), %{ + base_uri: resource_key(base_uri), + document: resource_document, + path: path, + source: source + }) + else + index + end + + list + |> Enum.with_index() + |> Enum.reduce(index, fn {item, index_value}, inner_index -> + do_build_index( + item, + path ++ [index_value], + source, + base_uri, + resource_document, + 
inner_index + ) + end) + end + + defp do_build_index(map, path, source, base_uri, resource_document, index) when is_map(map) do + effective_base_uri = effective_base_uri(base_uri, map) + + resource_document = + if path == [] or is_binary(Map.get(map, "$id")), do: map, else: resource_document + + index = + if path == [] or is_binary(Map.get(map, "$id")) do + put_resource(index, resource_key(effective_base_uri), %{ + base_uri: resource_key(effective_base_uri), + document: resource_document, + path: path, + source: source + }) + else + index + end + + index = + index + |> put_anchor(map, "$anchor", effective_base_uri, path, source, resource_document) + |> put_anchor(map, "$dynamicAnchor", effective_base_uri, path, source, resource_document) + + map + |> Enum.sort_by(&sort_entry/1) + |> Enum.reduce(index, fn {key, value}, inner_index -> + do_build_index( + value, + path ++ [key], + source, + effective_base_uri, + resource_document, + inner_index + ) + end) + end + + defp put_resource(index, key, resource) do + update_in(index.resources, &Map.put_new(&1, key, resource)) + end + + defp put_anchor(index, map, keyword, base_uri, path, source, resource_document) do + case Map.get(map, keyword) do + anchor when is_binary(anchor) -> + anchor_uri = with_optional_fragment(base_uri, anchor) + + entry = %{ + absolute_uri: anchor_uri, + base_uri: resource_key(base_uri), + document: resource_document, + path: path, + source: source, + value: map + } + + update_in(index.anchors, &Map.put_new(&1, anchor_uri, entry)) + + _ -> + index + end + end + + defp resolve_target(index, %Location{} = location, loader, cache) do + case split_target(location.absolute_uri || location.raw_ref) do + {:ok, target_base_uri, fragment} -> + case Map.get(index.resources, target_base_uri) do + nil -> + resolve_external_target(target_base_uri, fragment, location, loader, cache) + + resource -> + with_cache(resolve_within_index(index, resource, fragment, location), cache) + end + + :error -> + {:error, 
%Error{kind: :invalid_ref, location: location, details: location.raw_ref}, cache} + end + end + + defp resolve_external_target(target_base_uri, _fragment, location, _loader, cache) + when target_base_uri in [nil, ""] do + {:error, + %Error{ + kind: :missing_target, + location: location, + target_uri: location.absolute_uri, + details: :unknown_local_resource + }, cache} + end + + defp resolve_external_target(target_base_uri, fragment, location, loader, cache) do + with {:ok, document, source, updated_cache} <- + load_document(target_base_uri, loader, location, cache), + index <- build_index(document, source, target_base_uri), + resource when not is_nil(resource) <- + Map.get(index.resources, resource_key(target_base_uri)) do + with_cache(resolve_within_index(index, resource, fragment, location), updated_cache) + else + nil -> + {:error, + %Error{ + kind: :missing_target, + location: location, + target_uri: location.absolute_uri, + details: :missing_external_resource + }, cache} + + {:error, %Error{} = error, updated_cache} -> + {:error, error, updated_cache} + end + end + + defp resolve_within_index(_index, resource, nil, _location) do + {:ok, + %{ + document: resource.document, + pointer: nil, + source: resource.source, + value: resource.document + }} + end + + defp resolve_within_index(_index, resource, "/" <> _ = fragment, location) do + pointer = URIUtil.local_ref(fragment) + + case ExJSONPointer.resolve(resource.document, pointer) do + {:ok, value} -> + {:ok, + %{ + document: resource.document, + pointer: pointer, + source: resource.source, + value: value + }} + + {:error, reason} -> + {:error, + %Error{ + kind: :missing_target, + location: location, + target_uri: location.absolute_uri, + details: reason + }} + end + end + + defp resolve_within_index(index, _resource, fragment, location) do + anchor_uri = with_optional_fragment(base_of(location.absolute_uri), fragment) + + case Map.get(index.anchors, anchor_uri) do + nil -> + {:error, + %Error{ + kind: 
:missing_target, + location: location, + target_uri: location.absolute_uri, + details: fragment + }} + + anchor -> + {:ok, + %{ + document: anchor.document, + pointer: path_to_pointer(anchor.path), + source: anchor.source, + value: anchor.value + }} + end + end + + defp load_document(target_base_uri, loader, location, cache) do + case Map.get(cache, target_base_uri) do + %{document: document, source: source} -> + {:ok, document, source, cache} + + nil -> + case Schemas.fetch(target_base_uri) do + {:ok, document} -> + updated_cache = put_cached_document(cache, target_base_uri, document, target_base_uri) + {:ok, document, target_base_uri, updated_cache} + + :error -> + do_load_document(target_base_uri, loader, location, cache) + end + end + end + + defp do_load_document(_target_base_uri, nil, location, cache) do + {:error, + %Error{ + kind: :missing_document, + location: location, + target_uri: location.absolute_uri, + details: :loader_not_configured + }, cache} + end + + defp do_load_document(target_base_uri, loader, location, cache) when is_function(loader, 1) do + case loader.(target_base_uri) do + {:ok, %{document: document} = loaded} -> + source = Map.get(loaded, :source, target_base_uri) + updated_cache = put_cached_document(cache, target_base_uri, document, source) + {:ok, document, source, updated_cache} + + {:ok, document} when is_map(document) or is_list(document) or is_boolean(document) -> + updated_cache = put_cached_document(cache, target_base_uri, document, target_base_uri) + {:ok, document, target_base_uri, updated_cache} + + {:error, reason} -> + {:error, + %Error{ + kind: :missing_document, + location: location, + target_uri: location.absolute_uri, + details: reason + }, cache} + + other -> + {:error, + %Error{ + kind: :invalid_loader_response, + location: location, + target_uri: location.absolute_uri, + details: other + }, cache} + end + end + + defp with_cache({:ok, target}, cache), do: {:ok, target, cache} + defp with_cache({:error, %Error{} = 
error}, cache), do: {:error, error, cache} + + defp put_cached_document(cache, target_base_uri, document, source) do + Map.put(cache, target_base_uri, %{document: document, source: source}) + end + + defp initial_base_uri(opts, source) do + case Keyword.fetch(opts, :base_uri) do + {:ok, value} -> value + :error when is_binary(source) -> source + :error -> nil + end + end + + defp loader_from_opts(opts) do + Keyword.get(opts, :loader) || Keyword.get(opts, :external_loader) + end + + defp effective_base_uri(base_uri, map) do + case Map.get(map, "$id") do + id when is_binary(id) -> resolve_reference(base_uri, id) + _ -> base_uri + end + end + + defp resolve_reference(nil, uri), do: uri + defp resolve_reference(base, nil), do: base + + defp resolve_reference(base, uri) when is_binary(base) and is_binary(uri) do + cond do + uri == "" -> + base_of(base) + + absolute_uri?(uri) -> + uri + + String.starts_with?(uri, "#") -> + base = base_of(base) + with_optional_fragment(base, String.trim_leading(uri, "#")) + + absolute_uri?(base) -> + URIUtil.resolve(base, uri) + + true -> + resolve_path_reference(base, uri) + end + end + + defp resolve_path_reference(base, uri) do + {ref_path, fragment} = URIUtil.split_fragment(uri) + + resolved_path = + cond do + ref_path == "" -> + base_of(base) + + String.starts_with?(ref_path, "/") -> + ref_path + + true -> + base + |> base_of() + |> path_dirname() + |> join_and_normalize(ref_path) + end + + with_optional_fragment(resolved_path, fragment) + end + + defp absolute_uri?(value) when is_binary(value) do + match?(%URI{scheme: scheme} when not is_nil(scheme), URI.parse(value)) + end + + defp absolute_uri?(_), do: false + + defp base_of(value) when is_binary(value) do + value + |> URIUtil.split_fragment() + |> elem(0) + end + + defp base_of(_), do: "" + + defp fragment_of(value) when is_binary(value), do: URIUtil.fragment(value) + defp fragment_of(_), do: nil + + defp path_to_pointer([]), do: nil + + defp path_to_pointer(path) when 
is_list(path) do + encoded = Enum.map(path, &encode_pointer_segment/1) + "#/" <> Enum.join(encoded, "/") + end + + defp pointer_to_path(nil), do: [] + defp pointer_to_path("#"), do: [] + + defp pointer_to_path("#/" <> rest) do + rest + |> String.split("/", trim: true) + |> Enum.map(&decode_pointer_segment/1) + end + + defp pointer_to_path(_), do: [] + + defp encode_pointer_segment(segment) when is_integer(segment), do: Integer.to_string(segment) + + defp encode_pointer_segment(segment) when is_binary(segment) do + segment + |> String.replace("~", "~0") + |> String.replace("/", "~1") + end + + defp decode_pointer_segment(segment) when is_binary(segment) do + segment + |> String.replace("~1", "/") + |> String.replace("~0", "~") + end + + defp split_target(value) when is_binary(value) do + {base, fragment} = URIUtil.split_fragment(value) + {:ok, resource_key(base), fragment} + rescue + _ -> :error + end + + defp split_target(_), do: :error + + defp resource_key(nil), do: "" + defp resource_key(value) when is_binary(value), do: value + + defp with_optional_fragment(base, nil), do: resource_key(base) + defp with_optional_fragment(base, ""), do: resource_key(base) + defp with_optional_fragment(base, fragment), do: resource_key(base) <> "#" <> fragment + + defp path_dirname(path) do + case Path.dirname(path) do + "." 
-> "" + value -> value + end + end + + defp join_and_normalize("", path) do + path + |> Path.expand("/") + |> String.trim_leading("/") + end + + defp join_and_normalize(base, path) do + if String.starts_with?(base, "/") do + Path.expand(path, base) + else + base + |> then(&Path.expand(path, "/" <> &1)) + |> String.trim_leading("/") + end + end + + defp sort_entry({key, _value}) when is_binary(key), do: {0, key} + defp sort_entry({key, _value}) when is_integer(key), do: {1, key} + defp sort_entry({key, _value}), do: {2, inspect(key)} +end diff --git a/lib/jsonschex/scope_scanner.ex b/lib/jsonschex/scope_scanner.ex index 364581c..735c081 100644 --- a/lib/jsonschex/scope_scanner.ex +++ b/lib/jsonschex/scope_scanner.ex @@ -55,6 +55,7 @@ defmodule JSONSchex.ScopeScanner do case Map.get(schema, key) do "#" <> _ = value -> MapSet.put(acc, value) + _ -> acc end @@ -71,21 +72,37 @@ defmodule JSONSchex.ScopeScanner do case Map.get(schema, keyword) do anchor when is_binary(anchor) -> Map.put(acc, base_uri <> "#" <> anchor, schema) + _ -> acc end - end - defp recurse_keyword(key, map, base, acc) when key in ["properties", "$defs", "definitions", "patternProperties", "dependentSchemas"] and is_map(map) do + defp recurse_keyword(key, map, base, acc) + when key in ["properties", "$defs", "definitions", "patternProperties", "dependentSchemas"] and + is_map(map) do Enum.reduce(map, acc, fn {_k, sub}, inner_acc -> do_scan(sub, base, inner_acc) end) end - defp recurse_keyword(key, list, base, acc) when key in ["allOf", "anyOf", "oneOf", "prefixItems"] and is_list(list) do + defp recurse_keyword(key, list, base, acc) + when key in ["allOf", "anyOf", "oneOf", "prefixItems"] and is_list(list) do Enum.reduce(list, acc, fn sub, inner_acc -> do_scan(sub, base, inner_acc) end) end - defp recurse_keyword(key, sub, base, acc) when key in ["items", "additionalProperties", "if", "then", "else", "not", "contains", "propertyNames", "unevaluatedItems", "unevaluatedProperties"] and is_map(sub) do + 
defp recurse_keyword(key, sub, base, acc) + when key in [ + "items", + "additionalProperties", + "if", + "then", + "else", + "not", + "contains", + "propertyNames", + "unevaluatedItems", + "unevaluatedProperties", + "contentSchema" + ] and is_map(sub) do do_scan(sub, base, acc) end diff --git a/lib/jsonschex/validator/reference.ex b/lib/jsonschex/validator/reference.ex index 578f8cc..a6390ca 100644 --- a/lib/jsonschex/validator/reference.ex +++ b/lib/jsonschex/validator/reference.ex @@ -5,7 +5,7 @@ defmodule JSONSchex.Validator.Reference do """ alias JSONSchex.Validator - alias JSONSchex.Compiler + alias JSONSchex.{Compiler, Ref} alias JSONSchex.URIUtil alias JSONSchex.Draft202012.Schemas alias JSONSchex.Types.ErrorContext @@ -18,10 +18,16 @@ defmodule JSONSchex.Validator.Reference do def validate_dynamic_ref(data, ref_string, {path, evaluated, validation_context} = context) do anchor = URIUtil.fragment(ref_string) - static_match = resolve_scoped_ref(validation_context.source_id, ref_string, validation_context.root_schema.defs) + static_match = + resolve_scoped_ref( + validation_context.source_id, + ref_string, + validation_context.root_schema.defs + ) is_dynamic_candidate = - anchor && static_match && is_map(static_match.raw) && Map.get(static_match.raw, "$dynamicAnchor") == anchor + anchor && static_match && is_map(static_match.raw) && + Map.get(static_match.raw, "$dynamicAnchor") == anchor if is_dynamic_candidate do dynamic_match = @@ -37,6 +43,7 @@ defmodule JSONSchex.Validator.Reference do else nil end + _ -> nil end @@ -62,7 +69,11 @@ defmodule JSONSchex.Validator.Reference do def validate_ref(data, ref_string, {path, evaluated, validation_context}) do effective_context = effective_context_for_ref(validation_context, ref_string) - case resolve_scoped_ref(effective_context.source_id, ref_string, effective_context.root_schema.defs) do + case resolve_scoped_ref( + effective_context.source_id, + ref_string, + effective_context.root_schema.defs + ) do nil -> 
resolve_missing_ref(data, ref_string, path, effective_context, evaluated) @@ -90,21 +101,51 @@ defmodule JSONSchex.Validator.Reference do result else {:ok, base_schema, fragment} when is_map(base_schema) -> - resolve_registry_base_match(data, ref_string, path, validation_context, evaluated, base_schema, fragment) + resolve_registry_base_match( + data, + ref_string, + path, + validation_context, + evaluated, + base_schema, + fragment + ) :halt -> - resolve_and_validate_jit(data, validation_context.raw, ref_string, path, validation_context, evaluated) + resolve_and_validate_jit( + data, + validation_context.raw, + ref_string, + path, + validation_context, + evaluated + ) {:error, _} = error -> error end end - defp resolve_registry_base_match(data, _ref_string, path, validation_context, evaluated, base_schema, fragment) do + defp resolve_registry_base_match( + data, + _ref_string, + path, + validation_context, + evaluated, + base_schema, + fragment + ) do local_ref = URIUtil.local_ref(fragment) if base_schema.source_id == validation_context.source_id do - resolve_and_validate_jit(data, validation_context.raw, local_ref, path, validation_context, evaluated) + resolve_and_validate_jit( + data, + validation_context.raw, + local_ref, + path, + validation_context, + evaluated + ) else updated_context = merge_defs_into_context(validation_context, base_schema.defs) validate_ref(data, local_ref, {path, evaluated, updated_context}) @@ -135,9 +176,11 @@ defmodule JSONSchex.Validator.Reference do defp check_load_remote(external_loader, uri_to_load) when is_function(external_loader) do if URIUtil.remote_ref?(uri_to_load), do: :ok, else: :halt end + defp check_load_remote(_, _), do: :halt defp built_in_defs_for_ref(_, nil), do: nil + defp built_in_defs_for_ref(base_uri, ref) do uri = uri_to_resolve(base_uri, ref) {base, _fragment} = URIUtil.split_fragment(uri) @@ -147,6 +190,7 @@ defmodule JSONSchex.Validator.Reference do defp resolve_scoped_ref(base_uri, ref, registry) when 
base_uri == ref do Map.get(registry, ref) end + defp resolve_scoped_ref(base_uri, ref, registry) do uri = uri_to_resolve(base_uri, ref) Map.get(registry, uri) || Map.get(registry, ref) @@ -155,13 +199,16 @@ defmodule JSONSchex.Validator.Reference do defp uri_to_resolve(base_uri, "#" <> _ = ref) when base_uri != nil do URIUtil.with_fragment(base_uri, URIUtil.fragment(ref)) end + defp uri_to_resolve(base_uri, ref) when base_uri != nil do resolve_relative_uri(base_uri, ref) end + defp uri_to_resolve(nil, ref) when ref != nil, do: ref defp uri_to_resolve(_, _), do: nil defp resolve_relative_uri(nil, ref), do: ref + defp resolve_relative_uri(base_uri, ref_string) do cond do String.starts_with?(ref_string, base_uri <> "#/") -> @@ -169,12 +216,15 @@ defmodule JSONSchex.Validator.Reference do String.starts_with?(ref_string, base_uri <> "#") -> ref_string + true -> case URI.parse(base_uri) do %{scheme: "urn"} -> ref_string + %{scheme: scheme} when scheme != nil -> URIUtil.resolve(base_uri, ref_string) + _ -> ref_string end @@ -190,12 +240,27 @@ defmodule JSONSchex.Validator.Reference do built_in_defs when is_map(built_in_defs) -> merged_context = merge_defs_into_context(validation_context, built_in_defs) compiled_remote = Map.fetch!(merged_context.root_schema.defs, base) - validate_loaded_schema(data, compiled_remote, fragment, current_path, merged_context, evaluated) + + validate_loaded_schema( + data, + compiled_remote, + fragment, + current_path, + merged_context, + evaluated + ) nil -> case load_external_schema(uri, base, validation_context) do {:ok, compiled_remote, merged_context} -> - validate_loaded_schema(data, compiled_remote, fragment, current_path, merged_context, evaluated) + validate_loaded_schema( + data, + compiled_remote, + fragment, + current_path, + merged_context, + evaluated + ) :halt -> :halt @@ -209,12 +274,17 @@ defmodule JSONSchex.Validator.Reference do end loaded_schema -> - updated_context = %{validation_context | - source_id: 
loaded_schema.source_id, - raw: loaded_schema.raw + updated_context = %{ + validation_context + | source_id: loaded_schema.source_id, + raw: loaded_schema.raw } - validate_ref(data, URIUtil.local_ref(fragment), {current_path, evaluated, updated_context}) + validate_ref( + data, + URIUtil.local_ref(fragment), + {current_path, evaluated, updated_context} + ) end end @@ -237,7 +307,8 @@ defmodule JSONSchex.Validator.Reference do {:ok, compiled_remote, merged_context} {:error, error} -> - {:error, %ErrorContext{contrast: "compile_remote", input: uri, error_detail: error}} + {:error, + %ErrorContext{contrast: "compile_remote", input: uri, error_detail: error}} end other -> @@ -249,7 +320,14 @@ defmodule JSONSchex.Validator.Reference do end end - defp validate_loaded_schema(data, compiled_schema, fragment, current_path, validation_context, evaluated) do + defp validate_loaded_schema( + data, + compiled_schema, + fragment, + current_path, + validation_context, + evaluated + ) do updated_context = loaded_schema_context(validation_context, compiled_schema) if fragment != nil do @@ -267,31 +345,64 @@ defmodule JSONSchex.Validator.Reference do validation_context.scope_stack end - %{validation_context | - scope_stack: new_stack, - source_id: compiled_schema.source_id, - raw: compiled_schema.raw + %{ + validation_context + | scope_stack: new_stack, + source_id: compiled_schema.source_id, + raw: compiled_schema.raw } end - defp resolve_and_validate_jit(data, raw_root, pointer, current_path, validation_context, evaluated) do - case ExJSONPointer.resolve(raw_root, pointer) do + defp resolve_and_validate_jit( + data, + raw_root, + pointer, + current_path, + validation_context, + evaluated + ) do + case resolve_jit_fragment(raw_root, pointer, validation_context) do {:ok, found_fragment} -> opts = [ external_loader: validation_context.root_schema.external_loader, format_assertion: validation_context.root_schema.format_assertion, content_assertion: 
validation_context.root_schema.content_assertion ] + case Compiler.compile(found_fragment, opts) do {:ok, compiled_fragment} -> - Validator.validate_entry(compiled_fragment, data, current_path, validation_context, evaluated) + Validator.validate_entry( + compiled_fragment, + data, + current_path, + validation_context, + evaluated + ) {:error, error} -> - {:error, %ErrorContext{contrast: "invalid_schema", input: pointer, error_detail: error}} + {:error, + %ErrorContext{contrast: "invalid_schema", input: pointer, error_detail: error}} end {:error, _token} -> {:error, %ErrorContext{contrast: "ref_not_found", input: pointer}} end end + + defp resolve_jit_fragment(raw_root, "#/" <> _ = pointer, validation_context) do + case Ref.resolve(raw_root, pointer, + source: validation_context.source_id, + base_uri: validation_context.source_id + ) do + {:ok, %Ref.Resolution{target_value: found_fragment}} -> + {:ok, found_fragment} + + {:error, _reason} -> + {:error, :not_found} + end + end + + defp resolve_jit_fragment(raw_root, pointer, _validation_context) do + ExJSONPointer.resolve(raw_root, pointer) + end end diff --git a/mix.exs b/mix.exs index 14fb867..c841d32 100644 --- a/mix.exs +++ b/mix.exs @@ -30,6 +30,7 @@ defmodule JSONSchex.MixProject do "README.md", "CHANGELOG.md", "guide/loader.md", + "guide/ref.md", "guide/dialect_and_vocabulary.md", "guide/feature_matrix.md", "guide/content_and_format.md", @@ -38,6 +39,7 @@ defmodule JSONSchex.MixProject do groups_for_extras: [ Guides: [ "guide/loader.md", + "guide/ref.md", "guide/dialect_and_vocabulary.md", "guide/feature_matrix.md", "guide/content_and_format.md", @@ -47,6 +49,7 @@ defmodule JSONSchex.MixProject do groups_for_modules: [ "Public API": [ JSONSchex, + JSONSchex.Ref, JSONSchex.Schema, JSONSchex.Sigil ], @@ -84,6 +87,7 @@ defmodule JSONSchex.MixProject do ] ], nest_modules_by_prefix: [ + JSONSchex.Ref, JSONSchex.Types, JSONSchex.Compiler, JSONSchex.Validator, diff --git a/test/ref_internal_regression_test.exs 
b/test/ref_internal_regression_test.exs new file mode 100644 index 0000000..5fa1a7c --- /dev/null +++ b/test/ref_internal_regression_test.exs @@ -0,0 +1,77 @@ +defmodule JSONSchex.Test.RefInternalRegression do + use ExUnit.Case, async: true + + alias JSONSchex.ScopeScanner + + test "compiler still precompiles explicit local pointer refs when the root has an $id" do + schema = %{ + "$id" => "https://example.com/root.json", + "type" => "object", + "properties" => %{ + "foo" => %{"type" => "string"}, + "bar" => %{"$ref" => "#/properties/foo"} + } + } + + assert {:ok, compiled} = JSONSchex.compile(schema) + + assert Map.has_key?(compiled.defs, "#/properties/foo") + assert compiled.defs["#/properties/foo"].raw == %{"type" => "string"} + + assert :ok == JSONSchex.validate(compiled, %{"bar" => "hello"}) + assert {:error, [%{rule: :type}]} = JSONSchex.validate(compiled, %{"bar" => 123}) + end + + test "scope scanning reaches contentSchema so nested ids participate in ref resolution" do + schema = %{ + "$id" => "https://example.com/root.json", + "type" => "string", + "contentMediaType" => "application/json", + "contentSchema" => %{ + "$ref" => "inner", + "allOf" => [ + %{ + "$id" => "inner", + "type" => "integer" + } + ] + } + } + + {registry, _refs} = ScopeScanner.scan(schema) + assert Map.has_key?(registry, "https://example.com/inner") + + assert {:ok, compiled} = JSONSchex.compile(schema, content_assertion: true) + assert Map.has_key?(compiled.defs, "https://example.com/inner") + + assert :ok == JSONSchex.validate(compiled, "1") + assert {:error, errors} = JSONSchex.validate(compiled, ~S("1")) + assert Enum.any?(errors, &(&1.rule == :type)) + end + + test "validator JIT fallback still resolves local pointer fragments inside loaded schemas with a different root $id" do + loader = fn + "http://example.com/remote.json#/properties/foo" -> + {:ok, + %{ + "$id" => "http://example.com/actual/loaded.json", + "type" => "object", + "properties" => %{ + "foo" => %{"type" => 
"integer"} + } + }} + + _ -> + {:error, :enoent} + end + + schema = %{ + "$ref" => "http://example.com/remote.json#/properties/foo" + } + + assert {:ok, compiled} = JSONSchex.compile(schema, external_loader: loader) + + assert :ok == JSONSchex.validate(compiled, 42) + assert {:error, [%{rule: :type}]} = JSONSchex.validate(compiled, "not an integer") + end +end diff --git a/test/ref_test.exs b/test/ref_test.exs new file mode 100644 index 0000000..fe26d96 --- /dev/null +++ b/test/ref_test.exs @@ -0,0 +1,279 @@ +defmodule JSONSchex.Test.Ref do + use ExUnit.Case, async: true + + doctest JSONSchex.Ref + + alias JSONSchex.Ref + alias JSONSchex.Ref.{Cycle, Error, Resolution} + + describe "scan/2" do + test "discovers structural refs and tracks nested base URIs" do + document = %{ + "components" => %{ + "schemas" => %{ + "Base" => %{"type" => "string"}, + "Wrapper" => %{"$ref" => "#/components/schemas/Base"} + } + }, + "paths" => [ + %{ + "$id" => "schemas/user.json", + "response" => %{"$ref" => "./common.json#/$defs/error"} + } + ] + } + + occurrences = Ref.scan(document, source: "https://example.com/root.json") + + assert Enum.map(occurrences, & &1.path) == [ + ["components", "schemas", "Wrapper", "$ref"], + ["paths", 0, "response", "$ref"] + ] + + [wrapper_ref, response_ref] = occurrences + + assert wrapper_ref.source == "https://example.com/root.json" + assert wrapper_ref.base_uri == "https://example.com/root.json" + assert wrapper_ref.absolute_uri == "https://example.com/root.json#/components/schemas/Base" + assert wrapper_ref.fragment == "/components/schemas/Base" + + assert response_ref.base_uri == "https://example.com/schemas/user.json" + assert response_ref.absolute_uri == "https://example.com/schemas/common.json#/$defs/error" + assert response_ref.fragment == "/$defs/error" + end + end + + describe "walk/2" do + test "walks refs transitively across nested resources in depth-first order" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => 
%{ + "entry" => %{ + "$id" => "schemas/entry.json", + "$ref" => "leaf.json" + }, + "leaf" => %{ + "$id" => "schemas/leaf.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + } + }, + "start" => %{"$ref" => "#/$defs/entry"} + } + + assert {:ok, events} = + Ref.walk(document, + source: "specs/root.json", + base_uri: "https://example.com/root.json" + ) + + resolutions = Enum.filter(events, &match?(%Resolution{}, &1)) + + assert Enum.map(resolutions, & &1.target_uri) == [ + "https://example.com/schemas/leaf.json", + "https://example.com/schemas/leaf.json#/$defs/name", + "https://example.com/root.json#/$defs/entry" + ] + + assert Enum.all?(events, &(not match?(%Cycle{}, &1))) + assert Enum.all?(events, &(not match?(%Error{}, &1))) + end + + test "caches external documents while still emitting each resolution event" do + document = %{ + "first" => %{"$ref" => "schemas/common.json#/$defs/node"}, + "second" => %{"$ref" => "schemas/common.json#/$defs/node"} + } + + parent = self() + + loader = fn uri -> + send(parent, {:loaded, uri}) + + case uri do + "specs/schemas/common.json" -> + {:ok, + %{ + document: %{ + "$defs" => %{ + "node" => %{"$ref" => "#/terminal"} + }, + "terminal" => %{"type" => "string"} + }, + source: uri + }} + + _ -> + {:error, :enoent} + end + end + + assert {:ok, events} = + Ref.walk(document, + source: "specs/root.json", + loader: loader + ) + + resolutions = Enum.filter(events, &match?(%Resolution{}, &1)) + + assert Enum.map(resolutions, & &1.target_uri) == [ + "specs/schemas/common.json#/$defs/node", + "specs/schemas/common.json#/terminal", + "specs/schemas/common.json#/$defs/node" + ] + + assert_received {:loaded, "specs/schemas/common.json"} + refute_received {:loaded, "specs/schemas/common.json"} + end + + test "detects cycles without infinite recursion" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "a" => %{"$ref" => "#/$defs/b"}, + "b" => %{"$ref" => 
"#/$defs/a"} + }, + "start" => %{"$ref" => "#/$defs/a"} + } + + assert {:ok, events} = + Ref.walk(document, + base_uri: "https://example.com/root.json" + ) + + assert 3 == Enum.count(events, &match?(%Resolution{}, &1)) + + [cycle] = Enum.filter(events, &match?(%Cycle{}, &1)) + assert cycle.target_uri == "https://example.com/root.json#/$defs/b" + + assert cycle.trail == [ + "https://example.com/root.json#/$defs/b", + "https://example.com/root.json#/$defs/a", + "https://example.com/root.json#/$defs/b" + ] + end + + test "returns mixed resolution and error events" do + document = %{ + "$defs" => %{ + "ok" => %{"type" => "string"} + }, + "bad" => %{"$ref" => "#/$defs/missing"}, + "good" => %{"$ref" => "#/$defs/ok"} + } + + assert {:ok, events} = Ref.walk(document) + + assert 1 == Enum.count(events, &match?(%Resolution{}, &1)) + assert 1 == Enum.count(events, &match?(%Error{}, &1)) + end + end + + describe "resolve/3" do + test "resolves local pointers against the current nested resource" do + document = %{ + "$id" => "https://example.com/root.json", + "container" => %{ + "$id" => "schemas/user.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + } + } + + [location] = + Ref.scan(document, + source: "specs/root.json", + base_uri: "https://example.com/root.json" + ) + + assert {:ok, resolution} = + Ref.resolve(document, location, + source: "specs/root.json", + base_uri: "https://example.com/root.json" + ) + + assert resolution.target_uri == "https://example.com/schemas/user.json#/$defs/name" + assert resolution.target_source == "specs/root.json" + assert resolution.target_pointer == "#/$defs/name" + assert resolution.target_document == document["container"] + assert resolution.target_value == %{"type" => "string"} + end + + test "resolves external relative refs through the loader using path-like sources" do + document = %{ + "components" => %{ + "User" => %{"$ref" => "schemas/common.json#/$defs/id"} + } + } + + 
[location] = Ref.scan(document, source: "specs/root.json") + parent = self() + + loader = fn uri -> + send(parent, {:loaded, uri}) + + case uri do + "specs/schemas/common.json" -> + {:ok, + %{ + document: %{ + "$defs" => %{ + "id" => %{"type" => "string"} + } + }, + source: uri + }} + + _ -> + {:error, :enoent} + end + end + + assert {:ok, resolution} = + Ref.resolve(document, location, + source: "specs/root.json", + loader: loader + ) + + assert_received {:loaded, "specs/schemas/common.json"} + assert resolution.target_uri == "specs/schemas/common.json#/$defs/id" + assert resolution.target_source == "specs/schemas/common.json" + assert resolution.target_pointer == "#/$defs/id" + assert resolution.target_document == %{"$defs" => %{"id" => %{"type" => "string"}}} + assert resolution.target_value == %{"type" => "string"} + end + + test "resolves bundled draft resources without a loader" do + document = %{ + "$ref" => "https://json-schema.org/draft/2020-12/meta/core#/$defs/uriString" + } + + [location] = Ref.scan(document) + + assert {:ok, resolution} = Ref.resolve(document, location) + + assert resolution.target_source == "https://json-schema.org/draft/2020-12/meta/core" + assert resolution.target_pointer == "#/$defs/uriString" + assert resolution.target_value == %{"type" => "string", "format" => "uri"} + assert is_map(resolution.target_document) + end + + test "returns structured errors for missing targets" do + document = %{ + "$defs" => %{}, + "schema" => %{"$ref" => "#/$defs/missing"} + } + + [location] = Ref.scan(document) + + assert {:error, %Error{} = error} = Ref.resolve(document, location) + assert error.kind == :missing_target + assert error.target_uri == "#/$defs/missing" + assert error.location == location + end + end +end From 71c152041522a7aa1332a7f6f6109a942c297a44 Mon Sep 17 00:00:00 2001 From: xinz Date: Thu, 14 May 2026 00:04:08 +0800 Subject: [PATCH 2/5] Clean --- lib/jsonschex/compiler.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/lib/jsonschex/compiler.ex b/lib/jsonschex/compiler.ex index 4c479e8..94480f4 100644 --- a/lib/jsonschex/compiler.ex +++ b/lib/jsonschex/compiler.ex @@ -100,7 +100,6 @@ defmodule JSONSchex.Compiler do resolve_refs( raw_schema, MapSet.to_list(explicit_refs), - root_compiled.source_id, root_vocabs, ctx ) @@ -160,7 +159,7 @@ defmodule JSONSchex.Compiler do {:ok, current_vocabs} end - defp resolve_refs(raw_schema, refs, _base_uri, vocabs, ctx) do + defp resolve_refs(raw_schema, refs, vocabs, ctx) do refs = Enum.filter(refs, &local_pointer_ref?/1) ExJSONPointer.batch_resolve_reduce(raw_schema, refs, %{}, fn ref, result, acc -> From ef0950853515aa814bb4ab566589cde4363bf588 Mon Sep 17 00:00:00 2001 From: xinz Date: Thu, 14 May 2026 13:51:23 +0800 Subject: [PATCH 3/5] Update docs for ref source --- guide/ref.md | 19 ++++++++++++++++++- lib/jsonschex.ex | 2 +- lib/jsonschex/ref.ex | 11 +++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/guide/ref.md b/guide/ref.md index d12c763..f68b4bd 100644 --- a/guide/ref.md +++ b/guide/ref.md @@ -12,6 +12,21 @@ Unlike `JSONSchex.compile/2`, this API is intentionally **policy-free**. It does Use this API when you need to inspect or normalize documents **before** compilation, or when your application owns its own reference expansion policy. +## `:source` vs `:base_uri` + +These two options are related, but they are not the same: + +- `:base_uri` controls how relative references resolve +- `:source` identifies where the current document came from + +In practice, `:source` is primarily provenance metadata that is copied into returned locations, resolutions, errors, and walk events. However, when `:base_uri` is omitted and `:source` is a binary, `JSONSchex.Ref` also uses `:source` as the initial base URI. 
+ +That means: + +- if you only care about resolution semantics, passing `:base_uri` is enough +- if you also want meaningful source metadata, pass `:source` +- if your source path or URI should also act as the reference base, you can pass only `:source` + ## Overview `JSONSchex.Ref` exposes three main entry points: @@ -28,7 +43,7 @@ Each location includes: - `:raw_ref` — original `$ref` string - `:path` — path to the `$ref` key within the scanned document -- `:source` — caller-supplied source identifier +- `:source` — caller-supplied source identifier used for provenance - `:base_uri` — effective base URI at that location, honoring nested `$id` - `:absolute_uri` — resolved target URI when it can be derived - `:fragment` — target fragment without the leading `#` @@ -75,6 +90,8 @@ You can pass either: Passing a `Location` is usually the better choice because it preserves nested `$id` scoping. +If you omit `:base_uri`, a binary `:source` also becomes the initial base URI for the root document. + ### Loader contract External documents are loaded through `:loader` or `:external_loader`. diff --git a/lib/jsonschex.ex b/lib/jsonschex.ex index 0df304c..c06557e 100644 --- a/lib/jsonschex.ex +++ b/lib/jsonschex.ex @@ -29,7 +29,7 @@ defmodule JSONSchex do For structural `$ref` discovery and traversal before compilation, see: - `JSONSchex.Ref` - - the [Structural `$ref` guide](guide/ref.md) + - the [Structural `$ref` guide](guide/ref.md), including the distinction between `:source` provenance and `:base_uri` resolution For compile-time schema embedding, see: diff --git a/lib/jsonschex/ref.ex b/lib/jsonschex/ref.ex index d2cfc35..11a6898 100644 --- a/lib/jsonschex/ref.ex +++ b/lib/jsonschex/ref.ex @@ -28,11 +28,18 @@ defmodule JSONSchex.Ref do ## Options - - `:source` — source identifier for the root document - - `:base_uri` — explicit starting base URI override + - `:source` — source identifier for the root document. 
This is primarily + provenance metadata for returned `%Location{}`, `%Resolution{}`, `%Error{}`, + and `%Cycle{}` values. + - `:base_uri` — explicit starting base URI override used for reference + resolution. - `:loader` — `(document_uri -> {:ok, document} | {:ok, %{document: document, source: source}} | {:error, term()})` - `:external_loader` — accepted as an alias for `:loader` + If `:base_uri` is omitted and `:source` is a binary, `:source` is also used + as the initial base URI. This is convenient when the source path or URI is + both the document identifier and the desired reference base. + When resolving a bare reference string, resolution starts from the root document context. To preserve nested `$id` scope, prefer passing a scanned `%Location{}` into `resolve/3`. From d5f2db6cf33b5601881d553d7b2871e57de5152f Mon Sep 17 00:00:00 2001 From: xinz Date: Mon, 18 May 2026 22:03:54 +0800 Subject: [PATCH 4/5] Add more APIs to JSONSchex.Ref --- CHANGELOG.md | 2 +- README.md | 4 +- guide/dialect_and_vocabulary.md | 6 +- guide/feature_matrix.md | 2 +- guide/loader.md | 14 +- guide/ref.md | 203 +++++- lib/jsonschex.ex | 2 +- lib/jsonschex/compiler.ex | 12 +- lib/jsonschex/ref.ex | 784 +++++++++++++++++++--- lib/jsonschex/schema.ex | 4 +- lib/jsonschex/types.ex | 4 +- lib/jsonschex/uri_util.ex | 35 +- lib/jsonschex/validator/reference.ex | 89 ++- mix.exs | 2 +- mix.lock | 2 +- test/debug_defs_suite_test.exs | 2 +- test/debug_dynamic_ref_suite_test.exs | 8 +- test/debug_optional_format_suite_test.exs | 4 +- test/debug_ref_suite_test.exs | 2 +- test/debug_vocabulary_suite_test.exs | 2 +- test/format_assertion_test.exs | 10 +- test/meta_schema_test.exs | 8 +- test/ref_internal_regression_test.exs | 43 +- test/ref_remote_test.exs | 12 +- test/ref_test.exs | 398 ++++++++++- test/schema_static_compile_test.exs | 2 +- test/support/suite_runner.ex | 2 +- test/vocabulary_dialect_test.exs | 2 +- 28 files changed, 1492 insertions(+), 168 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index fc1cc2e..c9fac2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Bug Fixes and Improvements - * Add a new public structural `$ref` API in `JSONSchex.Ref` for discovery (`scan/2`), single-step resolution (`resolve/3`), and transitive traversal with cycle reporting (`walk/2`) + * Add a new public structural `$ref` API in `JSONSchex.Ref` for discovery (`scan/2`), single-step resolution (`resolve/3`), transitive traversal with cycle reporting (`walk/2`), callback-based transformation (`transform/3`), preserved-ref rendering (`render_ref/3`), and location-keyed walk indexing (`index_walk_events/1`) * Improve internal scope scanning so `contentSchema` is traversed for nested `$id`, anchor, and local `$ref` discovery ## v0.6.0 (2026-05-09) diff --git a/README.md b/README.md index fee44cb..779adca 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ end - `f` — `format_assertion: true` - `c` — `content_assertion: true` -For compile-time embeddable options such as `:external_loader`, prefer remote +For compile-time embeddable options such as `:loader`, prefer remote captures like `&MyLoader.fetch/1` over anonymous functions. `~X` is preferred over `~J` to avoid the common sigil-name conflict with Jason. 
@@ -181,7 +181,7 @@ Enum.map(errors, &JSONSchex.format_error/1) `JSONSchex.compile/2` accepts an optional keyword list with the following options: -- `:external_loader` — Function for loading remote `$ref` schemas (see [Loader guide](guide/loader.md)) +- `:loader` — Function for loading remote `$ref` schemas (see [Loader guide](guide/loader.md)) - `:base_uri` — Starting base URI for resolving relative references (see [Loader guide](guide/loader.md)) - `:format_assertion` — Enable strict `format` validation (default: `false`; the built-in Draft 2020-12 dialect keeps `format` annotation-only unless explicitly enabled, see [Content and format guide](guide/content_and_format.md)) - `:content_assertion` — Enable strict content vocabulary validation (default: `false`, see [Content and format guide](guide/content_and_format.md)) diff --git a/guide/dialect_and_vocabulary.md b/guide/dialect_and_vocabulary.md index dbe7645..94dc56c 100644 --- a/guide/dialect_and_vocabulary.md +++ b/guide/dialect_and_vocabulary.md @@ -14,7 +14,7 @@ JSONSchex resolves dialect in this order: 1. If the root schema declares the canonical Draft 2020-12 meta-schema URI (`https://json-schema.org/draft/2020-12/schema`), JSONSchex treats it as a built-in dialect and does not invoke the external loader for that URI. 2. For the built-in Draft 2020-12 dialect, JSONSchex uses the standard Draft 2020-12 active vocabulary defaults, while still honoring an explicit root-level `$vocabulary` declaration when present. -3. If the root schema contains another `$schema` URI and an `external_loader` is provided, JSONSchex attempts to load that meta-schema remotely. +3. If the root schema contains another `$schema` URI and a `loader` is provided, JSONSchex attempts to load that meta-schema remotely. 4. If a custom meta-schema loads successfully, JSONSchex reads `$vocabulary` from it to build the enabled vocabulary set. 5. 
If no loader is available or the meta-schema cannot be loaded, JSONSchex proceeds with the implementation default capability set. @@ -150,7 +150,7 @@ schema = %{ } # Compile with the custom loader -{:ok, compiled} = JSONSchex.compile(schema, external_loader: loader) +{:ok, compiled} = JSONSchex.compile(schema, loader: loader) # The schema compiles, but 'allOf' is ignored because the applicator # vocabulary is not in the restricted meta-schema's vocabulary list @@ -192,7 +192,7 @@ JSONSchex.validate(compiled2, "not-an-email") # => {:error, [...]} ## Practical guidance - Use the canonical Draft 2020-12 `$schema` URI when you want standard behavior without requiring a remote meta-schema fetch. -- If you rely on a custom meta-schema or vocabulary, provide an `external_loader`. +- If you rely on a custom meta-schema or vocabulary, provide a `loader`. - Use `$schema` to make the dialect explicit and predictable. - Use an explicit root `$vocabulary` only when you need to override the built-in active vocabulary set for the selected dialect. - Avoid mixing keywords from unsupported vocabularies unless you also ship a loader that resolves the meta-schema. diff --git a/guide/feature_matrix.md b/guide/feature_matrix.md index fa316a3..8fca001 100644 --- a/guide/feature_matrix.md +++ b/guide/feature_matrix.md @@ -84,4 +84,4 @@ This guide summarizes JSONSchex support for Draft 2020-12 keywords and vocabular ## Notes - When assertion options are not enabled, the corresponding keywords are accepted but do not enforce validation. -- For remote meta-schema resolution, provide an `external_loader` to `JSONSchex.compile/2`. +- For remote meta-schema resolution, provide a `loader` to `JSONSchex.compile/2`. 
diff --git a/guide/loader.md b/guide/loader.md index 88c2f2c..55d205e 100644 --- a/guide/loader.md +++ b/guide/loader.md @@ -12,11 +12,11 @@ JSONSchex supports: - `$schema` resolution when a meta-schema needs to be fetched - `$id`-based base URI scoping for nested schemas -Remote fetching is **opt-in** via the `external_loader` option passed to `JSONSchex.compile/2`. +Remote fetching is **opt-in** via the `loader` option passed to `JSONSchex.compile/2`. ## Loader contract -Your loader is a function that receives a URI string and returns one of: +Your loader is a function that receives a resolved **document URI without the fragment** and returns one of: - `{:ok, map}` — a decoded JSON Schema map - `{:error, term}` — any error reason you want to propagate @@ -42,7 +42,7 @@ loader = fn uri -> end {:ok, compiled} = - JSONSchex.compile(schema, external_loader: loader, base_uri: "https://example.com/root.json") + JSONSchex.compile(schema, loader: loader, base_uri: "https://example.com/root.json") ``` ### HTTP-based loader example @@ -96,7 +96,7 @@ schema = %{ "$ref" => "https://json-schema.org/draft/2020-12/schema" } -{:ok, compiled} = JSONSchex.compile(schema, external_loader: &MyApp.SchemaLoader.loader/1) +{:ok, compiled} = JSONSchex.compile(schema, loader: &MyApp.SchemaLoader.loader/1) ``` **Important considerations for HTTP loaders:** @@ -111,7 +111,7 @@ schema = %{ The loader is invoked when: -1. A `$ref` points to a **remote URI** that is not already in the registry. +1. A `$ref` points to an unresolved **external** resource that is not already in the registry. This includes `http(s)` refs as well as other non-local refs such as path-like file refs. 2. A `$schema` URI must be loaded to resolve dialect and `$vocabulary` (if a loader is provided). If no loader is supplied, JSONSchex skips remote fetches and proceeds with defaults where possible. @@ -122,8 +122,8 @@ At a high level: 1. Resolve the `$ref` against the current base URI. 2. 
Check the local registry for a match. -3. If the ref is remote and not in the registry, call the loader. -4. Compile the remote schema and merge its registry into the root context. +3. If the ref points to an unresolved external resource, call the loader with the resolved document URI without the fragment. +4. Compile the loaded schema and merge its registry into the root context. 5. Continue validation from the referenced fragment, if any. ## :base_uri option and $id interaction diff --git a/guide/ref.md b/guide/ref.md index f68b4bd..c7c4878 100644 --- a/guide/ref.md +++ b/guide/ref.md @@ -34,6 +34,9 @@ That means: - `scan/2` — discover structural `$ref` locations - `resolve/3` — resolve one location or raw ref string - `walk/2` — traverse reachable `$ref` targets transitively +- `transform/3` — apply a callback-driven structural rewrite over discovered `$ref` locations +- `render_ref/3` — render a stable `$ref` string for a resolved target +- `index_walk_events/1` — convert ordered walk events into a location-keyed index ## `scan/2` @@ -46,7 +49,8 @@ Each location includes: - `:source` — caller-supplied source identifier used for provenance - `:base_uri` — effective base URI at that location, honoring nested `$id` - `:absolute_uri` — resolved target URI when it can be derived -- `:fragment` — target fragment without the leading `#` + +When you need the fragment portion, derive it from `location.absolute_uri` (or `location.raw_ref`) via `JSONSchex.URIUtil.fragment/1`. Example: @@ -94,7 +98,7 @@ If you omit `:base_uri`, a binary `:source` also becomes the initial base URI fo ### Loader contract -External documents are loaded through `:loader` or `:external_loader`. +External documents are loaded through `:loader`. 
The loader receives a **document URI without the fragment** and may return either: @@ -135,7 +139,7 @@ end loader: loader ) -resolution.target_uri +resolution.location.absolute_uri #=> "specs/schemas/common.json#/$defs/id" resolution.target_pointer @@ -199,6 +203,197 @@ Enum.map(events, & &1.__struct__) #=> [JSONSchex.Ref.Resolution, JSONSchex.Ref.Resolution, JSONSchex.Ref.Cycle, ...] ``` +## `transform/3` + +`transform/3` builds on the same traversal engine as `walk/2`, but lets you decide what to do with each discovered location. + +It accepts the same root-context options as `resolve/3` and `walk/2`: + +- `:source` +- `:base_uri` +- `:loader` + +The callback receives: + +- the `%JSONSchex.Ref.Location{}` being processed +- one of: + - `{:ok, resolution}` + - `{:cycle, resolution, cycle}` + - `{:error, error}` + +It returns one of: + +- `{:replace, term}` — replace the node containing the `$ref` +- `:keep` — keep the current node unchanged +- `{:error, term}` — abort the transform + +Nested targets are transformed before the callback runs for a successful parent location, which makes `transform/3` useful for post-order expansion. + +When a callback is triggered for a nested ref discovered inside a resolved target, the returned `%JSONSchex.Ref.Location{}` path is expressed in that resolved target's own document context. In other words, it is not prefixed by the original referring location's path. 
+ +### Expanding non-cyclic refs + +A simple downstream expansion policy can replace every successful ref with its resolved target value: + +```elixir +policy = fn _location, outcome -> + case outcome do + {:ok, resolution} -> {:replace, resolution.target_value} + {:cycle, _resolution, _cycle} -> :keep + {:error, error} -> {:error, error} + end +end + +{:ok, expanded} = + JSONSchex.Ref.transform(document, policy, + source: "specs/root.json", + loader: loader + ) +``` + +### Preserving recursive back-edges + +For recursive schemas, a downstream policy can preserve a cycle edge while still expanding non-cyclic refs: + +```elixir +policy = fn location, outcome -> + case outcome do + {:ok, resolution} -> + {:replace, resolution.target_value} + + {:cycle, resolution, _cycle} -> + {:replace, %{"$ref" => JSONSchex.Ref.render_ref(location, resolution)}} + + {:error, error} -> + {:error, error} + end +end +``` + +That keeps `jsonschex` structural and low-level, while letting downstream code decide its own rewrite policy. + +## `render_ref/3` + +`render_ref/3` renders a stable `$ref` string for a resolved target. + +Supported modes are: + +- `:original` — reuse the original raw `$ref` spelling from the source location +- `:absolute` — render an absolute target URI +- `:prefer_local` — default; render a local fragment for same-resource targets, otherwise prefer a relative resource ref and fall back to absolute rendering + +Examples: + +- same-resource pointer target → `#/$defs/name` +- same-resource anchor target → `#name` +- same-resource root target → `#` +- cross-resource target → `schemas/common.json#/$defs/name` or an absolute URI + +This is especially useful when `transform/3` decides to preserve a cycle edge instead of expanding it. + +## `index_walk_events/1` + +`index_walk_events/1` turns the ordered output of `walk/2` into a map keyed by `location_key/1`. 
+ +This is useful when downstream code wants fast lookup by location rather than replaying the ordered event stream. + +```elixir +{:ok, events} = JSONSchex.Ref.walk(document, source: "specs/root.json", loader: loader) +index = JSONSchex.Ref.index_walk_events(events) + +location = hd(JSONSchex.Ref.scan(document, source: "specs/root.json")) +key = JSONSchex.Ref.location_key(location) + +resolution = index.resolutions[key] +``` + +## Local files, nested `$id`, and loader consistency + +A common downstream workflow is using local file refs together with nested `$id` boundaries. + +Example: + +```elixir +root = %{ + "$id" => "specs/root.json", + "components" => %{ + "user" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "./common.json#/$defs/id"} + } + } +} + +loader = fn + "specs/schemas/common.json" -> + {:ok, + %{ + "$defs" => %{ + "id" => %{"type" => "integer"} + } + }} + + _ -> + {:error, :enoent} +end + +[location] = JSONSchex.Ref.scan(root, source: "specs/root.json") +{:ok, resolution} = JSONSchex.Ref.resolve(root, location, source: "specs/root.json", loader: loader) +``` + +In that example: + +- `scan/2` records the nested resource base as `specs/schemas/user.json` +- the relative ref `./common.json#/$defs/id` resolves to `specs/schemas/common.json#/$defs/id` +- the loader receives the **document URI without the fragment**: `specs/schemas/common.json` + +Runtime validation uses the same document-loading contract for unresolved external refs, so preserved local-file `$ref` values can participate in validation through `loader` as well. 
+ +### Recursive local-file traversal with `walk/2` + +The same loader contract works for recursive local-file schemas too: + +```elixir +root = %{ + "$id" => "specs/root.json", + "start" => %{"$ref" => "schemas/node.json#/$defs/node"} +} + +loader = fn + "specs/schemas/node.json" -> + {:ok, + %{ + "$defs" => %{ + "node" => %{ + "type" => "object", + "properties" => %{ + "next" => %{"$ref" => "#/$defs/node"} + } + } + } + }} + + _ -> + {:error, :enoent} +end + +{:ok, events} = + JSONSchex.Ref.walk(root, + source: "specs/root.json", + loader: loader + ) + +Enum.map(events, & &1.__struct__) +#=> [JSONSchex.Ref.Resolution, JSONSchex.Ref.Resolution, JSONSchex.Ref.Cycle] +``` + +In that example: + +- the root ref resolves through the loader to `specs/schemas/node.json#/$defs/node` +- the nested `#/$defs/node` ref is resolved inside the loaded document +- `walk/2` emits a `%JSONSchex.Ref.Cycle{}` instead of recursing forever +- the external document is still loaded only once for the whole traversal + ## Structured errors `resolve/3` and `walk/2` use `%JSONSchex.Ref.Error{}` for resolution failures. @@ -210,7 +405,7 @@ Current error kinds are: - `:missing_target` - `:invalid_loader_response` -These errors preserve the originating location and target URI when available, making them useful for downstream diagnostics. +These errors preserve the originating location, which includes the resolved target URI when it can be derived via `location.absolute_uri`, making them useful for downstream diagnostics. 
## Choosing between APIs diff --git a/lib/jsonschex.ex b/lib/jsonschex.ex index c06557e..ad5e7ff 100644 --- a/lib/jsonschex.ex +++ b/lib/jsonschex.ex @@ -48,7 +48,7 @@ defmodule JSONSchex do ## Options - - `:external_loader` — `(uri -> {:ok, map()} | {:error, term()})` for remote `$ref` schemas + - `:loader` — `(uri -> {:ok, map()} | {:error, term()})` for remote `$ref` schemas - `:base_uri` — Starting base URI for resolving relative references - `:format_assertion` — Enable strict `format` validation (default: `false`) - `:content_assertion` — Enable strict content vocabulary validation (default: `false`) diff --git a/lib/jsonschex/compiler.ex b/lib/jsonschex/compiler.ex index 94480f4..4cc8b5d 100644 --- a/lib/jsonschex/compiler.ex +++ b/lib/jsonschex/compiler.ex @@ -45,19 +45,19 @@ defmodule JSONSchex.Compiler do def compile(raw_schema, opts \\ []) def compile(raw_schema, opts) when is_map(raw_schema) do - external_loader = Keyword.get(opts, :external_loader) + loader = Keyword.get(opts, :loader) init_base = Keyword.get(opts, :base_uri) format_assertion = Keyword.get(opts, :format_assertion, false) content_assertion = Keyword.get(opts, :content_assertion, false) ctx = %{ - loader: external_loader, + loader: loader, format_assertion: format_assertion, content_assertion: content_assertion } with :ok <- Dialect.validate_required_vocabularies(raw_schema), - {:ok, root_vocabs} <- resolve_dialect(raw_schema, external_loader, @default_vocabs_list), + {:ok, root_vocabs} <- resolve_dialect(raw_schema, loader, @default_vocabs_list), {:ok, root_compiled} <- compile_schema_node(raw_schema, init_base, root_vocabs, ctx) do {global_scopes, explicit_refs} = ScopeScanner.scan(raw_schema) @@ -71,7 +71,7 @@ defmodule JSONSchex.Compiler do else case Dialect.validate_required_vocabularies(sub_raw) do :ok -> - case resolve_dialect(sub_raw, external_loader, root_vocabs) do + case resolve_dialect(sub_raw, loader, root_vocabs) do {:ok, sub_vocabs} -> sub_raw |> Map.delete("$id") @@ 
-109,7 +109,7 @@ defmodule JSONSchex.Compiler do error defs -> - {:ok, %{root_compiled | defs: defs, external_loader: external_loader}} + {:ok, %{root_compiled | defs: defs, loader: loader}} end end end @@ -227,7 +227,7 @@ defmodule JSONSchex.Compiler do defs: compiled_defs, source_id: base, raw: schema, - external_loader: ctx.loader, + loader: ctx.loader, format_assertion: ctx.format_assertion, content_assertion: ctx.content_assertion }} diff --git a/lib/jsonschex/ref.ex b/lib/jsonschex/ref.ex index 11a6898..33bb307 100644 --- a/lib/jsonschex/ref.ex +++ b/lib/jsonschex/ref.ex @@ -25,6 +25,10 @@ defmodule JSONSchex.Ref do - `resolve/3` resolves one location or raw ref string into a `%Resolution{}` - `walk/2` performs a depth-first transitive traversal and returns ordered `%Resolution{}`, `%Error{}`, and `%Cycle{}` events + - `transform/3` applies a callback-driven, policy-free structural rewrite over + discovered `$ref` locations + - `render_ref/3` renders a stable `$ref` string for a resolved target + - `index_walk_events/1` turns ordered walk events into a location-keyed index ## Options @@ -34,7 +38,6 @@ defmodule JSONSchex.Ref do - `:base_uri` — explicit starting base URI override used for reference resolution. - `:loader` — `(document_uri -> {:ok, document} | {:ok, %{document: document, source: source}} | {:error, term()})` - - `:external_loader` — accepted as an alias for `:loader` If `:base_uri` is omitted and `:source` is a binary, `:source` is also used as the initial base URI. 
This is convenient when the source path or URI is @@ -94,8 +97,7 @@ defmodule JSONSchex.Ref do :path, :source, :base_uri, - :absolute_uri, - :fragment + :absolute_uri ] @type t :: %__MODULE__{ @@ -103,9 +105,19 @@ defmodule JSONSchex.Ref do path: JSONSchex.Ref.path(), source: JSONSchex.Ref.source() | nil, base_uri: String.t() | nil, - absolute_uri: String.t() | nil, - fragment: String.t() | nil + absolute_uri: String.t() | nil } + + @doc """ + Returns the path to the node containing the `$ref`, excluding the `$ref` key itself. + """ + @spec node_path(t()) :: JSONSchex.Ref.path() + def node_path(%__MODULE__{path: path}) when is_list(path) do + case Enum.reverse(path) do + ["$ref" | rest] -> Enum.reverse(rest) + _ -> path + end + end end defmodule Resolution do @@ -120,7 +132,6 @@ defmodule JSONSchex.Ref do @enforce_keys [:location, :target_source, :target_document, :target_value] defstruct [ :location, - :target_uri, :target_source, :target_document, :target_value, @@ -129,7 +140,6 @@ defmodule JSONSchex.Ref do @type t :: %__MODULE__{ location: JSONSchex.Ref.Location.t(), - target_uri: String.t() | nil, target_source: JSONSchex.Ref.source() | nil, target_document: JSONSchex.Ref.document(), target_value: term(), @@ -146,7 +156,6 @@ defmodule JSONSchex.Ref do defstruct [ :kind, :location, - :target_uri, :details ] @@ -155,7 +164,6 @@ defmodule JSONSchex.Ref do @type t :: %__MODULE__{ kind: kind(), location: JSONSchex.Ref.Location.t() | nil, - target_uri: String.t() | nil, details: term() } end @@ -165,16 +173,14 @@ defmodule JSONSchex.Ref do A cycle detected while transitively walking `$ref` targets. """ - @enforce_keys [:location, :target_uri, :trail] + @enforce_keys [:location, :trail] defstruct [ :location, - :target_uri, :trail ] @type t :: %__MODULE__{ location: JSONSchex.Ref.Location.t(), - target_uri: String.t(), trail: [String.t()] } end @@ -182,6 +188,344 @@ defmodule JSONSchex.Ref do @typedoc "Ordered event emitted by `walk/2`." 
@type walk_event :: Resolution.t() | Error.t() | Cycle.t() + @typedoc "Stable key for indexing locations and walk events." + @type location_key :: {source(), String.t() | nil, path(), String.t() | nil} + + @typedoc "Indexed view of walk events keyed by location." + @type walk_index :: %{ + resolutions: %{optional(location_key()) => Resolution.t()}, + errors: %{optional(location_key()) => Error.t()}, + cycles: %{optional(location_key()) => Cycle.t()} + } + + @typedoc "Outcome passed to `transform/3` callbacks for a discovered location." + @type transform_outcome :: + {:ok, Resolution.t()} | {:cycle, Resolution.t(), Cycle.t()} | {:error, Error.t()} + + @typedoc "Return value expected from a `transform/3` callback." + @type transform_callback_result :: {:replace, term()} | :keep | {:error, term()} + + @typedoc "Callback used by `transform/3`." + @type transform_callback :: (Location.t(), transform_outcome() -> transform_callback_result()) + + @typedoc "Rendering mode used by `render_ref/3`." + @type render_mode :: :original | :absolute | :prefer_local + + @doc """ + Returns `true` if the given ref is a same-document local ref. + + ## Examples + + iex> JSONSchex.Ref.local_ref?("#/$defs/name") + true + + iex> JSONSchex.Ref.local_ref?("schemas/common.json#/$defs/name") + false + """ + @spec local_ref?(String.t()) :: boolean() + def local_ref?("#" <> _), do: true + def local_ref?(_), do: false + + @doc """ + Returns `true` if the given ref is external to the current document. + + ## Examples + + iex> JSONSchex.Ref.external_ref?("#/$defs/name") + false + + iex> JSONSchex.Ref.external_ref?("schemas/common.json#/$defs/name") + true + """ + @spec external_ref?(String.t()) :: boolean() + def external_ref?(ref) when is_binary(ref), do: not local_ref?(ref) + def external_ref?(_), do: false + + @doc """ + Returns a stable key for indexing a `%Location{}`. 
+ """ + @spec location_key(Location.t()) :: location_key() + def location_key(%Location{} = location) do + {location.source, location.base_uri, location.path, location.absolute_uri} + end + + @doc """ + Indexes walk events by `location_key/1`. + + The returned map separates successful resolutions, errors, and cycles. + + ## Examples + + iex> location = %JSONSchex.Ref.Location{ + ...> raw_ref: "#/$defs/name", + ...> path: ["schema", "$ref"], + ...> source: "https://example.com/root.json", + ...> base_uri: "https://example.com/root.json", + ...> absolute_uri: "https://example.com/root.json#/$defs/name" + ...> } + iex> resolution = %JSONSchex.Ref.Resolution{ + ...> location: location, + ...> target_source: "https://example.com/root.json", + ...> target_document: %{}, + ...> target_value: %{}, + ...> target_pointer: "#/$defs/name" + ...> } + iex> index = JSONSchex.Ref.index_walk_events([resolution]) + iex> Map.has_key?(index.resolutions, JSONSchex.Ref.location_key(location)) + true + """ + @spec index_walk_events([walk_event()]) :: walk_index() + def index_walk_events(events) when is_list(events) do + Enum.reduce(events, %{resolutions: %{}, errors: %{}, cycles: %{}}, fn event, acc -> + case event do + %Resolution{location: location} = resolution -> + put_in(acc, [:resolutions, location_key(location)], resolution) + + %Error{location: location} = error when is_struct(location, Location) -> + put_in(acc, [:errors, location_key(location)], error) + + %Cycle{location: location} = cycle -> + put_in(acc, [:cycles, location_key(location)], cycle) + + _ -> + acc + end + end) + end + + @doc """ + Returns the resource URI represented by the given ref struct. + + For `%Location{}`, this is the current resource being scanned. + For `%Resolution{}`, `%Error{}`, and `%Cycle{}`, this is the target resource. 
+ """ + @spec resource_uri(Location.t() | Resolution.t() | Error.t() | Cycle.t()) :: String.t() | nil + def resource_uri(%Location{base_uri: base_uri}) when is_binary(base_uri), + do: URIUtil.base(base_uri) + + def resource_uri(%Location{absolute_uri: absolute_uri}) when is_binary(absolute_uri), + do: URIUtil.base(absolute_uri) + + def resource_uri(%Location{}), do: nil + + def resource_uri(%Resolution{location: location, target_source: target_source}) do + cond do + match?(%Location{absolute_uri: absolute_uri} when is_binary(absolute_uri), location) -> + URIUtil.base(location.absolute_uri) + + is_binary(target_source) -> + URIUtil.base(target_source) + + true -> + nil + end + end + + def resource_uri(%Error{location: location}) do + cond do + match?(%Location{absolute_uri: absolute_uri} when is_binary(absolute_uri), location) -> + URIUtil.base(location.absolute_uri) + + match?(%Location{}, location) -> + resource_uri(location) + + true -> + nil + end + end + + def resource_uri(%Cycle{location: location, trail: trail}) do + cond do + is_list(trail) and trail != [] and is_binary(hd(trail)) -> URIUtil.base(hd(trail)) + match?(%Location{}, location) -> resource_uri(location) + true -> nil + end + end + + @doc """ + Renders a `$ref` string for the given resolved target. + + Supported modes are: + + - `:original` — keep the original raw `$ref` from the source location + - `:absolute` — render the target as an absolute resource URI plus fragment + - `:prefer_local` — render a local fragment for same-resource targets, otherwise + a relative ref when it can be computed safely, falling back to absolute + + The default mode is `:prefer_local`. 
+ + ## Examples + + iex> location = %JSONSchex.Ref.Location{ + ...> raw_ref: "#/$defs/name", + ...> path: ["schema", "$ref"], + ...> source: "https://example.com/root.json", + ...> base_uri: "https://example.com/root.json", + ...> absolute_uri: "https://example.com/root.json#/$defs/name" + ...> } + iex> resolution = %JSONSchex.Ref.Resolution{ + ...> location: location, + ...> target_source: "https://example.com/root.json", + ...> target_document: %{"$defs" => %{"name" => %{"type" => "string"}}}, + ...> target_value: %{"type" => "string"}, + ...> target_pointer: "#/$defs/name" + ...> } + iex> JSONSchex.Ref.render_ref(location, resolution) + "#/$defs/name" + iex> JSONSchex.Ref.render_ref(location, resolution, mode: :absolute) + "https://example.com/root.json#/$defs/name" + """ + @spec render_ref(Location.t(), Resolution.t(), keyword()) :: String.t() | nil + def render_ref(%Location{} = location, %Resolution{} = resolution, opts \\ []) do + case Keyword.get(opts, :mode, :prefer_local) do + :original -> + render_original_ref(location, resolution) + + :absolute -> + render_absolute_ref(resolution) + + :prefer_local -> + render_prefer_local_ref(location, resolution) + end + end + + defp render_original_ref(%Location{raw_ref: raw_ref}, _resolution) when is_binary(raw_ref), + do: raw_ref + + defp render_original_ref(_location, %Resolution{} = resolution), + do: render_prefer_local_ref(nil, resolution) + + defp render_absolute_ref(%Resolution{location: %Location{absolute_uri: absolute_uri}}) + when is_binary(absolute_uri), + do: absolute_uri + + defp render_absolute_ref(%Resolution{} = resolution) do + target_resource = resource_uri(resolution) + fragment = preferred_fragment(resolution) + + cond do + is_binary(target_resource) and is_binary(fragment) -> + target_resource <> "#" <> fragment + + is_binary(target_resource) and is_binary(resolution.target_pointer) -> + target_resource <> resolution.target_pointer + + is_binary(target_resource) -> + target_resource + + true -> 
+ nil + end + end + + defp render_prefer_local_ref(%Location{} = location, %Resolution{} = resolution) do + source_resource = resource_uri(location) + target_resource = resource_uri(resolution) + + cond do + is_binary(source_resource) and is_binary(target_resource) and + source_resource == target_resource -> + render_same_resource_ref(source_resource, resolution) + + is_binary(source_resource) and is_binary(target_resource) -> + render_relative_resource_ref(source_resource, target_resource, resolution) || + render_absolute_ref(resolution) + + true -> + render_absolute_ref(resolution) + end + end + + defp render_prefer_local_ref(_location, %Resolution{} = resolution), + do: render_absolute_ref(resolution) + + defp render_same_resource_ref(source_resource, %Resolution{} = resolution) do + fragment = preferred_fragment(resolution) + + cond do + is_binary(fragment) -> + "#" <> fragment + + is_binary(resolution.target_pointer) -> + resolution.target_pointer + + same_resource_root?(source_resource, resolution) -> + "#" + + true -> + nil + end + end + + defp render_relative_resource_ref(source_resource, target_resource, %Resolution{} = resolution) do + with relative_resource when is_binary(relative_resource) <- + relativize_resource_uri(source_resource, target_resource) do + case preferred_fragment(resolution) do + fragment when is_binary(fragment) -> + relative_resource <> "#" <> fragment + + _ when is_binary(resolution.target_pointer) -> + relative_resource <> resolution.target_pointer + + _ -> + relative_resource + end + else + _ -> nil + end + end + + defp preferred_fragment(%Resolution{location: %Location{absolute_uri: absolute_uri}}) + when is_binary(absolute_uri) do + URIUtil.fragment(absolute_uri) + end + + defp preferred_fragment(%Resolution{target_pointer: "#" <> fragment}) when is_binary(fragment), + do: fragment + + defp preferred_fragment(_), do: nil + + defp same_resource_root?(source_resource, %Resolution{} = resolution) do + absolute_uri = + case 
resolution.location do + %Location{absolute_uri: absolute_uri} when is_binary(absolute_uri) -> absolute_uri + _ -> nil + end + + resource_uri(resolution) == source_resource and + ((is_binary(absolute_uri) and is_nil(URIUtil.fragment(absolute_uri))) or + (is_nil(absolute_uri) and is_nil(resolution.target_pointer) and + resolution.target_value === resolution.target_document)) + end + + defp relativize_resource_uri(source_resource, target_resource) do + source = URI.parse(source_resource) + target = URI.parse(target_resource) + + cond do + path_like_resource?(source_resource) and path_like_resource?(target_resource) -> + Path.relative_to(target_resource, path_dirname(source_resource)) + + same_hierarchical_uri_origin?(source, target) and is_binary(source.path) and + is_binary(target.path) -> + Path.relative_to(target.path, path_dirname(source.path)) + + true -> + nil + end + end + + defp path_like_resource?(resource) when is_binary(resource) do + match?(%URI{scheme: nil}, URI.parse(resource)) + end + + defp path_like_resource?(_), do: false + + defp same_hierarchical_uri_origin?(source, target) do + source.scheme == target.scheme and source.host == target.host and source.port == target.port and + source.scheme not in [nil, "urn"] and is_binary(source.path) and is_binary(target.path) + end + @doc """ Recursively scans a document for `$ref` locations. @@ -223,9 +567,9 @@ defmodule JSONSchex.Ref do Passing a scanned `%Location{}` preserves nested `$id` scope. Passing a raw reference string resolves from the root document context derived from `opts`. - External documents are loaded through `:loader` or `:external_loader`. The - loader receives the resolved document URI without the fragment and may return - either a document directly or `%{document: document, source: source}`. + External documents are loaded through `:loader`. 
The loader receives the + resolved document URI without the fragment and may return either a document + directly or `%{document: document, source: source}`. ## Example @@ -318,6 +662,60 @@ defmodule JSONSchex.Ref do {:ok, Enum.reverse(state.events)} end + @doc """ + Structurally transforms a document by applying a callback to each discovered `$ref` location. + + The callback receives the location and one of: + + - `{:ok, resolution}` for a successfully transformed target + - `{:cycle, resolution, cycle}` when a target would recurse into an active trail + - `{:error, error}` when a location could not be resolved + + Returning `{:replace, term}` replaces the node containing the `$ref`. Returning + `:keep` leaves the current node in place. Returning `{:error, term}` aborts the + transform. + + Nested refs inside successfully resolved targets are transformed before the + callback runs for their successful parent location, making this API suitable + for post-order expansion policies. When that happens, the nested + `%Location{}` path is reported in the resolved target's own document context, + not as a path prefixed by the original referring location. + + This function uses the same root-context options as `walk/2` and `resolve/3`. + + ## Options + + - `:source` — source identifier for the root document. This is primarily + provenance metadata for returned `%Location{}`, `%Resolution{}`, + `%Error{}`, and `%Cycle{}` values seen by the callback. If `:base_uri` is + omitted and `:source` is a binary, `:source` is also used as the initial + base URI. + - `:base_uri` — explicit starting base URI override used for reference + resolution. 
+ - `:loader` — `(document_uri -> {:ok, document} | {:ok, %{document: document, source: source}} | {:error, term()})` + """ + @spec transform(document(), transform_callback(), keyword()) :: {:ok, term()} | {:error, term()} + def transform(document, fun, opts \\ []) + when (is_map(document) or is_list(document) or is_boolean(document)) and is_function(fun, 2) do + source = Keyword.get(opts, :source) + base_uri = initial_base_uri(opts, source) + loader = loader_from_opts(opts) + + state = %{ + active: MapSet.new(), + cache: %{}, + transformed_targets: %{} + } + + case transform_node(document, [], document, source, base_uri, loader, fun, state, [], []) do + {:ok, transformed_document, _state} -> + {:ok, transformed_document} + + {:error, reason, _state} -> + {:error, reason} + end + end + defp resolve_location(document, %Location{} = location, opts, cache) do source = location.source || Keyword.get(opts, :source) root_base_uri = initial_base_uri(opts, source) @@ -335,6 +733,259 @@ defmodule JSONSchex.Ref do end end + defp transform_node( + value, + _path, + _resolve_document, + _source, + _base_uri, + _loader, + _fun, + state, + _trail, + _path_prefix + ) + when is_boolean(value) or is_binary(value) or is_number(value) or is_nil(value) do + {:ok, value, state} + end + + defp transform_node( + list, + path, + resolve_document, + source, + base_uri, + loader, + fun, + state, + trail, + path_prefix + ) + when is_list(list) do + Enum.reduce_while(Enum.with_index(list), {:ok, [], state}, fn {item, index}, + {:ok, acc, acc_state} -> + case transform_node( + item, + path ++ [index], + resolve_document, + source, + base_uri, + loader, + fun, + acc_state, + trail, + path_prefix + ) do + {:ok, transformed_item, next_state} -> + {:cont, {:ok, [transformed_item | acc], next_state}} + + {:error, reason, next_state} -> + {:halt, {:error, reason, next_state}} + end + end) + |> case do + {:ok, acc, next_state} -> {:ok, Enum.reverse(acc), next_state} + {:error, reason, next_state} -> 
{:error, reason, next_state} + end + end + + defp transform_node( + map, + path, + resolve_document, + source, + base_uri, + loader, + fun, + state, + trail, + path_prefix + ) + when is_map(map) do + effective_base_uri = effective_base_uri(base_uri, map) + + case transform_map_entries( + map, + path, + resolve_document, + source, + effective_base_uri, + loader, + fun, + state, + trail, + path_prefix + ) do + {:ok, transformed_map, next_state} -> + maybe_transform_current_location( + transformed_map, + path, + resolve_document, + source, + effective_base_uri, + loader, + fun, + next_state, + trail, + path_prefix + ) + + {:error, reason, next_state} -> + {:error, reason, next_state} + end + end + + defp transform_map_entries( + map, + path, + resolve_document, + source, + base_uri, + loader, + fun, + state, + trail, + path_prefix + ) do + map + |> Enum.sort_by(&sort_entry/1) + |> Enum.reduce_while({:ok, %{}, state}, fn {key, value}, {:ok, acc, acc_state} -> + case transform_node( + value, + path ++ [key], + resolve_document, + source, + base_uri, + loader, + fun, + acc_state, + trail, + path_prefix + ) do + {:ok, transformed_value, next_state} -> + {:cont, {:ok, Map.put(acc, key, transformed_value), next_state}} + + {:error, reason, next_state} -> + {:halt, {:error, reason, next_state}} + end + end) + end + + defp maybe_transform_current_location( + map, + path, + resolve_document, + source, + base_uri, + loader, + fun, + state, + trail, + path_prefix + ) do + case Map.get(map, "$ref") do + ref when is_binary(ref) -> + location = + normalize_location( + %Location{raw_ref: ref, path: path ++ ["$ref"], source: source, base_uri: base_uri}, + source, + base_uri + ) + |> prefix_location_path(path_prefix) + + opts = [source: source, base_uri: base_uri, loader: loader] + {result, cache} = resolve_location(resolve_document, location, opts, state.cache) + state = %{state | cache: cache} + + case result do + {:error, %Error{} = error} -> + apply_transform_callback(fun, 
location, {:error, error}, map, state) + + {:ok, %Resolution{} = resolution} -> + case transform_resolution(resolution, loader, fun, state, trail) do + {:ok, outcome, next_state} -> + apply_transform_callback(fun, location, outcome, map, next_state) + + {:error, reason, next_state} -> + {:error, reason, next_state} + end + end + + _ -> + {:ok, map, state} + end + end + + defp transform_resolution(%Resolution{} = resolution, loader, fun, state, trail) do + target_uri = resolution_uri(resolution) + + cond do + not walkable_document?(resolution.target_value) -> + {:ok, {:ok, resolution}, state} + + not is_binary(target_uri) -> + {:ok, {:ok, resolution}, state} + + MapSet.member?(state.active, target_uri) -> + cycle = %Cycle{ + location: resolution.location, + trail: Enum.reverse([target_uri | trail]) + } + + {:ok, {:cycle, resolution, cycle}, state} + + same_source_resource_root?(resolution) -> + {:ok, {:ok, resolution}, state} + + match?({:ok, _}, Map.fetch(state.transformed_targets, target_uri)) -> + transformed_target = Map.fetch!(state.transformed_targets, target_uri) + {:ok, {:ok, %{resolution | target_value: transformed_target}}, state} + + true -> + next_state = %{state | active: MapSet.put(state.active, target_uri)} + + case transform_node( + resolution.target_value, + [], + resolution.target_document, + resolution.target_source, + next_base_uri(resolution), + loader, + fun, + next_state, + [target_uri | trail], + path_prefix_from_resolution(resolution) + ) do + {:ok, transformed_target, next_state} -> + next_state = %{ + next_state + | active: MapSet.delete(next_state.active, target_uri), + transformed_targets: + Map.put(next_state.transformed_targets, target_uri, transformed_target) + } + + {:ok, {:ok, %{resolution | target_value: transformed_target}}, next_state} + + {:error, reason, next_state} -> + next_state = %{next_state | active: MapSet.delete(next_state.active, target_uri)} + {:error, reason, next_state} + end + end + end + + defp 
apply_transform_callback(fun, location, outcome, current_node, state) do + case fun.(location, outcome) do + {:replace, replacement} -> + {:ok, replacement, state} + + :keep -> + {:ok, current_node, state} + + {:error, reason} -> + {:error, reason, state} + end + end + defp walk_document( scan_document, resolve_document, @@ -367,7 +1018,7 @@ defmodule JSONSchex.Ref do end defp maybe_walk_resolution(state, %Resolution{} = resolution, loader, trail) do - target_uri = resolution.target_uri + target_uri = resolution_uri(resolution) cond do not walkable_document?(resolution.target_value) -> @@ -379,7 +1030,6 @@ defmodule JSONSchex.Ref do MapSet.member?(state.active, target_uri) -> push_event(state, %Cycle{ location: resolution.location, - target_uri: target_uri, trail: Enum.reverse([target_uri | trail]) }) @@ -430,12 +1080,8 @@ defmodule JSONSchex.Ref do %{state | seen_locations: MapSet.put(state.seen_locations, location_key(location))} end - defp location_key(%Location{} = location) do - {location.source, location.base_uri, location.path, location.absolute_uri} - end - defp path_prefix_from_resolution(%Resolution{target_pointer: target_pointer}) do - pointer_to_path(target_pointer) + decode_target_pointer_path!(target_pointer) end defp same_source_resource_root?(%Resolution{} = resolution) do @@ -443,10 +1089,12 @@ defmodule JSONSchex.Ref do resolution.target_source == resolution.location.source end - defp next_base_uri(%Resolution{target_uri: target_uri, target_source: target_source}) do + defp next_base_uri(%Resolution{target_source: target_source} = resolution) do + target_uri = resolution_uri(resolution) + cond do is_binary(target_uri) -> - base_of(target_uri) + URIUtil.base(target_uri) is_binary(target_source) -> target_source @@ -464,7 +1112,6 @@ defmodule JSONSchex.Ref do defp build_resolution(location, target) do %Resolution{ location: location, - target_uri: location.absolute_uri, target_source: target.source, target_document: target.document, target_value: 
target.value, @@ -472,6 +1119,12 @@ defmodule JSONSchex.Ref do } end + defp resolution_uri(%Resolution{location: %Location{absolute_uri: absolute_uri}}) + when is_binary(absolute_uri), + do: absolute_uri + + defp resolution_uri(_), do: nil + defp normalize_location(%Location{} = location, source, root_base_uri) do base_uri = location.base_uri || root_base_uri absolute_uri = location.absolute_uri || resolve_reference(base_uri, location.raw_ref) @@ -480,8 +1133,7 @@ defmodule JSONSchex.Ref do location | source: location.source || source, base_uri: base_uri, - absolute_uri: absolute_uri, - fragment: location.fragment || fragment_of(absolute_uri || location.raw_ref) + absolute_uri: absolute_uri } end @@ -511,8 +1163,7 @@ defmodule JSONSchex.Ref do path: path ++ ["$ref"], source: source, base_uri: effective_base_uri, - absolute_uri: absolute_uri, - fragment: fragment_of(absolute_uri || ref) + absolute_uri: absolute_uri } | acc ] @@ -668,7 +1319,6 @@ defmodule JSONSchex.Ref do %Error{ kind: :missing_target, location: location, - target_uri: location.absolute_uri, details: :unknown_local_resource }, cache} end @@ -686,7 +1336,6 @@ defmodule JSONSchex.Ref do %Error{ kind: :missing_target, location: location, - target_uri: location.absolute_uri, details: :missing_external_resource }, cache} @@ -723,14 +1372,13 @@ defmodule JSONSchex.Ref do %Error{ kind: :missing_target, location: location, - target_uri: location.absolute_uri, details: reason }} end end defp resolve_within_index(index, _resource, fragment, location) do - anchor_uri = with_optional_fragment(base_of(location.absolute_uri), fragment) + anchor_uri = with_optional_fragment(URIUtil.base(location.absolute_uri), fragment) case Map.get(index.anchors, anchor_uri) do nil -> @@ -738,15 +1386,19 @@ defmodule JSONSchex.Ref do %Error{ kind: :missing_target, location: location, - target_uri: location.absolute_uri, details: fragment }} anchor -> + pointer = + anchor.path + |> ExJSONPointer.encode_path(format: "uri_fragment") 
+ |> normalize_root_target_pointer() + {:ok, %{ document: anchor.document, - pointer: path_to_pointer(anchor.path), + pointer: pointer, source: anchor.source, value: anchor.value }} @@ -775,7 +1427,6 @@ defmodule JSONSchex.Ref do %Error{ kind: :missing_document, location: location, - target_uri: location.absolute_uri, details: :loader_not_configured }, cache} end @@ -796,7 +1447,6 @@ defmodule JSONSchex.Ref do %Error{ kind: :missing_document, location: location, - target_uri: location.absolute_uri, details: reason }, cache} @@ -805,7 +1455,6 @@ defmodule JSONSchex.Ref do %Error{ kind: :invalid_loader_response, location: location, - target_uri: location.absolute_uri, details: other }, cache} end @@ -827,7 +1476,7 @@ defmodule JSONSchex.Ref do end defp loader_from_opts(opts) do - Keyword.get(opts, :loader) || Keyword.get(opts, :external_loader) + Keyword.get(opts, :loader) end defp effective_base_uri(base_uri, map) do @@ -843,13 +1492,13 @@ defmodule JSONSchex.Ref do defp resolve_reference(base, uri) when is_binary(base) and is_binary(uri) do cond do uri == "" -> - base_of(base) + URIUtil.base(base) absolute_uri?(uri) -> uri String.starts_with?(uri, "#") -> - base = base_of(base) + base = URIUtil.base(base) with_optional_fragment(base, String.trim_leading(uri, "#")) absolute_uri?(base) -> @@ -866,14 +1515,14 @@ defmodule JSONSchex.Ref do resolved_path = cond do ref_path == "" -> - base_of(base) + URIUtil.base(base) String.starts_with?(ref_path, "/") -> ref_path true -> base - |> base_of() + |> URIUtil.base() |> path_dirname() |> join_and_normalize(ref_path) end @@ -887,48 +1536,21 @@ defmodule JSONSchex.Ref do defp absolute_uri?(_), do: false - defp base_of(value) when is_binary(value) do - value - |> URIUtil.split_fragment() - |> elem(0) - end - - defp base_of(_), do: "" - - defp fragment_of(value) when is_binary(value), do: URIUtil.fragment(value) - defp fragment_of(_), do: nil - - defp path_to_pointer([]), do: nil + defp decode_target_pointer_path!(nil), do: [] - 
defp path_to_pointer(path) when is_list(path) do - encoded = Enum.map(path, &encode_pointer_segment/1) - "#/" <> Enum.join(encoded, "/") - end - - defp pointer_to_path(nil), do: [] - defp pointer_to_path("#"), do: [] - - defp pointer_to_path("#/" <> rest) do - rest - |> String.split("/", trim: true) - |> Enum.map(&decode_pointer_segment/1) - end - - defp pointer_to_path(_), do: [] + defp decode_target_pointer_path!(target_pointer) when is_binary(target_pointer) do + case ExJSONPointer.decode_path(target_pointer) do + {:ok, path} -> + path - defp encode_pointer_segment(segment) when is_integer(segment), do: Integer.to_string(segment) - - defp encode_pointer_segment(segment) when is_binary(segment) do - segment - |> String.replace("~", "~0") - |> String.replace("/", "~1") + {:error, reason} -> + raise RuntimeError, + "invalid internal target_pointer #{inspect(target_pointer)}: #{inspect(reason)}" + end end - defp decode_pointer_segment(segment) when is_binary(segment) do - segment - |> String.replace("~1", "/") - |> String.replace("~0", "~") - end + defp normalize_root_target_pointer("#"), do: nil + defp normalize_root_target_pointer(pointer), do: pointer defp split_target(value) when is_binary(value) do {base, fragment} = URIUtil.split_fragment(value) diff --git a/lib/jsonschex/schema.ex b/lib/jsonschex/schema.ex index 70660ce..cbd6bdd 100644 --- a/lib/jsonschex/schema.ex +++ b/lib/jsonschex/schema.ex @@ -29,7 +29,7 @@ defmodule JSONSchex.Schema do `JSONSchex.Types.Schema` directly into the caller module. The schema argument must be a compile-time literal map or boolean. Options - must also be compile-time literals. If you pass `:external_loader`, prefer a + must also be compile-time literals. If you pass `:loader`, prefer a remote capture such as `&MyLoader.fetch/1` so the compiled schema remains embeddable. 
@@ -37,7 +37,7 @@ defmodule JSONSchex.Schema do The available options are the same as `JSONSchex.compile/2`: - - `:external_loader` — `(uri -> {:ok, map()} | {:error, term()})` for remote `$ref` schemas + - `:loader` — `(uri -> {:ok, map()} | {:error, term()})` for remote `$ref` schemas - `:base_uri` — Starting base URI for resolving relative references - `:format_assertion` — Enable strict `format` validation (default: `false`) - `:content_assertion` — Enable strict content vocabulary validation (default: `false`) diff --git a/lib/jsonschex/types.ex b/lib/jsonschex/types.ex index 2ca6969..266bffc 100644 --- a/lib/jsonschex/types.ex +++ b/lib/jsonschex/types.ex @@ -14,7 +14,7 @@ defmodule JSONSchex.Types do # Used for error reporting context source_id: String.t() | nil, raw: map() | nil, - external_loader: (String.t() -> {:ok, map()} | {:error, term()}) | nil, + loader: (String.t() -> {:ok, map()} | {:error, term()}) | nil, format_assertion: boolean(), content_assertion: boolean() } @@ -23,7 +23,7 @@ defmodule JSONSchex.Types do :defs, :source_id, :raw, - :external_loader, + :loader, format_assertion: false, content_assertion: false ] diff --git a/lib/jsonschex/uri_util.ex b/lib/jsonschex/uri_util.ex index e91180a..a8ef0fe 100644 --- a/lib/jsonschex/uri_util.ex +++ b/lib/jsonschex/uri_util.ex @@ -25,6 +25,7 @@ defmodule JSONSchex.URIUtil do @spec resolve(String.t() | nil, String.t() | nil) :: String.t() | nil def resolve(nil, uri), do: uri def resolve(base, nil), do: base + def resolve(base, uri) do try do base |> URI.merge(uri) |> URI.to_string() @@ -62,6 +63,26 @@ defmodule JSONSchex.URIUtil do end end + @doc """ + Returns the base portion of a URI or reference, without any fragment. 
+ + ## Examples + + iex> JSONSchex.URIUtil.base("https://example.com/schema#meta") + "https://example.com/schema" + + iex> JSONSchex.URIUtil.base("#/$defs/foo") + "" + + iex> JSONSchex.URIUtil.base("https://example.com/schema") + "https://example.com/schema" + """ + @spec base(String.t()) :: String.t() + def base(uri) when is_binary(uri) do + {base, _fragment} = split_fragment(uri) + base + end + @doc """ Returns only the fragment portion of a URI or reference, without the leading `#`. @@ -101,8 +122,11 @@ defmodule JSONSchex.URIUtil do """ @spec with_fragment(String.t(), String.t() | nil) :: String.t() def with_fragment(base, nil) when is_binary(base), do: base - def with_fragment(base, fragment) when is_binary(base) and is_binary(fragment), do: base <> "#" <> fragment - #def with_fragment(base, fragment) when is_binary(base) and is_binary(fragment), do: base <> fragment + + def with_fragment(base, fragment) when is_binary(base) and is_binary(fragment), + do: base <> "#" <> fragment + + # def with_fragment(base, fragment) when is_binary(base) and is_binary(fragment), do: base <> fragment @doc """ Converts a fragment into a local reference string. 
@@ -145,11 +169,14 @@ defmodule JSONSchex.URIUtil do @spec remote_ref?(String.t()) :: boolean() # Optimization: Use binary pattern matching with case-insensitive check def remote_ref?(<>) - when (h == ?h or h == ?H) and (t == ?t or t == ?T) and (t2 == ?t or t2 == ?T) and (p == ?p or p == ?P), + when (h == ?h or h == ?H) and (t == ?t or t == ?T) and (t2 == ?t or t2 == ?T) and + (p == ?p or p == ?P), do: true + def remote_ref?(<>) when (h == ?h or h == ?H) and (t == ?t or t == ?T) and (t2 == ?t or t2 == ?T) and - (p == ?p or p == ?P) and (s == ?s or s == ?S), + (p == ?p or p == ?P) and (s == ?s or s == ?S), do: true + def remote_ref?(_), do: false end diff --git a/lib/jsonschex/validator/reference.ex b/lib/jsonschex/validator/reference.ex index a6390ca..ca788af 100644 --- a/lib/jsonschex/validator/reference.ex +++ b/lib/jsonschex/validator/reference.ex @@ -1,7 +1,7 @@ defmodule JSONSchex.Validator.Reference do @moduledoc """ - Resolves `$ref` and `$dynamicRef` during validation, including remote schema - loading and JIT compilation of JSON Pointer references. + Resolves `$ref` and `$dynamicRef` during validation, including external + document loading and JIT compilation of JSON Pointer references. """ alias JSONSchex.Validator @@ -62,9 +62,13 @@ defmodule JSONSchex.Validator.Reference do @doc """ Resolves and validates a static `$ref`. - Looks up the target schema in the compiled `defs` registry. If not found locally - and the reference points to a remote URI, attempts to load it using the `external_loader`. - Supports JSON Pointer references, anchor references, and absolute/relative URIs. + Looks up the target schema in the compiled `defs` registry. If not found + locally and the reference is external, attempts to load it using the + `loader`. + + The loader receives the resolved document URI without the fragment. Supports + JSON Pointer references, anchor references, and absolute/relative URIs, + including path-like local file refs. 
""" def validate_ref(data, ref_string, {path, evaluated, validation_context}) do effective_context = effective_context_for_ref(validation_context, ref_string) @@ -96,7 +100,12 @@ defmodule JSONSchex.Validator.Reference do uri_to_load = resolve_relative_uri(validation_context.source_id, ref_string) with nil <- check_registry_for_base(uri_to_load, validation_context.root_schema.defs), - :ok <- check_load_remote(validation_context.root_schema.external_loader, uri_to_load), + :ok <- + check_load_remote( + validation_context.root_schema.loader, + ref_string, + uri_to_load + ), result <- load_remote_schema(data, uri_to_load, path, validation_context, evaluated) do result else @@ -173,11 +182,12 @@ defmodule JSONSchex.Validator.Reference do end end - defp check_load_remote(external_loader, uri_to_load) when is_function(external_loader) do - if URIUtil.remote_ref?(uri_to_load), do: :ok, else: :halt + defp check_load_remote(loader, ref_string, _uri_to_load) + when is_function(loader) do + if Ref.external_ref?(ref_string), do: :ok, else: :halt end - defp check_load_remote(_, _), do: :halt + defp check_load_remote(_, _, _), do: :halt defp built_in_defs_for_ref(_, nil), do: nil @@ -217,6 +227,9 @@ defmodule JSONSchex.Validator.Reference do String.starts_with?(ref_string, base_uri <> "#") -> ref_string + Ref.local_ref?(ref_string) -> + URIUtil.with_fragment(base_uri, URIUtil.fragment(ref_string)) + true -> case URI.parse(base_uri) do %{scheme: "urn"} -> @@ -226,11 +239,36 @@ defmodule JSONSchex.Validator.Reference do URIUtil.resolve(base_uri, ref_string) _ -> - ref_string + resolve_path_like_uri(base_uri, ref_string) end end end + defp resolve_path_like_uri(base_uri, ref_string) do + {ref_path, fragment} = URIUtil.split_fragment(ref_string) + + resolved_path = + cond do + ref_path == "" -> + URIUtil.base(base_uri) + + String.starts_with?(ref_path, "/") -> + ref_path + + true -> + base_uri + |> URIUtil.base() + |> path_dirname() + |> join_and_normalize(ref_path) + end + + if 
fragment in [nil, ""] do + resolved_path + else + resolved_path <> "#" <> fragment + end + end + defp load_remote_schema(data, uri, current_path, validation_context, evaluated) do {base, fragment} = URIUtil.split_fragment(uri) @@ -289,12 +327,12 @@ defmodule JSONSchex.Validator.Reference do end defp load_external_schema(uri, base, validation_context) do - case validation_context.root_schema.external_loader do + case validation_context.root_schema.loader do loader when is_function(loader) -> - case loader.(uri) do + case loader.(base) do {:ok, remote_raw_map} -> opts = [ - external_loader: validation_context.root_schema.external_loader, + loader: validation_context.root_schema.loader, base_uri: base, format_assertion: validation_context.root_schema.format_assertion, content_assertion: validation_context.root_schema.content_assertion @@ -320,6 +358,29 @@ defmodule JSONSchex.Validator.Reference do end end + defp path_dirname(path) do + case Path.dirname(path) do + "." -> "" + value -> value + end + end + + defp join_and_normalize("", path) do + path + |> Path.expand("/") + |> String.trim_leading("/") + end + + defp join_and_normalize(base, path) do + if String.starts_with?(base, "/") do + Path.expand(path, base) + else + base + |> then(&Path.expand(path, "/" <> &1)) + |> String.trim_leading("/") + end + end + defp validate_loaded_schema( data, compiled_schema, @@ -364,7 +425,7 @@ defmodule JSONSchex.Validator.Reference do case resolve_jit_fragment(raw_root, pointer, validation_context) do {:ok, found_fragment} -> opts = [ - external_loader: validation_context.root_schema.external_loader, + loader: validation_context.root_schema.loader, format_assertion: validation_context.root_schema.format_assertion, content_assertion: validation_context.root_schema.content_assertion ] diff --git a/mix.exs b/mix.exs index c841d32..dcdf8b0 100644 --- a/mix.exs +++ b/mix.exs @@ -117,7 +117,7 @@ defmodule JSONSchex.MixProject do # Run "mix help deps" to learn about dependencies. 
defp deps do [ - {:ex_json_pointer, "~> 0.6"}, + {:ex_json_pointer, "~> 0.7"}, {:jason, "~> 1.4", optional: true}, {:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", optional: true}, {:idna, "~> 6.0 or ~> 7.1", optional: true}, diff --git a/mix.lock b/mix.lock index cd8d76b..5dd4e03 100644 --- a/mix.lock +++ b/mix.lock @@ -4,7 +4,7 @@ "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, "ex_doc": {:hex, :ex_doc, "0.40.2", "f50edec428c4b0a457a167de42414c461122a3585a99515a69d09fff19e5597e", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "4fa426e2beb47854a162e2c488727fdec51cd4692e319b23810c2804cb1a40fe"}, - "ex_json_pointer": {:hex, :ex_json_pointer, "0.6.1", "ca6ddf7be8deba681ad406862b52799465c936df724ec16627fe7b26085d4725", [:mix], [], "hexpm", "313a3788314abeeddd6e1aba242d802c57772e11bcec3d19f2e7f96c62d8ddb1"}, + "ex_json_pointer": {:hex, :ex_json_pointer, "0.7.0", "0824178215f4d65d9250bdd8c79e03ed44cb20d7f4a89ccd6d2f82a400d0ffc6", [:mix], [], "hexpm", "b1924d38461112268c6adaa6a9a9e6f489ab64113d06a61733cd040143294ed1"}, "idna": {:hex, :idna, "7.1.0", "1067a13043538129602d2f2ce6899d8713125c7d19734aa557ce2e3ea55bd4f1", [:rebar3], [], "hexpm", "6ae959a025bf36df61a8cab8508d9654891b5426a84c44d82deaffd6ddf8c71f"}, "jason": {:hex, 
:jason, "1.4.5", "2e3a008590b0b8d7388c20293e9dcc9cf3e5d642fd2a114e4cbbb52e595d940a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b0c823996102bcd0239b3c2444eb00409b72f6a140c1950bc8b457d836b30684"}, "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, diff --git a/test/debug_defs_suite_test.exs b/test/debug_defs_suite_test.exs index 8059e7c..47b888b 100644 --- a/test/debug_defs_suite_test.exs +++ b/test/debug_defs_suite_test.exs @@ -7,7 +7,7 @@ defmodule JSONSchex.Test.Defs do "$ref" => "https://json-schema.org/draft/2020-12/schema" } - assert {:ok, c} = JSONSchex.compile(schema, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + assert {:ok, c} = JSONSchex.compile(schema, loader: &JSONSchex.Test.SuiteLoader.load/1) assert {:error, _errors} = JSONSchex.validate(c, %{"$defs" => %{"foo" => %{"type" => 1}}}) assert :ok == JSONSchex.validate(c, %{"$defs" => %{"foo" => %{"type" => "integer"}}}) end diff --git a/test/debug_dynamic_ref_suite_test.exs b/test/debug_dynamic_ref_suite_test.exs index 2e37acc..3897e50 100644 --- a/test/debug_dynamic_ref_suite_test.exs +++ b/test/debug_dynamic_ref_suite_test.exs @@ -31,7 +31,7 @@ defmodule JSONSchex.Test.DebugDynamicRefSuite do # } # """ # {:ok, a} = JSON.decode(schema) - # {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1]) + # {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1]) # assert {:error, _} = JSONSchex.validate(c, %{"a" => true}) # assert {:error, _} = JSONSchex.validate(c, %{"elements" => [%{"b" => 1}]}) # assert :ok == JSONSchex.validate(c, %{"elements" => [%{"a" => 1}]}) @@ -68,7 +68,7 @@ defmodule JSONSchex.Test.DebugDynamicRefSuite do # } # """ # {:ok, a} = 
JSON.decode(schema) - # {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1]) + # {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1]) # assert :ok = JSONSchex.validate(c, ["foo", "1"]) # assert {:error, _} = JSONSchex.validate(c, ["foo", 1]) #end @@ -115,7 +115,7 @@ defmodule JSONSchex.Test.DebugDynamicRefSuite do # } # """ # {:ok, a} = JSON.decode(schema) - # {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1]) + # {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1]) # assert {:error, _} = JSONSchex.validate(c, "a string") # assert {:error, _} = JSONSchex.validate(c, 42) # assert :ok == JSONSchex.validate(c, nil) @@ -168,7 +168,7 @@ defmodule JSONSchex.Test.DebugDynamicRefSuite do } """ {:ok, a} = JSON.decode(schema) - {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1]) + {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1]) assert {:error, _} = JSONSchex.validate(c, "a string") assert {:error, _} = JSONSchex.validate(c, "hey") assert :ok == JSONSchex.validate(c, "hi") diff --git a/test/debug_optional_format_suite_test.exs b/test/debug_optional_format_suite_test.exs index 6dcf1d8..2ae4ecf 100644 --- a/test/debug_optional_format_suite_test.exs +++ b/test/debug_optional_format_suite_test.exs @@ -30,7 +30,7 @@ defmodule JSONSchex.Test.DebugOptionalFormatSuite do # } # """ # {:ok, a} = JSON.decode(schema) - # {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1, format_assertion: true]) + # {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1, format_assertion: true]) # assert {:error, _} = JSONSchex.validate(c, "2020-04-31") # assert :ok == JSONSchex.validate(c, "2020-04-30") #end @@ -43,7 +43,7 @@ defmodule JSONSchex.Test.DebugOptionalFormatSuite do # } # """ # {:ok, a} = JSON.decode(schema) - # {:ok, c} = JSONSchex.compile(a, 
[external_loader: &JSONSchex.Test.SuiteLoader.load/1, format_assertion: true]) + # {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1, format_assertion: true]) # assert :ok == JSONSchex.validate(c, "실례@실례.테스트") #end end diff --git a/test/debug_ref_suite_test.exs b/test/debug_ref_suite_test.exs index 1e2156b..5a08665 100644 --- a/test/debug_ref_suite_test.exs +++ b/test/debug_ref_suite_test.exs @@ -186,7 +186,7 @@ defmodule JSONSchex.Test.DebugRefSuite do # } # """ # {:ok, a} = JSON.decode(schema) - # {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1]) + # {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1]) ## {:ok, c} = JSONSchex.compile(a) # IO.puts "final compiled: #{inspect(c, pretty: true)}" # IO.puts "***start validate***" diff --git a/test/debug_vocabulary_suite_test.exs b/test/debug_vocabulary_suite_test.exs index b7afd2f..c6ca42f 100644 --- a/test/debug_vocabulary_suite_test.exs +++ b/test/debug_vocabulary_suite_test.exs @@ -20,7 +20,7 @@ defmodule JSONSchex.Test.DebugVocabularySuite do } """ {:ok, a} = JSON.decode(schema) - {:ok, c} = JSONSchex.compile(a, [external_loader: &JSONSchex.Test.SuiteLoader.load/1]) + {:ok, c} = JSONSchex.compile(a, [loader: &JSONSchex.Test.SuiteLoader.load/1]) assert {:error, _} = JSONSchex.validate(c, %{"badProperty" => "this property should not exist"}) assert :ok == JSONSchex.validate(c, %{"numberProperty" => 1}) assert :ok == JSONSchex.validate(c, %{"numberProperty" => 20}) diff --git a/test/format_assertion_test.exs b/test/format_assertion_test.exs index c129ac4..dfcfdf0 100644 --- a/test/format_assertion_test.exs +++ b/test/format_assertion_test.exs @@ -8,7 +8,7 @@ defmodule JSONSchex.Test.FormatAssertionTest do } # Default behavior: format is annotation only - {:ok, compiled} = JSONSchex.compile(schema, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + {:ok, compiled} = JSONSchex.compile(schema, loader: 
&JSONSchex.Test.SuiteLoader.load/1) assert JSONSchex.validate(compiled, "not-an-email") == :ok assert JSONSchex.validate(compiled, "test@example.com") == :ok end @@ -20,7 +20,7 @@ defmodule JSONSchex.Test.FormatAssertionTest do } # Forced assertion - {:ok, compiled} = JSONSchex.compile(schema, format_assertion: true, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + {:ok, compiled} = JSONSchex.compile(schema, format_assertion: true, loader: &JSONSchex.Test.SuiteLoader.load/1) assert {:error, errors} = JSONSchex.validate(compiled, "not-an-email") assert Enum.any?(errors, fn e -> e.rule == :format end) @@ -61,7 +61,7 @@ defmodule JSONSchex.Test.FormatAssertionTest do "format" => "email" } - {:ok, compiled} = JSONSchex.compile(schema, external_loader: loader) + {:ok, compiled} = JSONSchex.compile(schema, loader: loader) assert {:error, errors} = JSONSchex.validate(compiled, "not-an-email") assert Enum.any?(errors, fn e -> e.rule == :format end) @@ -84,11 +84,11 @@ defmodule JSONSchex.Test.FormatAssertionTest do } # Without assertion option (should pass invalid email) - {:ok, compiled_default} = JSONSchex.compile(main_schema, external_loader: loader) + {:ok, compiled_default} = JSONSchex.compile(main_schema, loader: loader) assert JSONSchex.validate(compiled_default, "not-an-email") == :ok # With assertion option (should fail invalid email) - {:ok, compiled_forced} = JSONSchex.compile(main_schema, external_loader: loader, format_assertion: true) + {:ok, compiled_forced} = JSONSchex.compile(main_schema, loader: loader, format_assertion: true) assert {:error, errors} = JSONSchex.validate(compiled_forced, "not-an-email") assert Enum.any?(errors, fn e -> e.rule == :format end) end diff --git a/test/meta_schema_test.exs b/test/meta_schema_test.exs index 0204149..e93318c 100644 --- a/test/meta_schema_test.exs +++ b/test/meta_schema_test.exs @@ -42,7 +42,7 @@ defmodule JSONSchex.Test.MetaSchema do raise "#{uri} is unknown" end - {:ok, compiled} = 
JSONSchex.compile(schema, external_loader: loader) + {:ok, compiled} = JSONSchex.compile(schema, loader: loader) data = %{"foo" => "bar", "baz" => "bax"} assert JSONSchex.validate(compiled, data) == :ok @@ -55,7 +55,7 @@ defmodule JSONSchex.Test.MetaSchema do } assert_raise RuntimeError, ~r/#{invalid_schema} is unknown/, fn -> - JSONSchex.compile(schema, external_loader: loader) + JSONSchex.compile(schema, loader: loader) end end @@ -76,7 +76,7 @@ defmodule JSONSchex.Test.MetaSchema do raise "#{uri} is unknown" end - assert {:ok, compiled} = JSONSchex.compile(schema, external_loader: loader) + assert {:ok, compiled} = JSONSchex.compile(schema, loader: loader) assert JSONSchex.validate(compiled, "not-an-email") == :ok end @@ -94,7 +94,7 @@ defmodule JSONSchex.Test.MetaSchema do raise "#{uri} is unknown" end - assert {:error, error} = JSONSchex.compile(schema, external_loader: loader) + assert {:error, error} = JSONSchex.compile(schema, loader: loader) assert error.rule == :unsupported_vocabulary assert error.path == ["$vocabulary", "https://example.com/custom-vocab"] assert error.value == true diff --git a/test/ref_internal_regression_test.exs b/test/ref_internal_regression_test.exs index 5fa1a7c..313b9c3 100644 --- a/test/ref_internal_regression_test.exs +++ b/test/ref_internal_regression_test.exs @@ -51,7 +51,7 @@ defmodule JSONSchex.Test.RefInternalRegression do test "validator JIT fallback still resolves local pointer fragments inside loaded schemas with a different root $id" do loader = fn - "http://example.com/remote.json#/properties/foo" -> + "http://example.com/remote.json" -> {:ok, %{ "$id" => "http://example.com/actual/loaded.json", @@ -69,9 +69,48 @@ defmodule JSONSchex.Test.RefInternalRegression do "$ref" => "http://example.com/remote.json#/properties/foo" } - assert {:ok, compiled} = JSONSchex.compile(schema, external_loader: loader) + assert {:ok, compiled} = JSONSchex.compile(schema, loader: loader) assert :ok == JSONSchex.validate(compiled, 42) 
assert {:error, [%{rule: :type}]} = JSONSchex.validate(compiled, "not an integer") end + + test "runtime validation loads path-like external refs through loader using document URI without fragment" do + parent = self() + + loader = fn + "specs/schemas/common.json" -> + send(parent, {:loaded, "specs/schemas/common.json"}) + + {:ok, + %{ + "$defs" => %{ + "id" => %{"type" => "integer"} + } + }} + + other -> + send(parent, {:unexpected_load, other}) + {:error, :enoent} + end + + schema = %{ + "type" => "object", + "properties" => %{ + "id" => %{"$ref" => "schemas/common.json#/$defs/id"} + } + } + + assert {:ok, compiled} = + JSONSchex.compile(schema, + base_uri: "specs/root.json", + loader: loader + ) + + assert :ok == JSONSchex.validate(compiled, %{"id" => 1}) + assert {:error, [%{rule: :type}]} = JSONSchex.validate(compiled, %{"id" => "1"}) + + assert_received {:loaded, "specs/schemas/common.json"} + refute_received {:unexpected_load, _} + end end diff --git a/test/ref_remote_test.exs b/test/ref_remote_test.exs index 481b8ae..d69992f 100644 --- a/test/ref_remote_test.exs +++ b/test/ref_remote_test.exs @@ -7,7 +7,7 @@ defmodule JSONSchex.Test.RefRemote do "$ref" => "http://localhost:1234/different-id-ref-string.json" } - assert {:ok, c} = JSONSchex.compile(schema, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + assert {:ok, c} = JSONSchex.compile(schema, loader: &JSONSchex.Test.SuiteLoader.load/1) assert {:error, _e} = JSONSchex.validate(c, 1) end @@ -20,7 +20,7 @@ defmodule JSONSchex.Test.RefRemote do "name" => %{"$ref" => "name-defs.json#/$defs/orNull"} } } - assert {:ok, c} = JSONSchex.compile(schema, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + assert {:ok, c} = JSONSchex.compile(schema, loader: &JSONSchex.Test.SuiteLoader.load/1) assert :ok == JSONSchex.validate(c, %{"name" => "foo"}) assert :ok == JSONSchex.validate(c, %{"name" => nil}) assert {:error, _} = JSONSchex.validate(c, 1) @@ -49,7 +49,7 @@ defmodule JSONSchex.Test.RefRemote do } } } - 
assert {:ok, c} = JSONSchex.compile(schema, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + assert {:ok, c} = JSONSchex.compile(schema, loader: &JSONSchex.Test.SuiteLoader.load/1) assert :ok == JSONSchex.validate(c, %{"list" => [1]}) assert {:error, [e]} = JSONSchex.validate(c, %{"list" => ["1"]}) assert e.rule == :type @@ -63,7 +63,7 @@ defmodule JSONSchex.Test.RefRemote do "name" => %{"$ref" => "nested/foo-ref-string.json"} } } - assert {:ok, c} = JSONSchex.compile(schema, external_loader: &JSONSchex.Test.SuiteLoader.load/1) + assert {:ok, c} = JSONSchex.compile(schema, loader: &JSONSchex.Test.SuiteLoader.load/1) assert :ok == JSONSchex.validate(c, %{"name" => %{"foo" => "a"}}) assert {:error, _e} = JSONSchex.validate(c, %{"name" => %{"foo" => 1}}) end @@ -104,7 +104,7 @@ defmodule JSONSchex.Test.RefRemote do "$ref" => "http://example.com/remote.json#/properties/foo" } - assert {:ok, compiled} = JSONSchex.compile(schema, external_loader: &dummy_loader/1) + assert {:ok, compiled} = JSONSchex.compile(schema, loader: &dummy_loader/1) assert :ok == JSONSchex.validate(compiled, 42) @@ -116,7 +116,7 @@ defmodule JSONSchex.Test.RefRemote do "$ref" => "http://example.com/remote.json#/properties/does_not_exist" } - assert {:ok, compiled} = JSONSchex.compile(schema, external_loader: &dummy_loader/1) + assert {:ok, compiled} = JSONSchex.compile(schema, loader: &dummy_loader/1) assert {:error, [%{rule: :ref, context: %{contrast: "ref_not_found"}}]} = JSONSchex.validate(compiled, 42) diff --git a/test/ref_test.exs b/test/ref_test.exs index fe26d96..60448df 100644 --- a/test/ref_test.exs +++ b/test/ref_test.exs @@ -4,7 +4,7 @@ defmodule JSONSchex.Test.Ref do doctest JSONSchex.Ref alias JSONSchex.Ref - alias JSONSchex.Ref.{Cycle, Error, Resolution} + alias JSONSchex.Ref.{Cycle, Error, Location, Resolution} describe "scan/2" do test "discovers structural refs and tracks nested base URIs" do @@ -35,11 +35,389 @@ defmodule JSONSchex.Test.Ref do assert wrapper_ref.source == 
"https://example.com/root.json" assert wrapper_ref.base_uri == "https://example.com/root.json" assert wrapper_ref.absolute_uri == "https://example.com/root.json#/components/schemas/Base" - assert wrapper_ref.fragment == "/components/schemas/Base" + assert JSONSchex.URIUtil.fragment(wrapper_ref.absolute_uri) == "/components/schemas/Base" assert response_ref.base_uri == "https://example.com/schemas/user.json" assert response_ref.absolute_uri == "https://example.com/schemas/common.json#/$defs/error" - assert response_ref.fragment == "/$defs/error" + assert JSONSchex.URIUtil.fragment(response_ref.absolute_uri) == "/$defs/error" + end + end + + describe "public helpers" do + test "classifies local and external refs" do + assert Ref.local_ref?("#/$defs/name") + refute Ref.local_ref?("schemas/common.json#/$defs/name") + + refute Ref.external_ref?("#/$defs/name") + assert Ref.external_ref?("schemas/common.json#/$defs/name") + end + + test "returns node paths, location keys, resource URIs, and indexed walk events" do + location = %Location{ + raw_ref: "schemas/common.json#/$defs/id", + path: ["components", "User", "$ref"], + source: "specs/root.json", + base_uri: "specs/root.json", + absolute_uri: "specs/schemas/common.json#/$defs/id" + } + + resolution = %Resolution{ + location: location, + target_source: "specs/schemas/common.json", + target_document: %{"$defs" => %{"id" => %{"type" => "integer"}}}, + target_value: %{"type" => "integer"}, + target_pointer: "#/$defs/id" + } + + error = %Error{kind: :missing_target, location: location} + + cycle = %Cycle{ + location: location, + trail: [location.absolute_uri] + } + + key = + {"specs/root.json", "specs/root.json", ["components", "User", "$ref"], + "specs/schemas/common.json#/$defs/id"} + + assert Location.node_path(location) == ["components", "User"] + assert Ref.location_key(location) == key + + assert Ref.resource_uri(location) == "specs/root.json" + assert Ref.resource_uri(resolution) == "specs/schemas/common.json" + 
assert Ref.resource_uri(error) == "specs/schemas/common.json" + assert Ref.resource_uri(cycle) == "specs/schemas/common.json" + + assert %{resolutions: resolutions, errors: errors, cycles: cycles} = + Ref.index_walk_events([resolution, error, cycle]) + + assert resolutions[key] == resolution + assert errors[key] == error + assert cycles[key] == cycle + end + + test "renders refs in original, absolute, and prefer_local modes" do + same_resource_location = %Location{ + raw_ref: "#/$defs/name", + path: ["schema", "$ref"], + source: "https://example.com/root.json", + base_uri: "https://example.com/root.json", + absolute_uri: "https://example.com/root.json#/$defs/name" + } + + same_resource_resolution = %Resolution{ + location: same_resource_location, + target_source: "https://example.com/root.json", + target_document: %{"$defs" => %{"name" => %{"type" => "string"}}}, + target_value: %{"type" => "string"}, + target_pointer: "#/$defs/name" + } + + anchor_location = %{ + same_resource_location + | raw_ref: "#name", + absolute_uri: "https://example.com/root.json#name" + } + + anchor_resolution = %Resolution{ + same_resource_resolution + | location: anchor_location, + target_pointer: "#/$defs/name" + } + + cross_resource_location = %Location{ + raw_ref: "schemas/common.json#/$defs/id", + path: ["schema", "$ref"], + source: "specs/root.json", + base_uri: "specs/root.json", + absolute_uri: "specs/schemas/common.json#/$defs/id" + } + + cross_resource_resolution = %Resolution{ + location: cross_resource_location, + target_source: "specs/schemas/common.json", + target_document: %{"$defs" => %{"id" => %{"type" => "integer"}}}, + target_value: %{"type" => "integer"}, + target_pointer: "#/$defs/id" + } + + root_location = %{ + same_resource_location + | raw_ref: "#", + absolute_uri: "https://example.com/root.json" + } + + root_resolution = %Resolution{ + same_resource_resolution + | location: root_location, + target_pointer: nil, + target_value: %{}, + target_document: %{} + } + + 
assert Ref.render_ref(same_resource_location, same_resource_resolution, mode: :original) == + "#/$defs/name" + + assert Ref.render_ref(same_resource_location, same_resource_resolution, mode: :absolute) == + "https://example.com/root.json#/$defs/name" + + assert Ref.render_ref(same_resource_location, same_resource_resolution) == "#/$defs/name" + assert Ref.render_ref(anchor_location, anchor_resolution) == "#name" + + assert Ref.render_ref(cross_resource_location, cross_resource_resolution) == + "schemas/common.json#/$defs/id" + + assert Ref.render_ref(root_location, root_resolution) == "#" + end + end + + describe "transform/3" do + test "expands resolved refs in post-order" do + document = %{ + "start" => %{"$ref" => "schemas/common.json#/schema"} + } + + parent = self() + + loader = fn + "specs/schemas/common.json" -> + {:ok, + %{ + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + }} + + _ -> + {:error, :enoent} + end + + callback = fn location, outcome -> + case outcome do + {:ok, %Resolution{} = resolution} -> + send(parent, {:ok_location, location.absolute_uri}) + {:replace, resolution.target_value} + + {:cycle, _resolution, _cycle} -> + :keep + + {:error, error} -> + {:error, error} + end + end + + assert {:ok, transformed} = + Ref.transform(document, callback, + source: "specs/root.json", + loader: loader + ) + + assert transformed == %{"start" => %{"type" => "string"}} + assert_received {:ok_location, "specs/schemas/common.json#/$defs/name"} + assert_received {:ok_location, "specs/schemas/common.json#/schema"} + end + + test "keeps unresolved input refs relative when source is omitted" do + document = %{ + "start" => %{"$ref" => "schemas/common.json#/schema"} + } + + parent = self() + + loader = fn + "schemas/common.json" -> + send(parent, {:loaded, "schemas/common.json"}) + + {:ok, + %{ + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + }} + + _ -> + {:error, 
:enoent} + end + + callback = fn location, outcome -> + case outcome do + {:ok, %Resolution{} = resolution} -> + send( + parent, + {:ok_location, location.path, location.source, location.base_uri, location.absolute_uri} + ) + + {:replace, resolution.target_value} + + {:cycle, _resolution, _cycle} -> + :keep + + {:error, error} -> + {:error, error} + end + end + + assert {:ok, transformed} = Ref.transform(document, callback, loader: loader) + + assert transformed == %{"start" => %{"type" => "string"}} + assert_received {:loaded, "schemas/common.json"} + + assert_received {:ok_location, ["start", "$ref"], nil, nil, + "schemas/common.json#/schema"} + + assert_received {:ok_location, ["schema", "$ref"], "schemas/common.json", + "schemas/common.json", "schemas/common.json#/$defs/name"} + end + + test "prefers explicit base_uri over source for input document refs" do + document = %{ + "start" => %{"$ref" => "schemas/common.json#/schema"} + } + + parent = self() + + loader = fn + "fixtures/schemas/common.json" -> + send(parent, {:loaded, "fixtures/schemas/common.json"}) + + {:ok, + %{ + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + }} + + _ -> + {:error, :enoent} + end + + callback = fn location, outcome -> + case outcome do + {:ok, %Resolution{} = resolution} -> + send( + parent, + {:ok_location, location.path, location.source, location.base_uri, location.absolute_uri} + ) + + {:replace, resolution.target_value} + + {:cycle, _resolution, _cycle} -> + :keep + + {:error, error} -> + {:error, error} + end + end + + assert {:ok, transformed} = + Ref.transform(document, callback, + source: "specs/root.json", + base_uri: "fixtures/root.json", + loader: loader + ) + + assert transformed == %{"start" => %{"type" => "string"}} + assert_received {:loaded, "fixtures/schemas/common.json"} + + assert_received {:ok_location, ["start", "$ref"], "specs/root.json", "fixtures/root.json", + "fixtures/schemas/common.json#/schema"} + + 
assert_received {:ok_location, ["schema", "$ref"], "fixtures/schemas/common.json", + "fixtures/schemas/common.json", + "fixtures/schemas/common.json#/$defs/name"} + end + + test "preserves cycle edges when callback keeps cycle outcomes" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "node" => %{ + "type" => "object", + "properties" => %{ + "next" => %{"$ref" => "#/$defs/node"} + } + } + }, + "start" => %{"$ref" => "#/$defs/node"} + } + + parent = self() + + callback = fn _location, outcome -> + case outcome do + {:ok, %Resolution{} = resolution} -> + {:replace, resolution.target_value} + + {:cycle, %Resolution{} = resolution, %Cycle{} = cycle} -> + send(parent, {:cycle, resolution.location.absolute_uri, cycle.trail}) + :keep + + {:error, error} -> + {:error, error} + end + end + + assert {:ok, transformed} = + Ref.transform(document, callback, base_uri: "https://example.com/root.json") + + assert transformed["start"]["type"] == "object" + assert transformed["start"]["properties"]["next"] == %{"$ref" => "#/$defs/node"} + + assert_received {:cycle, "https://example.com/root.json#/$defs/node", + [ + "https://example.com/root.json#/$defs/node", + "https://example.com/root.json#/$defs/node" + ]} + end + + test "reuses cached transformed external targets across multiple locations" do + document = %{ + "first" => %{"$ref" => "schemas/common.json#/$defs/node"}, + "second" => %{"$ref" => "schemas/common.json#/$defs/node"} + } + + parent = self() + + loader = fn + "specs/schemas/common.json" -> + send(parent, {:loaded, "specs/schemas/common.json"}) + + {:ok, + %{ + "$defs" => %{ + "node" => %{"$ref" => "#/terminal"} + }, + "terminal" => %{"type" => "integer"} + }} + + _ -> + {:error, :enoent} + end + + callback = fn _location, outcome -> + case outcome do + {:ok, %Resolution{} = resolution} -> {:replace, resolution.target_value} + {:cycle, _resolution, _cycle} -> :keep + {:error, error} -> {:error, error} + end + end + + assert {:ok, 
transformed} = + Ref.transform(document, callback, + source: "specs/root.json", + loader: loader + ) + + assert transformed == %{ + "first" => %{"type" => "integer"}, + "second" => %{"type" => "integer"} + } + + assert_received {:loaded, "specs/schemas/common.json"} + refute_received {:loaded, "specs/schemas/common.json"} end end @@ -71,7 +449,7 @@ defmodule JSONSchex.Test.Ref do resolutions = Enum.filter(events, &match?(%Resolution{}, &1)) - assert Enum.map(resolutions, & &1.target_uri) == [ + assert Enum.map(resolutions, & &1.location.absolute_uri) == [ "https://example.com/schemas/leaf.json", "https://example.com/schemas/leaf.json#/$defs/name", "https://example.com/root.json#/$defs/entry" @@ -118,7 +496,7 @@ defmodule JSONSchex.Test.Ref do resolutions = Enum.filter(events, &match?(%Resolution{}, &1)) - assert Enum.map(resolutions, & &1.target_uri) == [ + assert Enum.map(resolutions, & &1.location.absolute_uri) == [ "specs/schemas/common.json#/$defs/node", "specs/schemas/common.json#/terminal", "specs/schemas/common.json#/$defs/node" @@ -146,7 +524,7 @@ defmodule JSONSchex.Test.Ref do assert 3 == Enum.count(events, &match?(%Resolution{}, &1)) [cycle] = Enum.filter(events, &match?(%Cycle{}, &1)) - assert cycle.target_uri == "https://example.com/root.json#/$defs/b" + assert cycle.location.absolute_uri == "https://example.com/root.json#/$defs/b" assert cycle.trail == [ "https://example.com/root.json#/$defs/b", @@ -196,7 +574,9 @@ defmodule JSONSchex.Test.Ref do base_uri: "https://example.com/root.json" ) - assert resolution.target_uri == "https://example.com/schemas/user.json#/$defs/name" + assert resolution.location.absolute_uri == + "https://example.com/schemas/user.json#/$defs/name" + assert resolution.target_source == "specs/root.json" assert resolution.target_pointer == "#/$defs/name" assert resolution.target_document == document["container"] @@ -240,7 +620,7 @@ defmodule JSONSchex.Test.Ref do ) assert_received {:loaded, "specs/schemas/common.json"} - assert 
resolution.target_uri == "specs/schemas/common.json#/$defs/id" + assert resolution.location.absolute_uri == "specs/schemas/common.json#/$defs/id" assert resolution.target_source == "specs/schemas/common.json" assert resolution.target_pointer == "#/$defs/id" assert resolution.target_document == %{"$defs" => %{"id" => %{"type" => "string"}}} @@ -272,8 +652,8 @@ defmodule JSONSchex.Test.Ref do assert {:error, %Error{} = error} = Ref.resolve(document, location) assert error.kind == :missing_target - assert error.target_uri == "#/$defs/missing" assert error.location == location + assert error.location.absolute_uri == "#/$defs/missing" end end end diff --git a/test/schema_static_compile_test.exs b/test/schema_static_compile_test.exs index 8ae2299..e44041d 100644 --- a/test/schema_static_compile_test.exs +++ b/test/schema_static_compile_test.exs @@ -31,7 +31,7 @@ defmodule JSONSchex.Test.SchemaStaticCompile.LoaderModule do end @compiled JSONSchex.Schema.compile!(%{"$ref" => "https://example.com/integer"}, - external_loader: &__MODULE__.load/1 + loader: &__MODULE__.load/1 ) def schema, do: @compiled diff --git a/test/support/suite_runner.ex b/test/support/suite_runner.ex index 0e9db60..8cd5c9c 100644 --- a/test/support/suite_runner.ex +++ b/test/support/suite_runner.ex @@ -124,7 +124,7 @@ defmodule JSONSchex.Test.SuiteRunner do quote do @tag :jsts test unquote("Case: #{description}") do - opts = [external_loader: &JSONSchex.Test.SuiteLoader.load/1] + opts = [loader: &JSONSchex.Test.SuiteLoader.load/1] opts = if unquote(add_format_assertion) do opts ++ [format_assertion: true] diff --git a/test/vocabulary_dialect_test.exs b/test/vocabulary_dialect_test.exs index e8909e3..605eac4 100644 --- a/test/vocabulary_dialect_test.exs +++ b/test/vocabulary_dialect_test.exs @@ -30,7 +30,7 @@ defmodule JSONSchex.Test.VocabularyDialectTest do JSONSchex.Test.SuiteLoader.load(uri) end - assert {:error, error} = JSONSchex.compile(schema, external_loader: loader) + assert {:error, error} = 
JSONSchex.compile(schema, loader: loader) assert error.rule == :unsupported_vocabulary assert error.path == ["$vocabulary", "http://localhost:1234/draft/2020-12/vocab/unknown-required"] From 5c02e8a9067dbbf85a9c32c0b45bbd72717a6286 Mon Sep 17 00:00:00 2001 From: xinz Date: Wed, 20 May 2026 00:59:52 +0800 Subject: [PATCH 5/5] For archival purposes only --- CHANGELOG.md | 2 +- README.md | 1 + guide/ref.md | 152 ++++++- lib/jsonschex/ref.ex | 772 +++++++++++++++++++++++++++++++++-- test/ref_test.exs | 945 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 1834 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9fac2b..e194efb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Bug Fixes and Improvements - * Add a new public structural `$ref` API in `JSONSchex.Ref` for discovery (`scan/2`), single-step resolution (`resolve/3`), transitive traversal with cycle reporting (`walk/2`), callback-based transformation (`transform/3`), preserved-ref rendering (`render_ref/3`), and location-keyed walk indexing (`index_walk_events/1`) + * Expand the public structural `$ref` API in `JSONSchex.Ref` with rebasing (`rebase/3`), canonical target identity (`target_uri/1`), external resource collection (`collect_external_resources/2`), structured bundle output (`bundle/3`), and mount-aware preserved-ref rendering (`render_ref/3` with `mode: :mounted`) alongside discovery (`scan/2`), single-step resolution (`resolve/3`), transitive traversal with cycle reporting (`walk/2`), callback-based transformation (`transform/3`), and location-keyed walk indexing (`index_walk_events/1`) * Improve internal scope scanning so `contentSchema` is traversed for nested `$id`, anchor, and local `$ref` discovery ## v0.6.0 (2026-05-09) diff --git a/README.md b/README.md index 779adca..965a178 100644 --- a/README.md +++ b/README.md @@ -212,6 +212,7 @@ end See the `guide/` directory for detailed documentation: +- [Structural `$ref` discovery, rebasing, and 
bundling helpers](guide/ref.md) - [Loader and remote `$ref` handling](guide/loader.md) - [Dialect and `$vocabulary` behavior](guide/dialect_and_vocabulary.md) - [Feature matrix (Draft 2020-12 support)](guide/feature_matrix.md) diff --git a/guide/ref.md b/guide/ref.md index c7c4878..e4944f0 100644 --- a/guide/ref.md +++ b/guide/ref.md @@ -29,13 +29,18 @@ That means: ## Overview -`JSONSchex.Ref` exposes three main entry points: +`JSONSchex.Ref` exposes a small set of public structural `$ref` entry points and +helpers: - `scan/2` — discover structural `$ref` locations - `resolve/3` — resolve one location or raw ref string - `walk/2` — traverse reachable `$ref` targets transitively - `transform/3` — apply a callback-driven structural rewrite over discovered `$ref` locations +- `rebase/3` — rewrite a resource so its refs remain valid under a new root resource URI - `render_ref/3` — render a stable `$ref` string for a resolved target +- `target_uri/1` — compute a canonical absolute URI for a resolved target +- `collect_external_resources/2` — collect reachable non-root resources keyed by canonical resource URI +- `bundle/3` — return a structured bundle-oriented view containing rebased root and collected resources - `index_walk_events/1` — convert ordered walk events into a location-keyed index ## `scan/2` @@ -281,6 +286,12 @@ Supported modes are: - `:original` — reuse the original raw `$ref` spelling from the source location - `:absolute` — render an absolute target URI - `:prefer_local` — default; render a local fragment for same-resource targets, otherwise prefer a relative resource ref and fall back to absolute rendering +- `:mounted` — render the target as it should appear from a rebased or mounted resource context + +`mode: :mounted` expects: + +- `:mount_base_uri` — the rebased containing resource base URI +- optional `:resource_uri_map` — target-resource remapping applied before rendering Examples: @@ -288,8 +299,145 @@ Examples: - same-resource anchor target → 
`#name` - same-resource root target → `#` - cross-resource target → `schemas/common.json#/$defs/name` or an absolute URI +- mounted nested target → `../common.json#Pet` or another rebased relative form + +This is especially useful when `transform/3` decides to preserve a cycle edge instead of expanding it, or when downstream bundling code needs to render refs from a rebased resource context. + +## `target_uri/1` + +`target_uri/1` returns the canonical absolute URI for a `%JSONSchex.Ref.Resolution{}` when it can be derived. + +This is useful when downstream code needs a stable identity key for: + +- rebasing +- resource caches +- bundle indexes +- comparing original vs rebased targets + +```elixir +resolution = %JSONSchex.Ref.Resolution{ + location: %JSONSchex.Ref.Location{ + raw_ref: "schemas/common.json#/$defs/id", + path: ["schema", "$ref"], + absolute_uri: "specs/schemas/common.json#/$defs/id" + }, + target_source: "specs/schemas/common.json", + target_document: %{"$defs" => %{"id" => %{"type" => "integer"}}}, + target_value: %{"type" => "integer"}, + target_pointer: "#/$defs/id" +} + +JSONSchex.Ref.target_uri(resolution) +#=> "specs/schemas/common.json#/$defs/id" +``` + +## `rebase/3` + +`rebase/3` rewrites a resource so its refs remain valid under a new root resource URI. + +This is a structural rebasing helper, not an expansion helper. 
It: + +- rewrites `$ref` strings as needed +- preserves already-correct same-resource refs when possible +- keeps nested relative `$id` resources relative to the new root +- preserves absolute nested `$id` resources as separate identities +- optionally applies explicit target remaps through `:resource_uri_map` + +It accepts: + +- `target_base_uri` — the new root resource URI +- `:source` — provenance for the current document +- `:base_uri` — current starting base URI used before rebasing +- `:resource_uri_map` — optional explicit remaps for target resources outside the current document + +Example: + +```elixir +root = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "user" => %{ + "$id" => "schemas/user.json", + "$defs" => %{"name" => %{"type" => "string"}}, + "schema" => %{"$ref" => "#/$defs/name"} + } + }, + "start" => %{"$ref" => "https://example.com/schemas/user.json#/$defs/name"} +} + +{:ok, rebased} = + JSONSchex.Ref.rebase(root, "https://bundle.example/root.json") + +rebased["$id"] +#=> "https://bundle.example/root.json" + +rebased["start"]["$ref"] +#=> "schemas/user.json#/$defs/name" +``` + +## `collect_external_resources/2` + +`collect_external_resources/2` builds on `walk/2` and groups successful reachable non-root resources by canonical resource URI. + +Each entry contains: -This is especially useful when `transform/3` decides to preserve a cycle edge instead of expanding it. +- `:document` — the resource root document +- `:source` — the loaded document source, when available +- `:resolutions` — all successful incoming resolutions that targeted that resource + +Only successful `%Resolution{}` events are collected. `%Error{}` and `%Cycle{}` events are ignored for collection purposes. 
+ +```elixir +{:ok, resources} = + JSONSchex.Ref.collect_external_resources(document, + source: "specs/root.json", + loader: loader + ) + +Map.keys(resources) +#=> ["specs/schemas/common.json"] +``` + +## `bundle/3` + +`bundle/3` returns a structured bundle-oriented view of the root document and its reachable external resources. + +It combines: + +- `walk/2` +- `index_walk_events/1` +- `collect_external_resources/2` +- `rebase/3` + +The returned map currently includes: + +- `:root_document` +- `:resources_by_uri` +- `:rebased_resources_by_uri` +- `:resource_uri_map` +- `:walk_events` +- `:walk_index` +- `:location_index` +- `:resource_index` + +This is intentionally still a low-level, policy-free helper rather than a final downstream bundling contract, but it already removes a lot of repetitive collection and rebasing glue. + +```elixir +{:ok, bundle} = + JSONSchex.Ref.bundle(document, "specs/bundle/root.json", + source: "specs/root.json", + loader: loader, + resource_uri_map: %{ + "specs/schemas/common.json" => "specs/bundle/common.json" + } + ) + +bundle.root_document["$id"] +#=> "specs/bundle/root.json" + +bundle.rebased_resources_by_uri["specs/schemas/common.json"]["$id"] +#=> "specs/bundle/common.json" +``` ## `index_walk_events/1` diff --git a/lib/jsonschex/ref.ex b/lib/jsonschex/ref.ex index 33bb307..12fe939 100644 --- a/lib/jsonschex/ref.ex +++ b/lib/jsonschex/ref.ex @@ -27,7 +27,12 @@ defmodule JSONSchex.Ref do `%Resolution{}`, `%Error{}`, and `%Cycle{}` events - `transform/3` applies a callback-driven, policy-free structural rewrite over discovered `$ref` locations + - `rebase/3` rewrites a resource so its refs remain valid under a new root + resource URI - `render_ref/3` renders a stable `$ref` string for a resolved target + - `target_uri/1` returns a canonical absolute URI for a resolved target when available + - `collect_external_resources/2` gathers reachable non-root resources keyed by canonical resource URI + - `bundle/3` returns a structured 
bundle-oriented view with rebased root and collected resources - `index_walk_events/1` turns ordered walk events into a location-keyed index ## Options @@ -198,6 +203,40 @@ defmodule JSONSchex.Ref do cycles: %{optional(location_key()) => Cycle.t()} } + @typedoc "Collected external resource entry keyed by canonical resource URI." + @type external_resource_entry :: %{ + required(:document) => document(), + optional(:source) => source() | nil, + required(:resolutions) => [Resolution.t()] + } + + @typedoc "Collected external resources keyed by canonical resource URI." + @type external_resource_index :: %{optional(String.t()) => external_resource_entry()} + + @typedoc "Bundle-oriented resource entry keyed by original canonical resource URI." + @type bundle_resource_entry :: %{ + required(:document) => document(), + required(:rebased_document) => document(), + optional(:source) => source() | nil, + required(:resolutions) => [Resolution.t()], + required(:rebased_resource_uri) => String.t() + } + + @typedoc "Bundle-oriented resource index keyed by original canonical resource URI." + @type bundle_resource_index :: %{optional(String.t()) => bundle_resource_entry()} + + @typedoc "Structured bundle-oriented view built from a root document and reachable resources." + @type bundle_result :: %{ + required(:root_document) => document(), + required(:resources_by_uri) => external_resource_index(), + required(:rebased_resources_by_uri) => %{optional(String.t()) => document()}, + required(:resource_uri_map) => %{optional(String.t()) => String.t()}, + required(:walk_events) => [walk_event()], + required(:walk_index) => walk_index(), + required(:location_index) => walk_index(), + required(:resource_index) => bundle_resource_index() + } + @typedoc "Outcome passed to `transform/3` callbacks for a discovered location." 
@type transform_outcome :: {:ok, Resolution.t()} | {:cycle, Resolution.t(), Cycle.t()} | {:error, Error.t()} @@ -209,7 +248,7 @@ defmodule JSONSchex.Ref do @type transform_callback :: (Location.t(), transform_outcome() -> transform_callback_result()) @typedoc "Rendering mode used by `render_ref/3`." - @type render_mode :: :original | :absolute | :prefer_local + @type render_mode :: :original | :absolute | :prefer_local | :mounted @doc """ Returns `true` if the given ref is a same-document local ref. @@ -249,6 +288,205 @@ defmodule JSONSchex.Ref do {location.source, location.base_uri, location.path, location.absolute_uri} end + @doc """ + Collects reachable external resources keyed by canonical resource URI. + + This helper builds on `walk/2` and groups successful resolutions whose target + resources are outside the original input document resource set. + + Each entry contains: + + - `:document` — the resource root document + - `:source` — the loaded document source, when available + - `:resolutions` — all successful resolutions that targeted that resource + + Root resources that belong to the original input document are excluded, even + when the input contains nested `$id` resources. Only successful reachable + non-root resources are collected. + + ## Options + + This function accepts the same root-context options as `walk/2`: + + - `:source` — source identifier for the root document. This is used both as + provenance metadata and, when `:base_uri` is omitted and `:source` is a + binary, as the initial base URI. + - `:base_uri` — explicit starting base URI override used for reference + resolution. 
+ - `:loader` — `(document_uri -> {:ok, document} | {:ok, %{document: document, source: source}} | {:error, term()})` + + ## Notes + + - this helper only includes resources reached through successful + `%Resolution{}` events + - `%Error{}` and `%Cycle{}` events are ignored for collection purposes + - the `:resolutions` list for each collected resource preserves every + successful incoming resolution that targeted that resource + + ## Example + + iex> document = %{ + ...> "$id" => "specs/root.json", + ...> "start" => %{"$ref" => "schemas/common.json#/schema"} + ...> } + iex> loader = fn + ...> "specs/schemas/common.json" -> + ...> {:ok, + ...> %{ + ...> document: %{ + ...> "$id" => "specs/schemas/common.json", + ...> "$defs" => %{"name" => %{"type" => "string"}}, + ...> "schema" => %{"$ref" => "#/$defs/name"} + ...> }, + ...> source: "specs/schemas/common.json" + ...> }} + ...> _ -> + ...> {:error, :enoent} + ...> end + iex> {:ok, resources} = + ...> JSONSchex.Ref.collect_external_resources(document, + ...> source: "specs/root.json", + ...> loader: loader + ...> ) + iex> Map.keys(resources) + ["specs/schemas/common.json"] + iex> resources["specs/schemas/common.json"].document["schema"] + %{"$ref" => "#/$defs/name"} + """ + @spec collect_external_resources(document(), keyword()) :: {:ok, external_resource_index()} + def collect_external_resources(document, opts \\ []) + when is_map(document) or is_list(document) or is_boolean(document) do + source = Keyword.get(opts, :source) + base_uri = initial_base_uri(opts, source) + root_resource_uris = root_resource_uris(document, source, base_uri) + + {:ok, events} = walk(document, opts) + + resources = + Enum.reduce(events, %{}, fn event, acc -> + case event do + %Resolution{} = resolution -> + case resource_uri(resolution) do + uri when is_binary(uri) -> + if MapSet.member?(root_resource_uris, uri) do + acc + else + Map.update(acc, uri, external_resource_entry(resolution), fn entry -> + append_external_resolution(entry, 
resolution) + end) + end + + _ -> + acc + end + + _ -> + acc + end + end) + |> normalize_external_resource_index() + + {:ok, resources} + end + + @doc """ + Builds a structured bundle-oriented view of the root document and its reachable + external resources. + + This helper combines: + + - `walk/2` + - `index_walk_events/1` + - `collect_external_resources/2` + - `rebase/3` + + The returned result includes the rebased root document, original collected + external resources keyed by canonical resource URI, rebased external resource + documents keyed by their original canonical resource URI, a richer + `resource_index`, the merged `resource_uri_map`, and the ordered and indexed + walk output. + + ## Options + + This function accepts the same root-context options as `walk/2` and `rebase/3`: + + - `:source` + - `:base_uri` + - `:loader` + - `:resource_uri_map` + + ## Example + + iex> document = %{ + ...> "$id" => "specs/root.json", + ...> "start" => %{"$ref" => "schemas/common.json#/schema"} + ...> } + iex> loader = fn + ...> "specs/schemas/common.json" -> + ...> {:ok, + ...> %{ + ...> document: %{ + ...> "$id" => "specs/schemas/common.json", + ...> "$defs" => %{"name" => %{"type" => "string"}}, + ...> "schema" => %{"$ref" => "#/$defs/name"} + ...> }, + ...> source: "specs/schemas/common.json" + ...> }} + ...> _ -> + ...> {:error, :enoent} + ...> end + iex> {:ok, bundle} = + ...> JSONSchex.Ref.bundle(document, "specs/bundle/root.json", + ...> source: "specs/root.json", + ...> loader: loader, + ...> resource_uri_map: %{ + ...> "specs/schemas/common.json" => "specs/bundle/common.json" + ...> } + ...> ) + iex> bundle.root_document["start"] + %{"$ref" => "common.json#/schema"} + iex> Map.keys(bundle.resources_by_uri) + ["specs/schemas/common.json"] + iex> bundle.rebased_resources_by_uri["specs/schemas/common.json"]["$id"] + "specs/bundle/common.json" + iex> bundle.location_index == bundle.walk_index + true + iex> 
bundle.resource_index["specs/schemas/common.json"].rebased_resource_uri + "specs/bundle/common.json" + """ + @spec bundle(document(), String.t(), keyword()) :: {:ok, bundle_result()} | {:error, term()} + def bundle(document, target_base_uri, opts \\ []) + when (is_map(document) or is_list(document) or is_boolean(document)) and + is_binary(target_base_uri) do + source = Keyword.get(opts, :source) + current_base_uri = initial_base_uri(opts, source) + root_resource_uri_map = build_rebase_resource_uri_map(document, current_base_uri, target_base_uri) + resource_uri_map = Map.merge(resource_uri_map_from_opts(opts), root_resource_uri_map) + rebase_opts = Keyword.put(opts, :resource_uri_map, resource_uri_map) + + with {:ok, walk_events} <- walk(document, opts), + walk_index = index_walk_events(walk_events), + {:ok, resources_by_uri} <- collect_external_resources(document, opts), + {:ok, root_document} <- rebase(document, target_base_uri, rebase_opts), + {:ok, rebased_resources_by_uri} <- + rebase_external_resources(resources_by_uri, resource_uri_map) do + resource_index = + build_bundle_resource_index(resources_by_uri, rebased_resources_by_uri, resource_uri_map) + + {:ok, + %{ + root_document: root_document, + resources_by_uri: resources_by_uri, + rebased_resources_by_uri: rebased_resources_by_uri, + resource_uri_map: resource_uri_map, + walk_events: walk_events, + walk_index: walk_index, + location_index: walk_index, + resource_index: resource_index + }} + end + end + @doc """ Indexes walk events by `location_key/1`. @@ -342,6 +580,36 @@ defmodule JSONSchex.Ref do end end + @doc """ + Returns the canonical absolute URI for a resolved target when it can be derived. + + This is primarily useful for downstream tooling that needs a stable identity + key for resolved targets, such as rebasing or bundling logic. 
+ """ + @spec target_uri(Resolution.t()) :: String.t() | nil + def target_uri(%Resolution{location: %Location{absolute_uri: absolute_uri}}) + when is_binary(absolute_uri), + do: absolute_uri + + def target_uri(%Resolution{} = resolution) do + target_resource = resource_uri(resolution) + fragment = preferred_fragment(resolution) + + cond do + is_binary(target_resource) and is_binary(fragment) -> + target_resource <> "#" <> fragment + + is_binary(target_resource) and is_binary(resolution.target_pointer) -> + target_resource <> resolution.target_pointer + + is_binary(target_resource) -> + target_resource + + true -> + nil + end + end + @doc """ Renders a `$ref` string for the given resolved target. @@ -351,9 +619,19 @@ defmodule JSONSchex.Ref do - `:absolute` — render the target as an absolute resource URI plus fragment - `:prefer_local` — render a local fragment for same-resource targets, otherwise a relative ref when it can be computed safely, falling back to absolute + - `:mounted` — render the target as it should appear from a rebased or mounted + resource context The default mode is `:prefer_local`. 
+ `:mounted` expects: + + - `:mount_base_uri` — the rebased containing resource base URI + + and optionally: + + - `:resource_uri_map` — remaps target resource URIs before rendering + ## Examples iex> location = %JSONSchex.Ref.Location{ @@ -386,6 +664,9 @@ defmodule JSONSchex.Ref do :prefer_local -> render_prefer_local_ref(location, resolution) + + :mounted -> + render_mounted_ref(location, resolution, opts) end end @@ -395,28 +676,7 @@ defmodule JSONSchex.Ref do defp render_original_ref(_location, %Resolution{} = resolution), do: render_prefer_local_ref(nil, resolution) - defp render_absolute_ref(%Resolution{location: %Location{absolute_uri: absolute_uri}}) - when is_binary(absolute_uri), - do: absolute_uri - - defp render_absolute_ref(%Resolution{} = resolution) do - target_resource = resource_uri(resolution) - fragment = preferred_fragment(resolution) - - cond do - is_binary(target_resource) and is_binary(fragment) -> - target_resource <> "#" <> fragment - - is_binary(target_resource) and is_binary(resolution.target_pointer) -> - target_resource <> resolution.target_pointer - - is_binary(target_resource) -> - target_resource - - true -> - nil - end - end + defp render_absolute_ref(%Resolution{} = resolution), do: target_uri(resolution) defp render_prefer_local_ref(%Location{} = location, %Resolution{} = resolution) do source_resource = resource_uri(location) @@ -439,6 +699,27 @@ defmodule JSONSchex.Ref do defp render_prefer_local_ref(_location, %Resolution{} = resolution), do: render_absolute_ref(resolution) + defp render_mounted_ref(%Location{} = location, %Resolution{} = resolution, opts) do + case Keyword.get(opts, :mount_base_uri) do + mount_base_uri when is_binary(mount_base_uri) -> + mount_resource = URIUtil.base(mount_base_uri) + source_resource = resource_uri(location) + + resource_uri_map = + opts + |> resource_uri_map_from_opts() + |> maybe_put_mounted_source_resource(source_resource, mount_resource) + + mounted_target_uri = + 
rebase_target_uri(mount_resource, target_uri(resolution), resource_uri_map) + + render_rebased_target_uri(mount_resource, mounted_target_uri) + + _ -> + render_absolute_ref(resolution) + end + end + defp render_same_resource_ref(source_resource, %Resolution{} = resolution) do fragment = preferred_fragment(resolution) @@ -508,13 +789,25 @@ defmodule JSONSchex.Ref do same_hierarchical_uri_origin?(source, target) and is_binary(source.path) and is_binary(target.path) -> - Path.relative_to(target.path, path_dirname(source.path)) + relativize_hierarchical_uri_path(source.path, target.path) true -> nil end end + defp relativize_hierarchical_uri_path(source_path, target_path) + when is_binary(source_path) and is_binary(target_path) do + source_dir = + source_path + |> String.trim_leading("/") + |> path_dirname() + + target_path + |> String.trim_leading("/") + |> Path.relative_to(source_dir) + end + defp path_like_resource?(resource) when is_binary(resource) do match?(%URI{scheme: nil}, URI.parse(resource)) end @@ -716,6 +1009,53 @@ defmodule JSONSchex.Ref do end end + @doc """ + Rewrites a resource so its refs remain valid under a new root resource URI. + + This helper preserves ref target semantics while changing the document's root + resource identity. Nested relative `$id` values continue to derive from the + rebased root. Existing absolute `$id` values remain unchanged. + + Refs that target resources inside the rebased document are rewritten to their + rebased locations automatically. Refs targeting resources outside the + document remain pointed at their original targets unless an explicit + `:resource_uri_map` remaps those target resource URIs. 
+ + ## Options + + - `:source` — source identifier for the current document provenance + - `:base_uri` — current starting base URI used to interpret relative refs and `$id` + values before rebasing + - `:resource_uri_map` — map or keyword list of `old_resource_uri => new_resource_uri` + overrides for target resources outside the current document + """ + @spec rebase(document(), String.t(), keyword()) :: {:ok, term()} | {:error, term()} + def rebase(document, target_base_uri, opts \\ []) + when (is_map(document) or is_list(document) or is_boolean(document)) and + is_binary(target_base_uri) do + source = Keyword.get(opts, :source) + current_base_uri = initial_base_uri(opts, source) + resource_uri_map = resource_uri_map_from_opts(opts) + internal_resource_uri_map = build_rebase_resource_uri_map(document, current_base_uri, target_base_uri) + resource_uri_map = Map.merge(resource_uri_map, internal_resource_uri_map) + + case rebase_node( + document, + [], + source, + current_base_uri, + target_base_uri, + resource_uri_map + ) do + {:ok, rebased_document} -> + rebased_document = maybe_put_root_id(rebased_document, target_base_uri) + {:ok, rebased_document} + + {:error, reason} -> + {:error, reason} + end + end + defp resolve_location(document, %Location{} = location, opts, cache) do source = location.source || Keyword.get(opts, :source) root_base_uri = initial_base_uri(opts, source) @@ -733,6 +1073,220 @@ defmodule JSONSchex.Ref do end end + defp rebase_node( + value, + _path, + _source, + _old_base_uri, + _new_base_uri, + _resource_uri_map + ) + when is_boolean(value) or is_binary(value) or is_number(value) or is_nil(value) do + {:ok, value} + end + + defp rebase_node( + list, + path, + source, + old_base_uri, + new_base_uri, + resource_uri_map + ) + when is_list(list) do + Enum.reduce_while(Enum.with_index(list), {:ok, []}, fn {item, index}, {:ok, acc} -> + case rebase_node( + item, + path ++ [index], + source, + old_base_uri, + new_base_uri, + resource_uri_map + ) do + 
{:ok, rebased_item} -> + {:cont, {:ok, [rebased_item | acc]}} + + {:error, reason} -> + {:halt, {:error, reason}} + end + end) + |> case do + {:ok, acc} -> {:ok, Enum.reverse(acc)} + {:error, reason} -> {:error, reason} + end + end + + defp rebase_node( + map, + path, + source, + old_base_uri, + new_base_uri, + resource_uri_map + ) + when is_map(map) do + old_effective_base_uri = effective_base_uri(old_base_uri, map) + + new_effective_base_uri = + case path do + [] -> new_base_uri + _ -> effective_base_uri(new_base_uri, map) + end + + map + |> Enum.sort_by(&sort_entry/1) + |> Enum.reduce_while({:ok, %{}}, fn {key, value}, {:ok, acc} -> + case rebase_node( + value, + path ++ [key], + source, + old_effective_base_uri, + new_effective_base_uri, + resource_uri_map + ) do + {:ok, rebased_value} -> + {:cont, {:ok, Map.put(acc, key, rebased_value)}} + + {:error, reason} -> + {:halt, {:error, reason}} + end + end) + |> case do + {:ok, rebased_map} -> + rebased_map = maybe_put_root_id(rebased_map, path, new_base_uri) + + rebase_current_ref( + rebased_map, + path, + source, + old_effective_base_uri, + new_effective_base_uri, + resource_uri_map + ) + + {:error, reason} -> + {:error, reason} + end + end + + defp rebase_current_ref( + map, + _path, + _source, + _old_effective_base_uri, + _new_effective_base_uri, + _resource_uri_map + ) + when not is_map(map) do + {:ok, map} + end + + defp rebase_current_ref( + map, + path, + source, + old_effective_base_uri, + new_effective_base_uri, + resource_uri_map + ) do + case Map.get(map, "$ref") do + ref when is_binary(ref) -> + location = + normalize_location( + %Location{raw_ref: ref, path: path ++ ["$ref"], source: source, base_uri: old_effective_base_uri}, + source, + old_effective_base_uri + ) + + rebased_ref = rebase_ref(new_effective_base_uri, location, resource_uri_map) + {:ok, Map.put(map, "$ref", rebased_ref)} + + _ -> + {:ok, map} + end + end + + defp rebase_ref(new_effective_base_uri, %Location{} = location, 
resource_uri_map) do + source_resource = if is_binary(new_effective_base_uri), do: URIUtil.base(new_effective_base_uri), else: nil + + rebased_target_uri = + rebase_target_uri( + source_resource, + rebase_target_reference(location, resource_uri_map), + resource_uri_map + ) + + render_rebased_target_uri(source_resource, rebased_target_uri) + end + + defp rebase_target_reference(%Location{raw_ref: raw_ref} = location, resource_uri_map) + when is_binary(raw_ref) do + case split_target(raw_ref) do + {:ok, target_resource, _fragment} when is_binary(target_resource) -> + if Map.has_key?(resource_uri_map, target_resource) do + raw_ref + else + location.absolute_uri || raw_ref + end + + _ -> + location.absolute_uri || raw_ref + end + end + + defp rebase_target_reference(%Location{} = location, _resource_uri_map), + do: location.absolute_uri || location.raw_ref + + defp rebase_target_uri(source_resource, target_uri, resource_uri_map) when is_binary(target_uri) do + case split_target(target_uri) do + {:ok, target_resource, fragment} -> + target_resource = + cond do + target_resource in [nil, ""] and is_binary(source_resource) -> + source_resource + + is_binary(target_resource) -> + Map.get(resource_uri_map, target_resource, target_resource) + + true -> + nil + end + + if is_binary(target_resource) do + with_optional_fragment(target_resource, fragment) + else + target_uri + end + + :error -> + target_uri + end + end + + defp rebase_target_uri(_source_resource, target_uri, _resource_uri_map), do: target_uri + + defp render_rebased_target_uri(source_resource, rebased_target_uri) + when is_binary(rebased_target_uri) do + case split_target(rebased_target_uri) do + {:ok, target_resource, fragment} when is_binary(source_resource) and source_resource == target_resource -> + URIUtil.local_ref(fragment) + + {:ok, target_resource, fragment} when is_binary(source_resource) and is_binary(target_resource) -> + case relativize_resource_uri(source_resource, target_resource) do + 
relative_resource when is_binary(relative_resource) -> + with_optional_fragment(relative_resource, fragment) + + _ -> + rebased_target_uri + end + + _ -> + rebased_target_uri + end + end + + defp render_rebased_target_uri(_source_resource, rebased_target_uri), do: rebased_target_uri + defp transform_node( value, _path, @@ -917,7 +1471,7 @@ defmodule JSONSchex.Ref do end defp transform_resolution(%Resolution{} = resolution, loader, fun, state, trail) do - target_uri = resolution_uri(resolution) + target_uri = target_uri(resolution) cond do not walkable_document?(resolution.target_value) -> @@ -1018,7 +1572,7 @@ defmodule JSONSchex.Ref do end defp maybe_walk_resolution(state, %Resolution{} = resolution, loader, trail) do - target_uri = resolution_uri(resolution) + target_uri = target_uri(resolution) cond do not walkable_document?(resolution.target_value) -> @@ -1090,7 +1644,7 @@ defmodule JSONSchex.Ref do end defp next_base_uri(%Resolution{target_source: target_source} = resolution) do - target_uri = resolution_uri(resolution) + target_uri = target_uri(resolution) cond do is_binary(target_uri) -> @@ -1119,12 +1673,6 @@ defmodule JSONSchex.Ref do } end - defp resolution_uri(%Resolution{location: %Location{absolute_uri: absolute_uri}}) - when is_binary(absolute_uri), - do: absolute_uri - - defp resolution_uri(_), do: nil - defp normalize_location(%Location{} = location, source, root_base_uri) do base_uri = location.base_uri || root_base_uri absolute_uri = location.absolute_uri || resolve_reference(base_uri, location.raw_ref) @@ -1479,6 +2027,161 @@ defmodule JSONSchex.Ref do Keyword.get(opts, :loader) end + defp resource_uri_map_from_opts(opts) do + opts + |> Keyword.get(:resource_uri_map, %{}) + |> Map.new() + end + + + + defp maybe_put_mounted_source_resource(resource_uri_map, source_resource, mount_resource) + when is_binary(source_resource) and is_binary(mount_resource) do + Map.put_new(resource_uri_map, source_resource, mount_resource) + end + + defp 
maybe_put_mounted_source_resource(resource_uri_map, _source_resource, _mount_resource), + do: resource_uri_map + + defp root_resource_uris(document, source, base_uri) do + document + |> build_index(source, base_uri) + |> Map.fetch!(:resources) + |> Map.keys() + |> MapSet.new() + end + + defp external_resource_entry(%Resolution{} = resolution) do + %{ + document: resolution.target_document, + source: resolution.target_source, + resolutions: [resolution] + } + end + + defp append_external_resolution(entry, %Resolution{} = resolution) do + Map.update!(entry, :resolutions, &[resolution | &1]) + end + + defp normalize_external_resource_index(resources_by_uri) when is_map(resources_by_uri) do + Enum.into(resources_by_uri, %{}, fn {uri, entry} -> + {uri, Map.update!(entry, :resolutions, &Enum.reverse/1)} + end) + end + + defp rebase_external_resources(resources_by_uri, resource_uri_map) when is_map(resources_by_uri) do + Enum.reduce_while(resources_by_uri, {:ok, %{}}, fn {uri, entry}, {:ok, acc} -> + target_base_uri = Map.get(resource_uri_map, uri, uri) + source = Map.get(entry, :source) + + opts = [base_uri: uri, resource_uri_map: resource_uri_map] + opts = if is_nil(source), do: opts, else: Keyword.put(opts, :source, source) + + case rebase(entry.document, target_base_uri, opts) do + {:ok, rebased_document} -> + {:cont, {:ok, Map.put(acc, uri, rebased_document)}} + + {:error, reason} -> + {:halt, {:error, reason}} + end + end) + end + + defp build_bundle_resource_index(resources_by_uri, rebased_resources_by_uri, resource_uri_map) + when is_map(resources_by_uri) and is_map(rebased_resources_by_uri) and is_map(resource_uri_map) do + Enum.reduce(resources_by_uri, %{}, fn {uri, entry}, acc -> + Map.put(acc, uri, %{ + document: entry.document, + rebased_document: Map.get(rebased_resources_by_uri, uri, entry.document), + source: Map.get(entry, :source), + resolutions: entry.resolutions, + rebased_resource_uri: Map.get(resource_uri_map, uri, uri) + }) + end) + end + + defp 
build_rebase_resource_uri_map(document, current_base_uri, target_base_uri) do + do_build_rebase_resource_uri_map(document, [], current_base_uri, target_base_uri, %{}) + end + + defp do_build_rebase_resource_uri_map(value, path, old_base_uri, new_base_uri, acc) + when is_boolean(value) or is_binary(value) or is_number(value) or is_nil(value) do + if path == [] do + put_rebased_resource_uri(acc, old_base_uri, new_base_uri) + else + acc + end + end + + defp do_build_rebase_resource_uri_map(list, path, old_base_uri, new_base_uri, acc) + when is_list(list) do + acc = + if path == [] do + put_rebased_resource_uri(acc, old_base_uri, new_base_uri) + else + acc + end + + Enum.reduce(Enum.with_index(list), acc, fn {item, index}, inner_acc -> + do_build_rebase_resource_uri_map( + item, + path ++ [index], + old_base_uri, + new_base_uri, + inner_acc + ) + end) + end + + defp do_build_rebase_resource_uri_map(map, path, old_base_uri, new_base_uri, acc) + when is_map(map) do + old_effective_base_uri = effective_base_uri(old_base_uri, map) + + new_effective_base_uri = + case path do + [] -> new_base_uri + _ -> effective_base_uri(new_base_uri, map) + end + + acc = + if path == [] or is_binary(Map.get(map, "$id")) do + put_rebased_resource_uri(acc, old_effective_base_uri, new_effective_base_uri) + else + acc + end + + Enum.reduce(Enum.sort_by(map, &sort_entry/1), acc, fn {key, value}, inner_acc -> + do_build_rebase_resource_uri_map( + value, + path ++ [key], + old_effective_base_uri, + new_effective_base_uri, + inner_acc + ) + end) + end + + defp put_rebased_resource_uri(acc, old_resource_uri, new_resource_uri) + when is_binary(old_resource_uri) and is_binary(new_resource_uri) do + Map.put(acc, URIUtil.base(old_resource_uri), URIUtil.base(new_resource_uri)) + end + + defp put_rebased_resource_uri(acc, _old_resource_uri, _new_resource_uri), do: acc + + defp maybe_put_root_id(document, target_base_uri) + when is_map(document) and is_binary(target_base_uri) do + Map.put(document, "$id", 
target_base_uri) + end + + defp maybe_put_root_id(document, _target_base_uri), do: document + + defp maybe_put_root_id(document, [], target_base_uri) + when is_map(document) and is_binary(target_base_uri) do + Map.put(document, "$id", target_base_uri) + end + + defp maybe_put_root_id(document, _path, _target_base_uri), do: document + defp effective_base_uri(base_uri, map) do case Map.get(map, "$id") do id when is_binary(id) -> resolve_reference(base_uri, id) @@ -1494,6 +2197,9 @@ defmodule JSONSchex.Ref do uri == "" -> URIUtil.base(base) + base == uri -> + uri + absolute_uri?(uri) -> uri diff --git a/test/ref_test.exs b/test/ref_test.exs index 60448df..92f509c 100644 --- a/test/ref_test.exs +++ b/test/ref_test.exs @@ -52,7 +52,7 @@ defmodule JSONSchex.Test.Ref do assert Ref.external_ref?("schemas/common.json#/$defs/name") end - test "returns node paths, location keys, resource URIs, and indexed walk events" do + test "returns node paths, location keys, target URIs, resource URIs, and indexed walk events" do location = %Location{ raw_ref: "schemas/common.json#/$defs/id", path: ["components", "User", "$ref"], @@ -83,6 +83,7 @@ defmodule JSONSchex.Test.Ref do assert Location.node_path(location) == ["components", "User"] assert Ref.location_key(location) == key + assert Ref.target_uri(resolution) == "specs/schemas/common.json#/$defs/id" assert Ref.resource_uri(location) == "specs/root.json" assert Ref.resource_uri(resolution) == "specs/schemas/common.json" assert Ref.resource_uri(error) == "specs/schemas/common.json" @@ -96,7 +97,92 @@ defmodule JSONSchex.Test.Ref do assert cycles[key] == cycle end - test "renders refs in original, absolute, and prefer_local modes" do + test "reconstructs target URIs from target resource context when absolute_uri is absent" do + resolution = %Resolution{ + location: %Location{raw_ref: "#/schema", path: ["schema", "$ref"]}, + target_source: "https://example.com/root.json", + target_document: %{}, + target_value: %{}, + target_pointer: 
"#/$defs/name" + } + + root_resolution = %{ + resolution + | target_pointer: nil + } + + assert Ref.target_uri(resolution) == "https://example.com/root.json#/$defs/name" + assert Ref.target_uri(root_resolution) == "https://example.com/root.json" + end + + test "collects reachable external resources keyed by canonical resource URI" do + document = %{ + "$id" => "specs/root.json", + "local" => %{ + "$id" => "schemas/local.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + }, + "start" => %{"$ref" => "schemas/common.json#/schema"} + } + + loader = fn + "specs/schemas/common.json" -> + {:ok, + %{ + document: %{ + "$id" => "specs/schemas/common.json", + "schema" => %{"$ref" => "parts/nested.json#/schema"}, + "parts" => %{ + "nested" => %{ + "$id" => "parts/nested.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + } + } + }, + source: "specs/schemas/common.json" + }} + + _ -> + {:error, :enoent} + end + + assert {:ok, resources} = + Ref.collect_external_resources(document, + source: "specs/root.json", + loader: loader + ) + + assert Map.keys(resources) |> Enum.sort() == [ + "specs/schemas/common.json", + "specs/schemas/parts/nested.json" + ] + + refute Map.has_key?(resources, "specs/root.json") + refute Map.has_key?(resources, "specs/schemas/local.json") + + assert %{document: common_document, source: "specs/schemas/common.json", resolutions: common_resolutions} = + resources["specs/schemas/common.json"] + + assert common_document["schema"] == %{"$ref" => "parts/nested.json#/schema"} + assert Enum.map(common_resolutions, & &1.location.absolute_uri) == ["specs/schemas/common.json#/schema"] + + assert %{document: nested_document, source: "specs/schemas/common.json", resolutions: nested_resolutions} = + resources["specs/schemas/parts/nested.json"] + + assert nested_document["schema"] == %{"$ref" => "#/$defs/name"} + + assert Enum.map(nested_resolutions, & 
&1.location.absolute_uri) == [ + "specs/schemas/parts/nested.json#/schema" + ] + end + + test "renders refs in original, absolute, prefer_local, and mounted modes" do same_resource_location = %Location{ raw_ref: "#/$defs/name", path: ["schema", "$ref"], @@ -155,6 +241,54 @@ defmodule JSONSchex.Test.Ref do target_document: %{} } + mounted_nested_location = %Location{ + raw_ref: "common.json#Pet", + path: ["schema", "$ref"], + source: "https://example.com/root.json", + base_uri: "https://example.com/schemas/nested/user.json", + absolute_uri: "https://example.com/schemas/common.json#Pet" + } + + mounted_nested_resolution = %Resolution{ + location: mounted_nested_location, + target_source: "https://example.com/root.json", + target_document: %{"$anchor" => "Pet"}, + target_value: %{"$anchor" => "Pet"}, + target_pointer: nil + } + + mounted_absolute_location = %Location{ + raw_ref: "https://example.com/schemas/common.json#Pet", + path: ["schema", "$ref"], + source: "https://cdn.example/user.json", + base_uri: "https://cdn.example/user.json", + absolute_uri: "https://example.com/schemas/common.json#Pet" + } + + mounted_absolute_resolution = %Resolution{ + location: mounted_absolute_location, + target_source: "https://example.com/root.json", + target_document: %{"$anchor" => "Pet"}, + target_value: %{"$anchor" => "Pet"}, + target_pointer: nil + } + + mounted_path_like_location = %Location{ + raw_ref: "common.json#/$defs/id", + path: ["schema", "$ref"], + source: "specs/source/root.json", + base_uri: "specs/source/schemas/nested/user.json", + absolute_uri: "specs/source/schemas/common.json#/$defs/id" + } + + mounted_path_like_resolution = %Resolution{ + location: mounted_path_like_location, + target_source: "specs/source/schemas/common.json", + target_document: %{"$defs" => %{"id" => %{"type" => "integer"}}}, + target_value: %{"type" => "integer"}, + target_pointer: "#/$defs/id" + } + assert Ref.render_ref(same_resource_location, same_resource_resolution, mode: :original) 
== "#/$defs/name" @@ -168,6 +302,58 @@ defmodule JSONSchex.Test.Ref do "schemas/common.json#/$defs/id" assert Ref.render_ref(root_location, root_resolution) == "#" + + assert Ref.render_ref(same_resource_location, same_resource_resolution, + mode: :mounted, + mount_base_uri: "https://bundle.example/root.json" + ) == "#/$defs/name" + + assert Ref.render_ref(anchor_location, anchor_resolution, + mode: :mounted, + mount_base_uri: "https://bundle.example/root.json" + ) == "#name" + + assert Ref.render_ref(root_location, root_resolution, + mode: :mounted, + mount_base_uri: "https://bundle.example/root.json" + ) == "#" + + + + assert Ref.render_ref(cross_resource_location, cross_resource_resolution, + mode: :mounted, + mount_base_uri: "specs/bundle/user.json", + resource_uri_map: %{ + "specs/schemas/common.json" => "specs/bundle/common.json", + "specs/root.json" => "specs/bundle/user.json" + } + ) == "common.json#/$defs/id" + + assert Ref.render_ref(mounted_nested_location, mounted_nested_resolution, + mode: :mounted, + mount_base_uri: "https://bundle.example/schemas/nested/user.json", + resource_uri_map: %{ + "https://example.com/schemas/common.json" => "https://bundle.example/schemas/common.json" + } + ) == "../common.json#Pet" + + assert Ref.render_ref(mounted_absolute_location, mounted_absolute_resolution, + mode: :mounted, + mount_base_uri: "https://cdn.example/user.json", + resource_uri_map: %{ + "https://example.com/schemas/common.json" => "https://bundle.example/schemas/common.json" + } + ) == "https://bundle.example/schemas/common.json#Pet" + + assert Ref.render_ref(mounted_path_like_location, mounted_path_like_resolution, + mode: :mounted, + mount_base_uri: "specs/bundle/schemas/nested/user.json", + resource_uri_map: %{ + "specs/source/schemas/common.json" => "specs/bundle/schemas/common.json" + } + ) == "../common.json#/$defs/id" + + end end @@ -421,6 +607,761 @@ defmodule JSONSchex.Test.Ref do end end + describe "bundle/3" do + test "returns rebased root, 
collected resources, rebased resources, and walk indexes" do + document = %{ + "$id" => "specs/root.json", + "$defs" => %{ + "root_name" => %{"type" => "string"} + }, + "start" => %{"$ref" => "schemas/common.json#/schema"} + } + + loader = fn + "specs/schemas/common.json" -> + {:ok, + %{ + document: %{ + "$id" => "specs/schemas/common.json", + "root_link" => %{"$ref" => "../root.json#/$defs/root_name"}, + "schema" => %{"$ref" => "parts/nested.json#/schema"}, + "parts" => %{ + "nested" => %{ + "$id" => "parts/nested.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + } + } + }, + source: "specs/schemas/common.json" + }} + + _ -> + {:error, :enoent} + end + + assert {:ok, bundle} = + Ref.bundle(document, "specs/bundle/root.json", + source: "specs/root.json", + loader: loader, + resource_uri_map: %{ + "specs/schemas/common.json" => "specs/bundle/common.json", + "specs/schemas/parts/nested.json" => "specs/bundle/parts/nested.json" + } + ) + + assert bundle.root_document["$id"] == "specs/bundle/root.json" + assert bundle.root_document["start"]["$ref"] == "common.json#/schema" + + assert Map.keys(bundle.resources_by_uri) |> Enum.sort() == [ + "specs/schemas/common.json", + "specs/schemas/parts/nested.json" + ] + + assert bundle.rebased_resources_by_uri["specs/schemas/common.json"]["$id"] == + "specs/bundle/common.json" + + assert bundle.rebased_resources_by_uri["specs/schemas/common.json"]["schema"] == + %{"$ref" => "parts/nested.json#/schema"} + + assert bundle.rebased_resources_by_uri["specs/schemas/common.json"]["root_link"] == + %{"$ref" => "root.json#/$defs/root_name"} + + assert bundle.rebased_resources_by_uri["specs/schemas/parts/nested.json"]["$id"] == + "specs/bundle/parts/nested.json" + + assert bundle.rebased_resources_by_uri["specs/schemas/parts/nested.json"]["schema"] == + %{"$ref" => "#/$defs/name"} + + assert bundle.resource_uri_map["specs/root.json"] == "specs/bundle/root.json" + assert 
bundle.resource_uri_map["specs/schemas/common.json"] == "specs/bundle/common.json" + assert bundle.resource_uri_map["specs/schemas/parts/nested.json"] == + "specs/bundle/parts/nested.json" + + assert is_list(bundle.walk_events) + assert bundle.walk_index.resolutions != %{} + assert bundle.location_index == bundle.walk_index + + assert %{document: common_document, rebased_document: rebased_common_document, source: "specs/schemas/common.json", resolutions: common_resolutions, rebased_resource_uri: "specs/bundle/common.json"} = + bundle.resource_index["specs/schemas/common.json"] + + assert common_document == bundle.resources_by_uri["specs/schemas/common.json"].document + assert rebased_common_document == bundle.rebased_resources_by_uri["specs/schemas/common.json"] + assert Enum.map(common_resolutions, & &1.location.absolute_uri) == ["specs/schemas/common.json#/schema"] + + assert %{rebased_resource_uri: "specs/bundle/parts/nested.json"} = + bundle.resource_index["specs/schemas/parts/nested.json"] + + start_key = + {"specs/root.json", "specs/root.json", ["start", "$ref"], + "specs/schemas/common.json#/schema"} + + assert %Resolution{} = bundle.walk_index.resolutions[start_key] + end + + test "preserves resource resolution order and returns walk errors alongside bundle state" do + document = %{ + "$id" => "specs/root.json", + "first" => %{"$ref" => "schemas/common.json#/schema"}, + "second" => %{"$ref" => "schemas/common.json#/schema"}, + "missing" => %{"$ref" => "schemas/missing.json#/schema"} + } + + loader = fn + "specs/schemas/common.json" -> + {:ok, + %{ + document: %{ + "$id" => "specs/schemas/common.json", + "schema" => %{"type" => "string"} + }, + source: "specs/schemas/common.json" + }} + + _ -> + {:error, :enoent} + end + + assert {:ok, bundle} = + Ref.bundle(document, "specs/bundle/root.json", + source: "specs/root.json", + loader: loader, + resource_uri_map: %{ + "specs/schemas/common.json" => "specs/bundle/common.json" + } + ) + + assert 
Enum.any?(bundle.walk_events, &match?(%Error{kind: :missing_document}, &1)) + + assert Enum.map(bundle.resources_by_uri["specs/schemas/common.json"].resolutions, & &1.location.path) == [ + ["first", "$ref"], + ["second", "$ref"] + ] + + assert Enum.map(bundle.resource_index["specs/schemas/common.json"].resolutions, & &1.location.path) == [ + ["first", "$ref"], + ["second", "$ref"] + ] + + refute Map.has_key?(bundle.resources_by_uri, "specs/schemas/missing.json") + refute Map.has_key?(bundle.resource_index, "specs/schemas/missing.json") + end + end + + describe "rebase/3" do + test "injects a new root $id and preserves same-resource local refs" do + document = %{ + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + } + + assert {:ok, rebased} = + Ref.rebase(document, "https://bundle.example/schemas/user.json", + base_uri: "https://example.com/schemas/user.json" + ) + + assert rebased["$id"] == "https://bundle.example/schemas/user.json" + assert rebased["schema"]["$ref"] == "#/$defs/name" + + [location] = Ref.scan(rebased, base_uri: "https://bundle.example/schemas/user.json") + assert {:ok, resolution} = + Ref.resolve(rebased, location, + base_uri: "https://bundle.example/schemas/user.json" + ) + + assert resolution.target_value == %{"type" => "string"} + end + + test "rewrites relative external refs against the new base when preserving original targets" do + document = %{ + "schema" => %{"$ref" => "common.json#/$defs/id"} + } + + assert {:ok, rebased} = + Ref.rebase(document, "specs/bundle/user.json", + base_uri: "specs/source/user.json" + ) + + assert rebased["$id"] == "specs/bundle/user.json" + assert rebased["schema"]["$ref"] == "../source/common.json#/$defs/id" + end + + test "rebases nested relative $id resources under the new root" do + document = %{ + "$id" => "https://example.com/root.json", + "child" => %{ + "$id" => "schemas/user.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => 
%{"$ref" => "#/$defs/name"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["child"]["$id"] == "schemas/user.json" + assert rebased["child"]["schema"]["$ref"] == "#/$defs/name" + + [location] = Ref.scan(rebased, base_uri: "https://bundle.example/root.json") + + assert {:ok, resolution} = + Ref.resolve(rebased, location, base_uri: "https://bundle.example/root.json") + + assert resolution.location.base_uri == "https://bundle.example/schemas/user.json" + assert resolution.target_document == rebased["child"] + assert resolution.target_value == %{"type" => "string"} + end + + test "preserves same-resource root refs when rebasing" do + document = %{ + "self" => %{"$ref" => "#"} + } + + assert {:ok, rebased} = + Ref.rebase(document, "https://bundle.example/root.json", + base_uri: "https://example.com/root.json" + ) + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["self"]["$ref"] == "#" + end + + test "preserves same-resource anchor refs when rebasing" do + document = %{ + "$anchor" => "Pet", + "self" => %{"$ref" => "#Pet"} + } + + assert {:ok, rebased} = + Ref.rebase(document, "https://bundle.example/root.json", + base_uri: "https://example.com/root.json" + ) + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["$anchor"] == "Pet" + assert rebased["self"]["$ref"] == "#Pet" + end + + test "rewrites internal cross-resource refs to rebased sibling resources" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "user" => %{ + "$id" => "schemas/user.json", + "$defs" => %{ + "name" => %{"type" => "string"} + } + } + }, + "schema" => %{"$ref" => "https://example.com/schemas/user.json#/$defs/name"} + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert 
rebased["$defs"]["user"]["$id"] == "schemas/user.json" + assert rebased["schema"]["$ref"] == "schemas/user.json#/$defs/name" + end + + test "rewrites nested child refs that point back to the rebased root resource" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "child" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "https://example.com/root.json#/$defs/name"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["child"]["$id"] == "schemas/user.json" + assert rebased["child"]["schema"]["$ref"] == "../root.json#/$defs/name" + end + + test "rewrites path-like nested child refs that point back to the rebased root resource" do + document = %{ + "$id" => "specs/source/root.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "child" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "specs/source/root.json#/$defs/name"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "specs/bundle/root.json") + + assert rebased["$id"] == "specs/bundle/root.json" + assert rebased["child"]["$id"] == "schemas/user.json" + assert rebased["child"]["schema"]["$ref"] == "../root.json#/$defs/name" + end + + test "preserves absolute nested resource identities while rebasing the root" do + document = %{ + "$id" => "https://example.com/root.json", + "child" => %{ + "$id" => "https://cdn.example/user.json", + "$anchor" => "Pet", + "schema" => %{"$ref" => "#Pet"} + }, + "link" => %{"$ref" => "https://cdn.example/user.json#Pet"} + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["child"]["$id"] == "https://cdn.example/user.json" + assert rebased["child"]["schema"]["$ref"] == "#Pet" + assert rebased["link"]["$ref"] == 
"https://cdn.example/user.json#Pet" + end + + test "rewrites nested sibling resource refs with anchors under the rebased root" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "common" => %{ + "$id" => "schemas/common.json", + "$anchor" => "Pet" + }, + "user" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "common.json#Pet"} + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["$defs"]["common"]["$id"] == "schemas/common.json" + assert rebased["$defs"]["user"]["$id"] == "schemas/user.json" + assert rebased["$defs"]["user"]["schema"]["$ref"] == "common.json#Pet" + end + + test "preserves recursive local-file child resources after rebasing" do + document = %{ + "$id" => "specs/source/root.json", + "$defs" => %{ + "node_resource" => %{ + "$id" => "schemas/node.json", + "$defs" => %{ + "node" => %{ + "type" => "object", + "properties" => %{ + "next" => %{"$ref" => "#/$defs/node"} + } + } + }, + "schema" => %{"$ref" => "#/$defs/node"} + } + }, + "schema" => %{"$ref" => "schemas/node.json#/schema"} + } + + assert {:ok, rebased} = Ref.rebase(document, "specs/bundle/root.json") + + assert rebased["$id"] == "specs/bundle/root.json" + assert rebased["$defs"]["node_resource"]["$id"] == "schemas/node.json" + assert rebased["$defs"]["node_resource"]["schema"]["$ref"] == "#/$defs/node" + assert rebased["$defs"]["node_resource"]["$defs"]["node"]["properties"]["next"]["$ref"] == + "#/$defs/node" + assert rebased["schema"]["$ref"] == "schemas/node.json#/schema" + + locations = Ref.scan(rebased, base_uri: "specs/bundle/root.json") + + next_location = + Enum.find(locations, fn location -> + location.path == ["$defs", "node_resource", "$defs", "node", "properties", "next", "$ref"] + end) + + schema_location = + Enum.find(locations, fn location -> + location.path == ["schema", "$ref"] + end) + + assert %Location{} = 
next_location + assert %Location{} = schema_location + assert next_location.absolute_uri == "specs/bundle/schemas/node.json#/$defs/node" + assert schema_location.absolute_uri == "specs/bundle/schemas/node.json#/schema" + + assert {:ok, next_resolution} = + Ref.resolve(rebased, next_location, base_uri: "specs/bundle/root.json") + + assert next_resolution.target_document == rebased["$defs"]["node_resource"] + assert next_resolution.target_value["type"] == "object" + + assert {:ok, schema_resolution} = + Ref.resolve(rebased, schema_location, base_uri: "specs/bundle/root.json") + + assert schema_resolution.target_document == rebased["$defs"]["node_resource"] + assert schema_resolution.target_value == %{"$ref" => "#/$defs/node"} + end + + test "preserves nested child same-resource root and anchor refs after rebasing the root" do + document = %{ + "$id" => "https://example.com/root.json", + "child" => %{ + "$id" => "schemas/user.json", + "$anchor" => "Pet", + "root_ref" => %{"$ref" => "#"}, + "anchor_ref" => %{"$ref" => "#Pet"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["child"]["$id"] == "schemas/user.json" + assert rebased["child"]["$anchor"] == "Pet" + assert rebased["child"]["root_ref"]["$ref"] == "#" + assert rebased["child"]["anchor_ref"]["$ref"] == "#Pet" + end + + test "rewrites path-like sibling refs between nested resources under the rebased root" do + document = %{ + "$id" => "specs/source/root.json", + "$defs" => %{ + "common" => %{ + "$id" => "schemas/common.json", + "$anchor" => "Pet" + }, + "user" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "common.json#Pet"} + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "specs/bundle/root.json") + + assert rebased["$id"] == "specs/bundle/root.json" + assert rebased["$defs"]["common"]["$id"] == "schemas/common.json" + assert rebased["$defs"]["user"]["$id"] == 
"schemas/user.json" + assert rebased["$defs"]["user"]["schema"]["$ref"] == "common.json#Pet" + end + + test "uses resource_uri_map to retarget path-like external anchor refs to rebased companion resources" do + document = %{ + "$id" => "specs/source/user.json", + "schema" => %{"$ref" => "common.json#PetSummary"} + } + + assert {:ok, rebased} = + Ref.rebase(document, "specs/bundle/user.json", + resource_uri_map: %{ + "specs/source/common.json" => "specs/bundle/common.json" + } + ) + + assert rebased["$id"] == "specs/bundle/user.json" + assert rebased["schema"]["$ref"] == "common.json#PetSummary" + end + + test "rebases multi-level relative $id chains and preserves descendant refs" do + document = %{ + "$id" => "https://example.com/root.json", + "tree" => %{ + "$id" => "schemas/", + "user" => %{ + "$id" => "user.json", + "$defs" => %{ + "name" => %{"type" => "string"} + }, + "schema" => %{"$ref" => "#/$defs/name"} + }, + "link" => %{"$ref" => "user.json#/$defs/name"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["tree"]["$id"] == "schemas/" + assert rebased["tree"]["user"]["$id"] == "user.json" + assert rebased["tree"]["user"]["schema"]["$ref"] == "#/$defs/name" + assert rebased["tree"]["link"]["$ref"] == "user.json#/$defs/name" + + locations = Ref.scan(rebased, base_uri: "https://bundle.example/root.json") + + link_location = + Enum.find(locations, fn location -> + location.path == ["tree", "link", "$ref"] + end) + + assert %Location{} = link_location + assert link_location.absolute_uri == "https://bundle.example/schemas/user.json#/$defs/name" + + assert {:ok, resolution} = + Ref.resolve(rebased, link_location, base_uri: "https://bundle.example/root.json") + + assert resolution.target_document == rebased["tree"]["user"] + assert resolution.target_value == %{"type" => "string"} + end + + test "rewrites anchor refs across rebased parent 
resource boundaries" do + document = %{ + "$id" => "https://example.com/root.json", + "package" => %{ + "$id" => "schemas/", + "common" => %{ + "$id" => "common.json", + "$anchor" => "Pet", + "type" => "string" + }, + "user" => %{ + "$id" => "user.json", + "schema" => %{"$ref" => "common.json#Pet"} + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["package"]["$id"] == "schemas/" + assert rebased["package"]["common"]["$id"] == "common.json" + assert rebased["package"]["user"]["$id"] == "user.json" + assert rebased["package"]["user"]["schema"]["$ref"] == "common.json#Pet" + + [location] = + Ref.scan(rebased, base_uri: "https://bundle.example/root.json") + |> Enum.filter(fn location -> location.path == ["package", "user", "schema", "$ref"] end) + + assert {:ok, resolution} = + Ref.resolve(rebased, location, base_uri: "https://bundle.example/root.json") + + assert resolution.target_document == rebased["package"]["common"] + assert resolution.target_value == rebased["package"]["common"] + end + + test "preserves mixed absolute and relative nested resource identities" do + document = %{ + "$id" => "https://example.com/root.json", + "common" => %{ + "$id" => "schemas/common.json", + "$anchor" => "Pet" + }, + "vendor" => %{ + "$id" => "https://cdn.example/vendor.json", + "$anchor" => "Vendor" + }, + "user" => %{ + "$id" => "schemas/user.json", + "common_ref" => %{"$ref" => "common.json#Pet"}, + "vendor_ref" => %{"$ref" => "https://cdn.example/vendor.json#Vendor"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["common"]["$id"] == "schemas/common.json" + assert rebased["vendor"]["$id"] == "https://cdn.example/vendor.json" + assert rebased["user"]["$id"] == "schemas/user.json" + assert rebased["user"]["common_ref"]["$ref"] == 
"common.json#Pet" + assert rebased["user"]["vendor_ref"]["$ref"] == "https://cdn.example/vendor.json#Vendor" + end + + test "rewrites sibling root-resource refs without preserving an empty fragment" do + document = %{ + "$id" => "https://example.com/root.json", + "$defs" => %{ + "common" => %{ + "$id" => "schemas/common.json", + "type" => "object" + }, + "user" => %{ + "$id" => "schemas/user.json", + "schema" => %{"$ref" => "common.json"} + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["$defs"]["user"]["schema"]["$ref"] == "common.json" + + [location] = + Ref.scan(rebased, base_uri: "https://bundle.example/root.json") + |> Enum.filter(fn location -> location.path == ["$defs", "user", "schema", "$ref"] end) + + assert {:ok, resolution} = + Ref.resolve(rebased, location, base_uri: "https://bundle.example/root.json") + + assert resolution.target_document == rebased["$defs"]["common"] + assert resolution.target_value == rebased["$defs"]["common"] + assert is_nil(resolution.target_pointer) + end + + test "preserves nested path-like directory refs that use dot segments" do + document = %{ + "$id" => "specs/source/root.json", + "package" => %{ + "$id" => "schemas/", + "common" => %{ + "$id" => "./common.json", + "$defs" => %{ + "id" => %{"type" => "integer"} + } + }, + "user" => %{ + "$id" => "nested/user.json", + "schema" => %{"$ref" => "../common.json#/$defs/id"} + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "specs/bundle/root.json") + + assert rebased["$id"] == "specs/bundle/root.json" + assert rebased["package"]["$id"] == "schemas/" + assert rebased["package"]["common"]["$id"] == "./common.json" + assert rebased["package"]["user"]["$id"] == "nested/user.json" + assert rebased["package"]["user"]["schema"]["$ref"] == "../common.json#/$defs/id" + + [location] = + Ref.scan(rebased, base_uri: "specs/bundle/root.json") + |> 
Enum.filter(fn location -> location.path == ["package", "user", "schema", "$ref"] end) + + assert {:ok, resolution} = + Ref.resolve(rebased, location, base_uri: "specs/bundle/root.json") + + assert resolution.target_document == rebased["package"]["common"] + assert resolution.target_value == %{"type" => "integer"} + end + + test "rewrites refs from absolute nested resources to rebased relative resources" do + document = %{ + "$id" => "https://example.com/root.json", + "common" => %{ + "$id" => "schemas/common.json", + "$anchor" => "Pet" + }, + "vendor_user" => %{ + "$id" => "https://cdn.example/user.json", + "schema" => %{"$ref" => "https://example.com/schemas/common.json#Pet"} + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["common"]["$id"] == "schemas/common.json" + assert rebased["vendor_user"]["$id"] == "https://cdn.example/user.json" + assert rebased["vendor_user"]["schema"]["$ref"] == + "https://bundle.example/schemas/common.json#Pet" + end + + test "rewrites multi-level nested sibling anchor refs through rebased parents" do + document = %{ + "$id" => "https://example.com/root.json", + "package" => %{ + "$id" => "schemas/", + "common" => %{ + "$id" => "common.json", + "$anchor" => "Pet" + }, + "nested" => %{ + "$id" => "nested/", + "user" => %{ + "$id" => "user.json", + "schema" => %{"$ref" => "../common.json#Pet"} + } + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "https://bundle.example/root.json") + + assert rebased["$id"] == "https://bundle.example/root.json" + assert rebased["package"]["nested"]["user"]["schema"]["$ref"] == "../common.json#Pet" + + [location] = + Ref.scan(rebased, base_uri: "https://bundle.example/root.json") + |> Enum.filter(fn location -> + location.path == ["package", "nested", "user", "schema", "$ref"] + end) + + assert {:ok, resolution} = + Ref.resolve(rebased, location, base_uri: 
"https://bundle.example/root.json") + + assert resolution.target_document == rebased["package"]["common"] + assert resolution.target_value == rebased["package"]["common"] + end + + test "preserves path-like absolute-path refs across rebasing" do + document = %{ + "$id" => "specs/source/root.json", + "schema" => %{"$ref" => "/shared/common.json#Pet"} + } + + assert {:ok, rebased} = Ref.rebase(document, "specs/bundle/root.json") + + assert rebased["$id"] == "specs/bundle/root.json" + assert rebased["schema"]["$ref"] == "/shared/common.json#Pet" + end + + test "rewrites mixed nested path-like sibling refs that use dot segments" do + document = %{ + "$id" => "specs/source/root.json", + "package" => %{ + "$id" => "schemas/", + "common" => %{ + "$id" => "nested/common.json", + "$defs" => %{ + "id" => %{"type" => "integer"} + } + }, + "nested" => %{ + "$id" => "nested/", + "user" => %{ + "$id" => "user.json", + "schema" => %{"$ref" => "common.json#/$defs/id"} + } + } + } + } + + assert {:ok, rebased} = Ref.rebase(document, "specs/bundle/root.json") + + assert rebased["$id"] == "specs/bundle/root.json" + assert rebased["package"]["common"]["$id"] == "nested/common.json" + assert rebased["package"]["nested"]["$id"] == "nested/" + assert rebased["package"]["nested"]["user"]["$id"] == "user.json" + assert rebased["package"]["nested"]["user"]["schema"]["$ref"] == + "../source/common.json#/$defs/id" + end + + test "uses resource_uri_map to retarget external anchor refs to rebased companion resources" do + document = %{ + "$id" => "https://example.com/user.json", + "schema" => %{"$ref" => "common.json#PetSummary"} + } + + assert {:ok, rebased} = + Ref.rebase(document, "https://bundle.example/user.json", + resource_uri_map: %{ + "https://example.com/common.json" => "https://bundle.example/common.json" + } + ) + + assert rebased["$id"] == "https://bundle.example/user.json" + assert rebased["schema"]["$ref"] == "common.json#PetSummary" + end + end + describe "walk/2" do test "walks 
refs transitively across nested resources in depth-first order" do document = %{