From 64b9118af882486489d794193cc2dbbece57328b Mon Sep 17 00:00:00 2001 From: jskoiz <20649937+jskoiz@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:16:08 -1000 Subject: [PATCH 1/3] Add serde-saphyr benchmark event path --- Cargo.lock | 149 ++ Cargo.toml | 1 + docs/BENCHMARKS.md | 189 +- docs/EVENT_BACKED_SERDE.md | 112 + examples/README.md | 8 +- examples/dhat_memory.rs | 84 +- examples/large_input_benchmark.rs | 150 ++ examples/real_world_benchmark.rs | 150 ++ src/de.rs | 11 +- src/event_de.rs | 3445 +++++++++++++++++++++++++++++ src/lib.rs | 41 + src/parse.rs | 9 +- 12 files changed, 4262 insertions(+), 87 deletions(-) create mode 100644 docs/EVENT_BACKED_SERDE.md create mode 100644 src/event_de.rs diff --git a/Cargo.lock b/Cargo.lock index e35fe4a..82cd4ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,36 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "annotate-snippets" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f211a51805bc641f3ad5b7664c77d2547af685cc33b4cd8d31964027a46f13f1" +dependencies = [ + "anstyle", + "memchr", + "unicode-width", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + [[package]] name = "anyhow" version = "1.0.102" @@ -50,6 +80,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bit-set" version = "0.8.0" @@ -80,6 +116,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bumpalo" +version = "3.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + [[package]] name = "cfg-if" version = "1.0.4" @@ -140,6 +182,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "encoding_rs_io" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -197,9 +248,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi 5.3.0", "wasip2", + "wasm-bindgen", ] [[package]] @@ -221,6 +274,16 @@ version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +[[package]] +name = "granit-parser" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f50ba32164f9e098d5da618776a32afbb32270adcbe3d3d006107dae11e37c91" +dependencies = [ + "arraydeque", + "smallvec", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -293,6 +356,17 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "js-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy_static" version 
= "1.5.0" @@ -561,6 +635,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "rusty-fork" version = "0.3.1" @@ -589,6 +669,7 @@ dependencies = [ "saphyr", "saphyr-parser", "serde", + "serde-saphyr", "serde_json", "serde_yaml", "sha2", @@ -641,6 +722,23 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-saphyr" +version = "0.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5897b4c3faadadd35fdb6689f015641f3bc481d5adaaac56231ea15aeb243db3" +dependencies = [ + "ahash", + "annotate-snippets", + "base64", + "encoding_rs_io", + "getrandom 0.3.4", + "granit-parser", + "num-traits", + "serde", + "smallvec", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -802,6 +900,12 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -847,6 +951,51 @@ dependencies = [ "wit-bindgen 0.51.0", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +dependencies = [ + "quote", + 
"wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +dependencies = [ + "unicode-ident", +] + [[package]] name = "wasm-encoder" version = "0.244.0" diff --git a/Cargo.toml b/Cargo.toml index 10558dd..26a65e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -55,6 +55,7 @@ proptest = "1.6" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9.34" +serde-saphyr = { version = "0.0.27", default-features = false, features = ["deserialize"] } saphyr = "0.0.6" saphyr-parser = "0.0.6" sha2 = "0.10" diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index af2726c..3f69add 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -7,18 +7,34 @@ crate package ships this document, but it intentionally excludes the dev-dependency examples and fixture corpora used to regenerate the tables. 
```sh -cargo run --release --example real_world_benchmark -YAML_BENCH_ITERS=1000 cargo run --release --example real_world_benchmark -cargo run --release --example large_input_benchmark -YAML_LARGE_BENCH_ITERS=20 cargo run --release --example large_input_benchmark +cargo run --locked --release --example real_world_benchmark +YAML_BENCH_ITERS=1000 cargo run --locked --release --example real_world_benchmark +cargo run --locked --release --example large_input_benchmark +YAML_LARGE_BENCH_ITERS=20 cargo run --locked --release --example large_input_benchmark ``` Environment for the latest captured run: -- Reference crates: `yaml-rust2 0.11.0`, `saphyr 0.0.6` +- Reference crates: `serde-saphyr 0.0.27` with `deserialize` only, + `yaml-rust2 0.11.0`, `saphyr 0.0.6` - Small fixture set: 33 files / 39 YAML documents / 25,362 bytes - Large fixture set: pinned downstream fixtures plus generated 1 MiB inputs -- Captured: 2026-06-01 with Cargo's `release` profile +- Captured: 2026-06-06 with Cargo's `release` profile and `--locked` + +The linked `serde-saphyr` repository was ahead of crates.io at the time of this +capture (`0.0.28` in Git, latest published `0.0.27`). The benchmark pins the +published crate so the checked-in `Cargo.lock` and package checks remain +registry-reproducible. + +The `serde-saphyr` rows use benchmark options rather than the crate defaults: +`strict_booleans: true` plus relaxed event, alias, document, node, scalar, and +merge budgets so the generated corpora are comparable throughput inputs. Because +`serde-saphyr` does not expose a native YAML value tree, the matched generic +Serde lane deserializes both libraries into `serde_yaml::Value`. The preflight +normalizes two public-contract differences before asserting equality: +`serde-saphyr::from_multiple_with_options` skips empty/null-like documents, and +serde-saphyr treats YAML tags as transparent for this target while saneyaml +preserves them. 
The README overview graphic is a static summary of selected benchmark and feature rows. Its source notes and update checklist live at @@ -73,16 +89,25 @@ behavior. ## Real-World Config Corpus -Corpus re-capture after adding CloudFormation/SAM, Symfony, GitLab CI, -CircleCI, Azure Pipelines, and an additional reusable GitHub Actions workflow: +Latest same-run capture after adding the matched `serde-saphyr` lane, using +`YAML_BENCH_ITERS=1000`: | parser/load path | iterations | bytes per iteration | docs per iteration | elapsed ms | ns/byte | |---|---:|---:|---:|---:|---:| -| `saneyaml::parse_documents` | 200 | 25,362 | 39 | 94.316 | 18.59 | -| `saneyaml::from_documents_str::` | 200 | 25,362 | 39 | 127.839 | 25.20 | -| `serde_yaml::Value` stream | 200 | 25,362 | 39 | 135.863 | 26.78 | -| `yaml_rust2::YamlLoader` | 200 | 25,362 | 39 | 108.641 | 21.42 | -| `saphyr::Yaml::load_from_str` | 200 | 25,362 | 39 | 100.915 | 19.89 | +| `saneyaml::parse_documents` | 1,000 | 25,362 | 39 | 431.314 | 17.01 | +| `saneyaml::from_documents_str::` | 1,000 | 25,362 | 39 | 572.477 | 22.57 | +| `saneyaml::from_documents_str::` | 1,000 | 25,362 | 39 | 578.399 | 22.81 | +| `saneyaml::__unstable_event_serde::from_documents_str::` | 1,000 | 25,362 | 39 | 1,165.027 | 45.94 | +| `serde_saphyr::from_multiple_with_options::` | 1,000 | 25,362 | 39 | 1,055.332 | 41.61 | +| `serde_yaml::Value` stream | 1,000 | 25,362 | 39 | 644.638 | 25.42 | +| `yaml_rust2::YamlLoader` | 1,000 | 25,362 | 39 | 539.894 | 21.29 | +| `saphyr::Yaml::load_from_str` | 1,000 | 25,362 | 39 | 502.288 | 19.80 | + +On this corpus, the matched generic Serde value lane measured saneyaml at +22.81 ns/byte versus serde-saphyr at 41.61 ns/byte. The private event-backed +prototype measured 45.94 ns/byte on the same target, so it is not a replacement +for the tree-backed Serde path yet. The raw tree-load rows are shown for context +but are a different contract from serde-saphyr's Serde-only API. 
Same-turn pre-optimization baseline, captured before this milestone with the default 200 iterations: @@ -106,9 +131,9 @@ Post zero-copy line-slice re-capture with 1,000 iterations (independent run, | `yaml_rust2::YamlLoader` | 1,000 | 19,727 | 33 | 434.222 | 22.01 | | `saphyr::Yaml::load_from_str` | 1,000 | 19,727 | 33 | 402.909 | 20.42 | -Result: after the zero-copy line slice, `saneyaml::parse_documents` is faster than -the pinned reference loaders on this small corpus in the latest 1,000-iteration -same-run capture (18.01 ns/byte vs `saphyr` at 20.42 and `yaml_rust2` at +Result: after the zero-copy line slice, `saneyaml::parse_documents` was faster than +the pinned reference loaders on this small corpus in that 2026-06-01 +1,000-iteration same-run capture (18.01 ns/byte vs `saphyr` at 20.42 and `yaml_rust2` at 22.01). The owning `Value` path also remains ahead of the `serde_yaml` `Value` stream and roughly ties `yaml_rust2` on this corpus. @@ -134,7 +159,7 @@ lower-noise inputs below — not the across-table delta. Command: ```sh -cargo run --release --example large_input_benchmark +cargo run --locked --release --example large_input_benchmark ``` Default iterations: 20, controlled by `YAML_LARGE_BENCH_ITERS`. @@ -145,12 +170,15 @@ Default iterations: 20, controlled by `YAML_LARGE_BENCH_ITERS`. 
| parser/load path | iterations | bytes per iteration | docs per iteration | elapsed ms | ns/byte | peak retained bytes | peak retained heap objects | |---|---:|---:|---:|---:|---:|---:|---:| -| `saneyaml::parse_documents` | 20 | 245,062 | 20 | 37.887 | 7.73 | 486,188 | 3,983 | -| `saneyaml::parse_borrowed_documents` | 20 | 245,062 | 20 | 39.063 | 7.97 | 173,556 | 904 | -| `saneyaml::from_documents_str::` | 20 | 245,062 | 20 | 42.226 | 8.62 | 217,483 | 3,780 | -| `serde_yaml::Value` stream | 20 | 245,062 | 20 | 54.939 | 11.21 | 396,987 | 3,780 | -| `yaml_rust2::YamlLoader` | 20 | 245,062 | 20 | 40.489 | 8.26 | 382,497 | 3,796 | -| `saphyr::Yaml::load_from_str` | 20 | 245,062 | 20 | 38.919 | 7.94 | 534,786 | 3,780 | +| `saneyaml::parse_documents` | 20 | 245,062 | 20 | 31.887 | 6.51 | 486,188 | 3,983 | +| `saneyaml::parse_borrowed_documents` | 20 | 245,062 | 20 | 30.256 | 6.17 | 173,556 | 904 | +| `saneyaml::from_documents_str::` | 20 | 245,062 | 20 | 40.035 | 8.17 | 217,483 | 3,780 | +| `saneyaml::from_documents_str::` | 20 | 245,062 | 20 | 41.333 | 8.43 | 378,843 | 3,780 | +| `saneyaml::__unstable_event_serde::from_documents_str::` | 20 | 245,062 | 20 | 77.032 | 15.72 | 396,987 | 3,780 | +| `serde_saphyr::from_multiple_with_options::` | 20 | 245,062 | 20 | 67.763 | 13.83 | 396,987 | 3,780 | +| `serde_yaml::Value` stream | 20 | 245,062 | 20 | 51.019 | 10.41 | 396,987 | 3,780 | +| `yaml_rust2::YamlLoader` | 20 | 245,062 | 20 | 38.470 | 7.85 | 382,497 | 3,796 | +| `saphyr::Yaml::load_from_str` | 20 | 245,062 | 20 | 36.419 | 7.43 | 534,786 | 3,780 | ### stackable_dummy_cluster @@ -158,12 +186,15 @@ One pinned Stackable CRD / 177,556 bytes / 1 YAML document. 
| parser/load path | iterations | bytes per iteration | docs per iteration | elapsed ms | ns/byte | peak retained bytes | peak retained heap objects | |---|---:|---:|---:|---:|---:|---:|---:| -| `saneyaml::parse_documents` | 20 | 177,556 | 1 | 23.209 | 6.54 | 486,188 | 3,983 | -| `saneyaml::parse_borrowed_documents` | 20 | 177,556 | 1 | 24.466 | 6.89 | 173,556 | 904 | -| `saneyaml::from_documents_str::` | 20 | 177,556 | 1 | 25.708 | 7.24 | 217,483 | 3,780 | -| `serde_yaml::Value` stream | 20 | 177,556 | 1 | 34.069 | 9.59 | 396,987 | 3,780 | -| `yaml_rust2::YamlLoader` | 20 | 177,556 | 1 | 25.730 | 7.25 | 382,497 | 3,796 | -| `saphyr::Yaml::load_from_str` | 20 | 177,556 | 1 | 24.722 | 6.96 | 534,786 | 3,780 | +| `saneyaml::parse_documents` | 20 | 177,556 | 1 | 20.570 | 5.79 | 486,188 | 3,983 | +| `saneyaml::parse_borrowed_documents` | 20 | 177,556 | 1 | 19.159 | 5.40 | 173,556 | 904 | +| `saneyaml::from_documents_str::` | 20 | 177,556 | 1 | 24.524 | 6.91 | 217,483 | 3,780 | +| `saneyaml::from_documents_str::` | 20 | 177,556 | 1 | 25.203 | 7.10 | 378,843 | 3,780 | +| `saneyaml::__unstable_event_serde::from_documents_str::` | 20 | 177,556 | 1 | 47.657 | 13.42 | 396,987 | 3,780 | +| `serde_saphyr::from_multiple_with_options::` | 20 | 177,556 | 1 | 41.445 | 11.67 | 396,987 | 3,780 | +| `serde_yaml::Value` stream | 20 | 177,556 | 1 | 32.343 | 9.11 | 396,987 | 3,780 | +| `yaml_rust2::YamlLoader` | 20 | 177,556 | 1 | 24.245 | 6.83 | 382,497 | 3,796 | +| `saphyr::Yaml::load_from_str` | 20 | 177,556 | 1 | 23.095 | 6.50 | 534,786 | 3,780 | ### generated_multi_doc_stream_1mib @@ -172,12 +203,15 @@ documents. 
| parser/load path | iterations | bytes per iteration | docs per iteration | elapsed ms | ns/byte | peak retained bytes | peak retained heap objects | |---|---:|---:|---:|---:|---:|---:|---:| -| `saneyaml::parse_documents` | 20 | 1,048,680 | 8,020 | 472.805 | 22.54 | 13,006,500 | 128,321 | -| `saneyaml::parse_borrowed_documents` | 20 | 1,048,680 | 8,020 | 500.419 | 23.86 | 4,106,240 | 32,081 | -| `saneyaml::from_documents_str::` | 20 | 1,048,680 | 8,020 | 542.758 | 25.88 | 4,729,860 | 112,281 | -| `serde_yaml::Value` stream | 20 | 1,048,680 | 8,020 | 681.650 | 32.50 | 11,607,364 | 112,281 | -| `yaml_rust2::YamlLoader` | 20 | 1,048,680 | 8,020 | 538.442 | 25.67 | 10,386,948 | 112,281 | -| `saphyr::Yaml::load_from_str` | 20 | 1,048,680 | 8,020 | 518.104 | 24.70 | 14,770,560 | 112,281 | +| `saneyaml::parse_documents` | 20 | 1,048,680 | 8,020 | 367.321 | 17.51 | 13,006,500 | 128,321 | +| `saneyaml::parse_borrowed_documents` | 20 | 1,048,680 | 8,020 | 361.229 | 17.22 | 4,106,240 | 32,081 | +| `saneyaml::from_documents_str::` | 20 | 1,048,680 | 8,020 | 495.592 | 23.63 | 4,729,860 | 112,281 | +| `saneyaml::from_documents_str::` | 20 | 1,048,680 | 8,020 | 538.155 | 25.66 | 9,862,660 | 112,281 | +| `saneyaml::__unstable_event_serde::from_documents_str::` | 20 | 1,048,680 | 8,020 | 1,116.471 | 53.23 | 11,607,364 | 112,281 | +| `serde_saphyr::from_multiple_with_options::` | 20 | 1,048,680 | 8,020 | 1,223.597 | 58.34 | 11,607,364 | 112,281 | +| `serde_yaml::Value` stream | 20 | 1,048,680 | 8,020 | 661.464 | 31.54 | 11,607,364 | 112,281 | +| `yaml_rust2::YamlLoader` | 20 | 1,048,680 | 8,020 | 545.253 | 26.00 | 10,386,948 | 112,281 | +| `saphyr::Yaml::load_from_str` | 20 | 1,048,680 | 8,020 | 534.700 | 25.49 | 14,770,560 | 112,281 | ### generated_wide_mapping_256kib @@ -185,12 +219,15 @@ Generated one-document wide service mapping / 262,176 bytes. 
| parser/load path | iterations | bytes per iteration | docs per iteration | elapsed ms | ns/byte | peak retained bytes | peak retained heap objects | |---|---:|---:|---:|---:|---:|---:|---:| -| `saneyaml::parse_documents` | 20 | 262,176 | 1 | 86.486 | 16.49 | 2,484,775 | 23,932 | -| `saneyaml::parse_borrowed_documents` | 20 | 262,176 | 1 | 89.698 | 17.11 | 765,792 | 2,994 | -| `saneyaml::from_documents_str::` | 20 | 262,176 | 1 | 98.223 | 18.73 | 938,236 | 17,950 | -| `serde_yaml::Value` stream | 20 | 262,176 | 1 | 126.465 | 24.12 | 1,895,692 | 17,950 | -| `yaml_rust2::YamlLoader` | 20 | 262,176 | 1 | 102.841 | 19.61 | 1,704,220 | 17,950 | -| `saphyr::Yaml::load_from_str` | 20 | 262,176 | 1 | 93.165 | 17.77 | 2,393,312 | 17,950 | +| `saneyaml::parse_documents` | 20 | 262,176 | 1 | 78.226 | 14.92 | 2,484,775 | 23,932 | +| `saneyaml::parse_borrowed_documents` | 20 | 262,176 | 1 | 73.898 | 14.09 | 765,792 | 2,994 | +| `saneyaml::from_documents_str::` | 20 | 262,176 | 1 | 107.359 | 20.47 | 938,236 | 17,950 | +| `saneyaml::from_documents_str::` | 20 | 262,176 | 1 | 106.085 | 20.23 | 1,895,476 | 17,950 | +| `saneyaml::__unstable_event_serde::from_documents_str::` | 20 | 262,176 | 1 | 223.815 | 42.68 | 1,895,692 | 17,950 | +| `serde_saphyr::from_multiple_with_options::` | 20 | 262,176 | 1 | 212.831 | 40.59 | 1,895,692 | 17,950 | +| `serde_yaml::Value` stream | 20 | 262,176 | 1 | 114.758 | 21.89 | 1,895,692 | 17,950 | +| `yaml_rust2::YamlLoader` | 20 | 262,176 | 1 | 102.762 | 19.60 | 1,704,220 | 17,950 | +| `saphyr::Yaml::load_from_str` | 20 | 262,176 | 1 | 97.245 | 18.55 | 2,393,312 | 17,950 | ### generated_wide_mapping_1mib @@ -198,17 +235,26 @@ Generated one-document wide service mapping / 1,048,661 bytes. 
| parser/load path | iterations | bytes per iteration | docs per iteration | elapsed ms | ns/byte | peak retained bytes | peak retained heap objects | |---|---:|---:|---:|---:|---:|---:|---:| -| `saneyaml::parse_documents` | 20 | 1,048,661 | 1 | 355.523 | 16.95 | 9,893,619 | 95,236 | -| `saneyaml::parse_borrowed_documents` | 20 | 1,048,661 | 1 | 374.905 | 17.88 | 3,047,520 | 11,907 | -| `saneyaml::from_documents_str::` | 20 | 1,048,661 | 1 | 417.223 | 19.89 | 3,739,059 | 71,428 | -| `serde_yaml::Value` stream | 20 | 1,048,661 | 1 | 503.740 | 24.02 | 7,548,675 | 71,428 | -| `yaml_rust2::YamlLoader` | 20 | 1,048,661 | 1 | 412.899 | 19.69 | 6,786,771 | 71,428 | -| `saphyr::Yaml::load_from_str` | 20 | 1,048,661 | 1 | 371.689 | 17.72 | 9,523,712 | 71,428 | +| `saneyaml::parse_documents` | 20 | 1,048,661 | 1 | 309.172 | 14.74 | 9,893,619 | 95,236 | +| `saneyaml::parse_borrowed_documents` | 20 | 1,048,661 | 1 | 282.013 | 13.45 | 3,047,520 | 11,907 | +| `saneyaml::from_documents_str::` | 20 | 1,048,661 | 1 | 419.435 | 20.00 | 3,739,059 | 71,428 | +| `saneyaml::from_documents_str::` | 20 | 1,048,661 | 1 | 417.003 | 19.88 | 7,548,459 | 71,428 | +| `saneyaml::__unstable_event_serde::from_documents_str::` | 20 | 1,048,661 | 1 | 878.512 | 41.89 | 7,548,675 | 71,428 | +| `serde_saphyr::from_multiple_with_options::` | 20 | 1,048,661 | 1 | 843.638 | 40.22 | 7,548,675 | 71,428 | +| `serde_yaml::Value` stream | 20 | 1,048,661 | 1 | 477.817 | 22.78 | 7,548,675 | 71,428 | +| `yaml_rust2::YamlLoader` | 20 | 1,048,661 | 1 | 420.142 | 20.03 | 6,786,771 | 71,428 | +| `saphyr::Yaml::load_from_str` | 20 | 1,048,661 | 1 | 401.299 | 19.13 | 9,523,712 | 71,428 | Large-input story: after zero-copy line storage, the no-merge fast path, delayed plain-scalar continuation allocation, and retained vector right-sizing, `saneyaml::parse_documents` beats `yaml_rust2` and `saphyr` on every -large parser path in the latest capture on an unloaded machine. 
The smallest +large parser path in the latest capture on an unloaded machine. In the matched +Serde value lane, `saneyaml::from_documents_str::` is faster +than `serde_saphyr::from_multiple_with_options::` on every +large-input row. The hidden event-backed Serde prototype only wins against +serde-saphyr on the generated multi-document stream in this capture and remains +slower on the other large rows; it also retains the same `serde_yaml::Value` +output shape, so it does not improve retained output memory yet. The smallest corpus (`external_downstream_all`) is the most contention-sensitive, so its ordering is the first to wobble under load; the larger corpora hold a clearer margin. The retained-memory story is now split @@ -230,9 +276,9 @@ paths, not constant-memory readers. Captured in a single `release` session against the in-repo harnesses: ```sh -YAML_BENCH_ITERS=1000 cargo run --release --example real_world_benchmark -cargo run --release --example dhat_memory -- --all -cargo run --release --example conformance_compare +YAML_BENCH_ITERS=1000 cargo run --locked --release --example real_world_benchmark +cargo run --locked --release --example dhat_memory -- --all +cargo run --locked --release --example conformance_compare ``` ### Real-world config corpus (1,000 iterations) @@ -241,7 +287,7 @@ cargo run --release --example conformance_compare from a separate same-session run of this milestone, not the same measurement as the "Real-World Config Corpus" table above; the corpus is identical but the per-loader ns/byte figures differ run to run (for example `saphyr` reads 21.42 -here versus 19.89 there), which is the run-to-run noise the methodology caveat +here versus 19.80 there), which is the run-to-run noise the methodology caveat describes. | parser/load path | ns/byte | @@ -257,7 +303,8 @@ On this corpus `saneyaml::parse_documents` is the fastest load path; the owning ### Allocator-backed memory (dhat), 1 MiB multi-document stream -8,020 documents. 
`retained blocks` is the count of live allocations at peak. +8,020 documents. `retained blocks` is the count of live allocations still held +while the parsed output is retained. | path | allocs | bytes allocated | peak | retained blocks | |---|---:|---:|---:|---:| @@ -265,31 +312,44 @@ On this corpus `saneyaml::parse_documents` is the fastest load path; the owning | `saneyaml` stream events | 232,594 | 49.28 MB | 2.11 MB | 6 | | `saneyaml` borrowed | 80,219 | 17.29 MB | 6.21 MB | 32,081 | | `saneyaml` owned | 200,519 | 16.05 MB | 15.12 MB | 128,321 | +| `saneyaml` Value | 449,140 | 25.07 MB | 15.12 MB | 112,281 | | `yaml-rust2` | 585,478 | 29.29 MB | 17.15 MB | 192,481 | +| `saneyaml` as `serde_yaml::Value` | 465,180 | 39.73 MB | 20.79 MB | 136,341 | +| `saneyaml` event-backed as `serde_yaml::Value` | 1,114,806 | 175.38 MB | 22.83 MB | 136,341 | +| `serde-saphyr` as `serde_yaml::Value` | 577,465 | 59.71 MB | 21.79 MB | 136,344 | | `serde_yaml` | 721,821 | 84.73 MB | 21.84 MB | 136,341 | | `saphyr` | 216,559 | 22.77 MB | 22.30 MB | 192,481 | On a multi-document stream the streaming loaders hold a bounded working set (retained blocks stay at 4–6 regardless of stream length) and post the lowest peak; the borrowed batch tree has the lowest peak among the non-streaming -loaders. +loaders. The event-backed Serde prototype is allocation-heavy here because it +still consumes parser-recorded event frames rather than a direct parser-to-Serde +stream. 
### Allocator-backed memory (dhat), 1 MiB wide single document | path | peak | retained blocks | |---|---:|---:| +| `serde-saphyr` as `serde_yaml::Value` | 10.73 MB | 83,337 | | `yaml-rust2` | 10.98 MB | 130,951 | | `saphyr` | 14.10 MB | 130,951 | | `saneyaml` borrowed | 15.32 MB | 11,907 | -| `saneyaml` owned | 15.66 MB | 95,236 | +| `saneyaml` owned | 16.16 MB | 95,236 | | `saneyaml` stream docs | 16.16 MB | 4 | +| `saneyaml` Value | 16.39 MB | 71,428 | +| `saneyaml` as `serde_yaml::Value` | 19.91 MB | 83,334 | +| `serde_yaml` | 23.42 MB | 83,334 | | `saneyaml` stream events | 62.22 MB | 6 | +| `saneyaml` event-backed as `serde_yaml::Value` | 78.54 MB | 83,334 | Streaming only helps when there are document boundaries to reclaim at. On a single wide document there is nothing to reclaim mid-parse, so `yaml-rust2` and `saphyr` post lower peaks than saneyaml on this shape, and the event-streaming -path is the worst here because it buffers per-event output for one large -document. Streaming is a multi-document memory win, not a universal one. +path is expensive here because it buffers per-event output for one large +document. The matched `serde-saphyr` value row posts a low wide-document peak, +while the event-backed Serde prototype is the highest peak in this capture. +Streaming is a multi-document memory win, not a universal one. ### Conformance (402 curated cases) @@ -317,13 +377,13 @@ single capture as indicative rather than authoritative. 
| captured section | checkout-only command | |---|---| -| Real-World Config Corpus | `cargo run --release --example real_world_benchmark` | -| Real-world corpus (1,000 iterations) | `YAML_BENCH_ITERS=1000 cargo run --release --example real_world_benchmark` | -| Large Inputs (all corpora) | `cargo run --release --example large_input_benchmark` | -| Large Inputs (custom iteration count) | `YAML_LARGE_BENCH_ITERS=20 cargo run --release --example large_input_benchmark` | -| Allocator-backed memory (dhat) | `cargo run --release --example dhat_memory -- --all` | -| dhat single (library, corpus) pair | `cargo run --release --example dhat_memory -- saneyaml-borrowed multidoc` | -| Conformance (402 curated cases) | `cargo run --release --example conformance_compare` | +| Real-World Config Corpus | `cargo run --locked --release --example real_world_benchmark` | +| Real-world corpus (1,000 iterations) | `YAML_BENCH_ITERS=1000 cargo run --locked --release --example real_world_benchmark` | +| Large Inputs (all corpora) | `cargo run --locked --release --example large_input_benchmark` | +| Large Inputs (custom iteration count) | `YAML_LARGE_BENCH_ITERS=20 cargo run --locked --release --example large_input_benchmark` | +| Allocator-backed memory (dhat) | `cargo run --locked --release --example dhat_memory -- --all` | +| dhat single (library, corpus) pair | `cargo run --locked --release --example dhat_memory -- saneyaml-borrowed multidoc` | +| Conformance (402 curated cases) | `cargo run --locked --release --example conformance_compare` | Iteration counts default to 200 for `real_world_benchmark` (`YAML_BENCH_ITERS`) and 20 for `large_input_benchmark` (`YAML_LARGE_BENCH_ITERS`). 
The @@ -338,6 +398,7 @@ dev-dependency versions (see `Cargo.toml`): | crate | version | |---|---| +| `serde-saphyr` | 0.0.27 (`default-features = false`, `deserialize`) | | `serde_yaml` | 0.9.34 | | `saphyr` | 0.0.6 | | `saphyr-parser` | 0.0.6 | diff --git a/docs/EVENT_BACKED_SERDE.md b/docs/EVENT_BACKED_SERDE.md new file mode 100644 index 0000000..85f3b34 --- /dev/null +++ b/docs/EVENT_BACKED_SERDE.md @@ -0,0 +1,112 @@ +# Event-Backed Serde Workpad + +This work tracks a saneyaml-native implementation of the useful ideas exposed by +the serde-saphyr comparison without copying serde-saphyr's architecture or +semantics. + +## Current Shape + +- `from_str`, `from_slice`, and `from_documents_str` parse into spanful `Node` + trees before handing values to Serde. +- `Deserializer::from_str` is document-iterating, but it still owns parsed + `Node` documents internally. +- `DocumentStream` bounds retained parsed documents, but reader-backed Serde + entrypoints still read all input bytes first. +- `EventStream` exposes useful parser events, spans, tags, anchors, and document + boundaries, but it currently records events while the tree parser is already + constructing document nodes. +- The private event-backed Serde prototype now collects one document's event + frame at a time for typed document iteration, but it is not wired into public + entrypoints. A `#[doc(hidden)]` `__unstable_event_serde` wrapper exists only + so source-checkout benchmarks can measure this path. + +## Direction + +The target design is a private event-backed Serde engine that eventually reads +from parser events directly instead of requiring a public `Node` tree first. +That path should serve typed Serde reads and reader-backed document iteration. +Tree, `Value`, spanful, and lossless APIs should remain tree/lossless backed. 
+ +The first vertical slice is intentionally narrow: + +- deserialize ordinary typed structs, sequences, maps, options, scalars, and + `IgnoredAny` from saneyaml events; +- preserve scalar source behavior for borrowed strings when spans allow it; +- retain scalar and collection tags for generic `Value` reads while honoring + explicit YAML core scalar tags (`!!str`, `!!int`, `!!float`, `!!bool`, + `!!null`) for typed reads, including directive-driven YAML 1.1 spellings; +- reject duplicate scalar and complex mapping keys (including alias-expanded + keys) before Serde map targets can overwrite them, even in mappings reached + through aliases or skipped by `IgnoredAny`; +- replay ordinary acyclic scalar, sequence, and mapping aliases through recorded + event subtrees with the existing alias expansion budget; +- expand default/strict merge keys, merge lists, explicit target overrides, and + YAML 1.1-compatible repeated/literal merge recovery; +- iterate typed document streams from string/slice input without retaining one + full-stream event vector, and from reader input after the existing bounded + read-to-end step; +- reject unsupported event features instead of silently accepting weaker + semantics. + +The current prototype lives in `src/event_de.rs` as a crate-private compiled +module. It proves the Serde visitor shape, per-document event-frame iteration, +reader-backed owned iteration after bounded input buffering, document-indexed +errors, borrowed-string behavior, duplicate-key rejection, tagged scalar and +collection projection, merge-key expansion/recovery, and +scalar/sequence/mapping alias replay for values and duplicate-key identity +without changing public entrypoints. Generic tagged collections and merge maps +currently use a temporary prepared `Node` handoff after event preflight; this +preserves semantics but is not the final live-event performance shape.
+ +Current benchmark evidence keeps the path private: on the 2026-06-06 capture in +`docs/BENCHMARKS.md`, the hidden event-backed `serde_yaml::Value` lane is slower +than tree-backed saneyaml on every measured corpus. It only beats serde-saphyr +on the generated 1 MiB multi-document stream, and dhat shows higher allocation +traffic because the prototype still consumes parser-recorded event frames. + +## Required Semantic Gates + +The event-backed path must not become public or replace existing entrypoints +until it preserves these saneyaml contracts: + +- YAML 1.1 merge recovery edge cases continue to match current semantic loading; +- aliases replay acyclic values under the same expansion and depth budgets; +- recursive or expansive aliases still fail before resource exhaustion; +- collection tags and tagged enum projection retain current typed Serde behavior; +- YAML 1.2 remains the default schema, with existing opt-in YAML 1.1 modes; +- errors retain document indexes, paths, spans, and related diagnostics. + +## Benchmark Gates + +Compare the new path against the current tree-backed Serde path and the pinned +`serde-saphyr` benchmark lane using the existing real-world, large-input, and +dhat examples. The saneyaml-native event path must parse equivalent input under +equivalent safety work; document skipping, tag stripping, duplicate bypassing, +or merge omission cannot count as a win. External `serde-saphyr` comparison rows +may keep unavoidable public-contract normalization only when it is explicit and +preflight-checked by the harness. + +## 2026-06-06 Checkpoint + +- Implementation: `src/event_de.rs` contains the private event-backed Serde + engine, per-document typed iterators, reader-backed owned iteration, alias + replay budgeting, duplicate-key preflight, merge expansion/recovery, tagged + scalar/collection handling, and document-indexed errors. 
+- Benchmark surface: `saneyaml::__unstable_event_serde` exposes hidden + collection-returning wrappers for source-checkout benchmarks only; it is + excluded from the public API snapshot. +- Benchmark result: the event-backed `serde_yaml::Value` lane is semantically + equivalent to tree-backed saneyaml in the benchmark preflight, but slower on + most measured corpora and allocation-heavy in dhat. It should stay private + until parser events can feed Serde without first constructing and recording + tree-backed event frames. +- Verification: the checkpoint was checked with `cargo fmt --check`, + `cargo check --locked --all-targets`, `cargo check --locked --examples`, + `cargo test --locked --lib event_de`, the same event test under + `--no-default-features`, `cargo test --locked --test streaming_api`, + `cargo test --locked --test schema_modes`, + `cargo test --locked --test trust_metadata`, + `cargo test --locked --test runtime_dependency_closure`, + `cargo package --locked --allow-dirty`, `scripts/check-public-api.sh`, + and the real-world, large-input, and dhat benchmark commands documented in + `docs/BENCHMARKS.md`. diff --git a/examples/README.md b/examples/README.md index 853ef25..26e54de 100644 --- a/examples/README.md +++ b/examples/README.md @@ -14,7 +14,7 @@ local development, benchmarking, and conformance checks against the source tree. | `conformance_compare.rs` | no | Head-to-head YAML test-suite conformance comparison. | The benchmark and conformance examples depend on dev-dependencies (for example -`serde_yaml`, `saphyr`, `yaml-rust2`, `dhat`) and on in-repo fixtures that are -not part of the published package, so they only build from a checkout of this -repository. See `docs/BENCHMARKS.md` for the exact commands and pinned -reference-crate versions used to capture the published numbers. 
+`serde_yaml`, `serde-saphyr`, `saphyr`, `yaml-rust2`, `dhat`) and on in-repo +fixtures that are not part of the published package, so they only build from a +checkout of this repository. See `docs/BENCHMARKS.md` for the exact commands and +pinned reference-crate versions used to capture the published numbers. diff --git a/examples/dhat_memory.rs b/examples/dhat_memory.rs index adb3ba6..85de0c0 100644 --- a/examples/dhat_memory.rs +++ b/examples/dhat_memory.rs @@ -4,20 +4,23 @@ //! dhat installs a global allocator, so each library must be measured in its //! own process to keep the numbers clean. Run one at a time: //! -//! cargo run --release --example dhat_memory -- saneyaml-borrowed multidoc -//! cargo run --release --example dhat_memory -- saphyr multidoc -//! cargo run --release --example dhat_memory -- saneyaml-stream-docs multidoc16 +//! cargo run --locked --release --example dhat_memory -- saneyaml-borrowed multidoc +//! cargo run --locked --release --example dhat_memory -- serde-saphyr-yaml-value multidoc +//! cargo run --locked --release --example dhat_memory -- saneyaml-event-yaml-value multidoc +//! cargo run --locked --release --example dhat_memory -- saphyr multidoc +//! cargo run --locked --release --example dhat_memory -- saneyaml-stream-docs multidoc16 //! //! Or use the bundled driver that sweeps every (library, corpus) pair: //! -//! cargo run --release --example dhat_memory -- --all +//! cargo run --locked --release --example dhat_memory -- --all //! -//! Reported per parse (deltas measured *after* the input is generated, so the -//! ~1 MiB input string is excluded from the figures): +//! Reported per parse. Allocation and retained deltas are measured *after* the +//! input is generated, so the input string is excluded from those figures. +//! `peak` is dhat's absolute process peak and can include the resident input: //! - allocations : number of heap allocations made during the parse //! 
- bytes allocated : total bytes allocated during the parse //! - retained : heap still live while the parsed tree is held -//! - peak : max simultaneously-live heap during the parse +//! - peak : max simultaneously-live heap reported by dhat use std::env; use std::hint::black_box; @@ -29,8 +32,11 @@ const LIBS: &[&str] = &[ "saneyaml-borrowed", "saneyaml-owned", "saneyaml-value", + "saneyaml-yaml-value", + "saneyaml-event-yaml-value", "saneyaml-stream-docs", "saneyaml-stream-events", + "serde-saphyr-yaml-value", "serde_yaml", "yaml-rust2", "saphyr", @@ -93,10 +99,11 @@ fn report(lib: &str, corpus: &str, before: &dhat::HeapStats, after: &dhat::HeapS fn measure(lib: &str, corpus: &str) { let input = corpus_input(corpus); - // `before` is snapshotted after the input string exists, so the ~1 MiB - // input is excluded. The parsed value is held (black_box) across `after` - // so retained memory reflects the live tree. The borrowed tree borrows - // from `input`, hence the per-arm measurement rather than a boxed return. + // `before` is snapshotted after the input string exists, so allocation and + // retained deltas exclude it. The parsed value is held (black_box) across + // `after` so retained memory reflects the live tree. The borrowed tree + // borrows from `input`, hence the per-arm measurement rather than a boxed + // return. macro_rules! 
measure_arm { ($parse:expr) => {{ let before = dhat::HeapStats::get(); @@ -117,6 +124,14 @@ fn measure(lib: &str, corpus: &str) { "saneyaml-value" => measure_arm!( saneyaml::from_documents_str::(&input).expect("saneyaml value") ), + "saneyaml-yaml-value" => measure_arm!( + saneyaml::from_documents_str::(&input) + .expect("saneyaml serde_yaml value") + ), + "saneyaml-event-yaml-value" => measure_arm!( + saneyaml::__unstable_event_serde::from_documents_str::(&input) + .expect("saneyaml event-backed serde_yaml value") + ), "saneyaml-stream-docs" => { let before = dhat::HeapStats::get(); let mut stream = @@ -155,6 +170,7 @@ fn measure(lib: &str, corpus: &str) { .collect::>() ) } + "serde-saphyr-yaml-value" => measure_arm!(serde_saphyr_yaml_value_documents(&input)), "yaml-rust2" => { measure_arm!(yaml_rust2::YamlLoader::load_from_str(&input).expect("yaml-rust2 load")) } @@ -206,7 +222,20 @@ fn main() { use saphyr::LoadableYamlNode; black_box(saphyr::Yaml::load_from_str(&input).expect("saphyr")); } - other => panic!("profile supports saneyaml-borrowed | saphyr, got {other:?}"), + "serde-saphyr-yaml-value" => { + black_box(serde_saphyr_yaml_value_documents(&input)); + } + "saneyaml-event-yaml-value" => { + black_box( + saneyaml::__unstable_event_serde::from_documents_str::( + &input, + ) + .expect("saneyaml event-backed serde_yaml value"), + ); + } + other => panic!( + "profile supports saneyaml-borrowed | saphyr | serde-saphyr-yaml-value | saneyaml-event-yaml-value, got {other:?}" + ), } black_box(input.len()); return; @@ -221,3 +250,34 @@ fn main() { let _profiler = dhat::Profiler::builder().testing().build(); measure(lib, corpus); } + +fn serde_saphyr_yaml_value_documents(input: &str) -> Vec { + serde_saphyr::from_multiple_with_options::( + input, + serde_saphyr_benchmark_options(), + ) + .expect("serde-saphyr serde_yaml value") +} + +fn serde_saphyr_benchmark_options() -> serde_saphyr::Options { + let many = usize::MAX; + serde_saphyr::options! 
{ + strict_booleans: true, + budget: serde_saphyr::budget! { + max_reader_input_bytes: None, + max_events: many, + max_aliases: many, + max_anchors: many, + max_depth: many, + max_inclusion_depth: u32::MAX, + max_documents: many, + max_nodes: many, + max_total_scalar_bytes: many, + max_total_comment_bytes: many, + max_merge_keys: many, + enforce_alias_anchor_ratio: false, + alias_anchor_min_aliases: many, + alias_anchor_ratio_multiplier: many, + }, + } +} diff --git a/examples/large_input_benchmark.rs b/examples/large_input_benchmark.rs index cc4ccbe..413a331 100644 --- a/examples/large_input_benchmark.rs +++ b/examples/large_input_benchmark.rs @@ -306,6 +306,9 @@ fn main() { measure_yaml_parse_documents(corpus, iterations), measure_yaml_parse_borrowed_documents(corpus, iterations), measure_yaml_value(corpus, iterations), + measure_yaml_serde_yaml_value(corpus, iterations), + measure_yaml_event_serde_yaml_value(corpus, iterations), + measure_serde_saphyr_serde_yaml_value(corpus, iterations), measure_serde_yaml_value(corpus, iterations), measure_yaml_rust2(corpus, iterations), measure_saphyr(corpus, iterations), @@ -338,9 +341,19 @@ fn downstream_fixtures() -> Vec> { } fn validate_corpus(corpus: &Corpus<'_>) { + let serde_saphyr_options = serde_saphyr_benchmark_options(); for fixture in &corpus.fixtures { let owned = saneyaml::parse_documents(&fixture.input).expect(fixture.path); let borrowed = saneyaml::parse_borrowed_documents(&fixture.input).expect(fixture.path); + let saneyaml_serde_yaml_docs = + saneyaml::from_documents_str::(&fixture.input).expect(fixture.path); + let saneyaml_event_serde_yaml_docs = + saneyaml::__unstable_event_serde::from_documents_str::( + &fixture.input, + ) + .expect(fixture.path); + let serde_saphyr_serde_yaml_docs = + serde_saphyr_serde_yaml_documents(&fixture.input, fixture.path, &serde_saphyr_options); assert_eq!(owned.len(), fixture.docs, "{} document count", fixture.path); assert_eq!( borrowed.len(), @@ -348,6 +361,35 @@ fn 
validate_corpus(corpus: &Corpus<'_>) { "{} borrowed document count", fixture.path ); + assert_eq!( + saneyaml_serde_yaml_docs.len(), + fixture.docs, + "{} saneyaml serde_yaml::Value document count", + fixture.path + ); + assert_eq!( + saneyaml_event_serde_yaml_docs.len(), + fixture.docs, + "{} saneyaml event-backed serde_yaml::Value document count", + fixture.path + ); + assert_eq!( + saneyaml_event_serde_yaml_docs, saneyaml_serde_yaml_docs, + "{} event-backed serde_yaml::Value document shape", + fixture.path + ); + assert_eq!( + serde_saphyr_serde_yaml_docs.len(), + serde_saphyr_comparable_documents(&saneyaml_serde_yaml_docs).len(), + "{} serde-saphyr serde_yaml::Value document count", + fixture.path + ); + assert_eq!( + serde_saphyr_serde_yaml_docs, + serde_saphyr_comparable_documents(&saneyaml_serde_yaml_docs), + "{} generic serde_yaml::Value document shape", + fixture.path + ); for (index, (owned, borrowed)) in owned.iter().zip(&borrowed).enumerate() { let owned_value = saneyaml::Value::from(owned); let borrowed_value = borrowed.clone().into_owned_value(); @@ -403,6 +445,56 @@ fn measure_yaml_value(corpus: &Corpus<'_>, iterations: usize) -> BenchResult { ) } +fn measure_yaml_serde_yaml_value(corpus: &Corpus<'_>, iterations: usize) -> BenchResult { + measure( + "saneyaml::from_documents_str::", + corpus, + iterations, + |input, path| { + saneyaml::from_documents_str::(input) + .expect(path) + .len() + }, + |input, path| { + let docs = saneyaml::from_documents_str::(input).expect(path); + retained_serde_yaml_docs(&docs) + }, + ) +} + +fn measure_yaml_event_serde_yaml_value(corpus: &Corpus<'_>, iterations: usize) -> BenchResult { + measure( + "saneyaml::__unstable_event_serde::from_documents_str::", + corpus, + iterations, + |input, path| { + saneyaml::__unstable_event_serde::from_documents_str::(input) + .expect(path) + .len() + }, + |input, path| { + let docs = + saneyaml::__unstable_event_serde::from_documents_str::(input) + .expect(path); + 
retained_serde_yaml_docs(&docs) + }, + ) +} + +fn measure_serde_saphyr_serde_yaml_value(corpus: &Corpus<'_>, iterations: usize) -> BenchResult { + let options = serde_saphyr_benchmark_options(); + measure( + "serde_saphyr::from_multiple_with_options::", + corpus, + iterations, + |input, path| serde_saphyr_serde_yaml_documents(input, path, &options).len(), + |input, path| { + let docs = serde_saphyr_serde_yaml_documents(input, path, &options); + retained_serde_yaml_docs(&docs) + }, + ) +} + fn measure_serde_yaml_value(corpus: &Corpus<'_>, iterations: usize) -> BenchResult { measure( "serde_yaml::Value stream", @@ -799,3 +891,61 @@ fn retained_saphyr_scalar(scalar: &saphyr::Scalar<'_>) -> Retained { | saphyr::Scalar::FloatingPoint(_) => Retained::default(), } } + +fn serde_saphyr_serde_yaml_documents( + input: &str, + path: &str, + options: &serde_saphyr::Options, +) -> Vec { + serde_saphyr::from_multiple_with_options::(input, options.clone()) + .unwrap_or_else(|error| panic!("{path}: {error}")) +} + +fn serde_saphyr_comparable_documents(docs: &[serde_yaml::Value]) -> Vec { + // Match serde-saphyr's serde_yaml::Value contract: skip null docs and + // treat tags as transparent. + docs.iter() + .filter(|doc| !doc.is_null()) + .map(strip_serde_yaml_tags) + .collect() +} + +fn strip_serde_yaml_tags(value: &serde_yaml::Value) -> serde_yaml::Value { + match value { + serde_yaml::Value::Sequence(items) => { + serde_yaml::Value::Sequence(items.iter().map(strip_serde_yaml_tags).collect()) + } + serde_yaml::Value::Mapping(mapping) => { + let mut stripped = serde_yaml::Mapping::new(); + for (key, value) in mapping { + stripped.insert(strip_serde_yaml_tags(key), strip_serde_yaml_tags(value)); + } + serde_yaml::Value::Mapping(stripped) + } + serde_yaml::Value::Tagged(tagged) => strip_serde_yaml_tags(&tagged.value), + value => value.clone(), + } +} + +fn serde_saphyr_benchmark_options() -> serde_saphyr::Options { + let many = usize::MAX; + serde_saphyr::options! 
{ + strict_booleans: true, + budget: serde_saphyr::budget! { + max_reader_input_bytes: None, + max_events: many, + max_aliases: many, + max_anchors: many, + max_depth: many, + max_inclusion_depth: u32::MAX, + max_documents: many, + max_nodes: many, + max_total_scalar_bytes: many, + max_total_comment_bytes: many, + max_merge_keys: many, + enforce_alias_anchor_ratio: false, + alias_anchor_min_aliases: many, + alias_anchor_ratio_multiplier: many, + }, + } +} diff --git a/examples/real_world_benchmark.rs b/examples/real_world_benchmark.rs index b6f771d..ac51b26 100644 --- a/examples/real_world_benchmark.rs +++ b/examples/real_world_benchmark.rs @@ -211,6 +211,7 @@ fn main() { .unwrap_or(200); let bytes_per_iteration = FIXTURES.iter().map(|fixture| fixture.input.len()).sum(); let docs_per_iteration = FIXTURES.iter().map(|fixture| fixture.docs).sum(); + let serde_saphyr_options = serde_saphyr_benchmark_options(); for fixture in FIXTURES { assert_eq!( @@ -221,6 +222,44 @@ fn main() { "{} document count", fixture.path ); + let saneyaml_serde_yaml_docs = + saneyaml::from_documents_str::(fixture.input).expect(fixture.path); + let saneyaml_event_serde_yaml_docs = + saneyaml::__unstable_event_serde::from_documents_str::( + fixture.input, + ) + .expect(fixture.path); + let serde_saphyr_serde_yaml_docs = + serde_saphyr_serde_yaml_documents(fixture.input, fixture.path, &serde_saphyr_options); + assert_eq!( + saneyaml_serde_yaml_docs.len(), + fixture.docs, + "{} saneyaml serde_yaml::Value document count", + fixture.path + ); + assert_eq!( + saneyaml_event_serde_yaml_docs.len(), + fixture.docs, + "{} saneyaml event-backed serde_yaml::Value document count", + fixture.path + ); + assert_eq!( + saneyaml_event_serde_yaml_docs, saneyaml_serde_yaml_docs, + "{} event-backed serde_yaml::Value document shape", + fixture.path + ); + assert_eq!( + serde_saphyr_serde_yaml_docs.len(), + serde_saphyr_comparable_documents(&saneyaml_serde_yaml_docs).len(), + "{} serde-saphyr serde_yaml::Value 
document count", + fixture.path + ); + assert_eq!( + serde_saphyr_serde_yaml_docs, + serde_saphyr_comparable_documents(&saneyaml_serde_yaml_docs), + "{} generic serde_yaml::Value document shape", + fixture.path + ); } let results = [ @@ -256,6 +295,59 @@ fn main() { .sum() }, ), + measure( + "saneyaml::from_documents_str::", + iterations, + bytes_per_iteration, + docs_per_iteration, + || { + FIXTURES + .iter() + .map(|fixture| { + saneyaml::from_documents_str::(fixture.input) + .expect(fixture.path) + .len() + }) + .sum() + }, + ), + measure( + "saneyaml::__unstable_event_serde::from_documents_str::", + iterations, + bytes_per_iteration, + docs_per_iteration, + || { + FIXTURES + .iter() + .map(|fixture| { + saneyaml::__unstable_event_serde::from_documents_str::( + fixture.input, + ) + .expect(fixture.path) + .len() + }) + .sum() + }, + ), + measure( + "serde_saphyr::from_multiple_with_options::", + iterations, + bytes_per_iteration, + docs_per_iteration, + || { + FIXTURES + .iter() + .map(|fixture| { + serde_saphyr_serde_yaml_documents( + fixture.input, + fixture.path, + &serde_saphyr_options, + ) + .len() + }) + .sum() + }, + ), measure( "serde_yaml::Value stream", iterations, @@ -355,3 +447,61 @@ fn ns_per_byte(result: &BenchResult) -> f64 { let bytes = result.iterations * result.bytes_per_iteration; result.elapsed.as_nanos() as f64 / bytes as f64 } + +fn serde_saphyr_serde_yaml_documents( + input: &str, + path: &str, + options: &serde_saphyr::Options, +) -> Vec { + serde_saphyr::from_multiple_with_options::(input, options.clone()) + .unwrap_or_else(|error| panic!("{path}: {error}")) +} + +fn serde_saphyr_comparable_documents(docs: &[serde_yaml::Value]) -> Vec { + // Match serde-saphyr's serde_yaml::Value contract: skip null docs and + // treat tags as transparent. 
+ docs.iter() + .filter(|doc| !doc.is_null()) + .map(strip_serde_yaml_tags) + .collect() +} + +fn strip_serde_yaml_tags(value: &serde_yaml::Value) -> serde_yaml::Value { + match value { + serde_yaml::Value::Sequence(items) => { + serde_yaml::Value::Sequence(items.iter().map(strip_serde_yaml_tags).collect()) + } + serde_yaml::Value::Mapping(mapping) => { + let mut stripped = serde_yaml::Mapping::new(); + for (key, value) in mapping { + stripped.insert(strip_serde_yaml_tags(key), strip_serde_yaml_tags(value)); + } + serde_yaml::Value::Mapping(stripped) + } + serde_yaml::Value::Tagged(tagged) => strip_serde_yaml_tags(&tagged.value), + value => value.clone(), + } +} + +fn serde_saphyr_benchmark_options() -> serde_saphyr::Options { + let many = usize::MAX; + serde_saphyr::options! { + strict_booleans: true, + budget: serde_saphyr::budget! { + max_reader_input_bytes: None, + max_events: many, + max_aliases: many, + max_anchors: many, + max_depth: many, + max_inclusion_depth: u32::MAX, + max_documents: many, + max_nodes: many, + max_total_scalar_bytes: many, + max_total_comment_bytes: many, + max_merge_keys: many, + enforce_alias_anchor_ratio: false, + alias_anchor_min_aliases: many, + alias_anchor_ratio_multiplier: many, + }, + } +} diff --git a/src/de.rs b/src/de.rs index 811b92f..13b2f55 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1163,22 +1163,25 @@ fn take_yaml11_singleton_pair_value( } } -fn parse_explicit_core_int_text(raw: &str, span: Option) -> Result { +pub(crate) fn parse_explicit_core_int_text(raw: &str, span: Option) -> Result { yaml11::parse_explicit_int_number(raw) .ok_or_else(|| Error::new("failed to parse explicit !!int scalar", span)) } -fn parse_explicit_core_float_text(raw: &str, span: Option) -> Result { +pub(crate) fn parse_explicit_core_float_text( + raw: &str, + span: Option, +) -> Result { yaml11::parse_explicit_float_number(raw) .ok_or_else(|| Error::new("failed to parse explicit !!float scalar", span)) } -fn parse_explicit_core_bool_text(raw: 
&str, span: Option) -> Result { +pub(crate) fn parse_explicit_core_bool_text(raw: &str, span: Option) -> Result { yaml11::parse_bool(raw) .ok_or_else(|| Error::new("failed to parse explicit !!bool scalar", span)) } -fn parse_explicit_core_null_text(raw: &str, span: Option) -> Result<(), Error> { +pub(crate) fn parse_explicit_core_null_text(raw: &str, span: Option) -> Result<(), Error> { yaml11::is_null(raw) .then_some(()) .ok_or_else(|| Error::new("failed to parse explicit !!null scalar", span)) diff --git a/src/event_de.rs b/src/event_de.rs new file mode 100644 index 0000000..1302941 --- /dev/null +++ b/src/event_de.rs @@ -0,0 +1,3445 @@ +#![allow(dead_code)] +// Compiled work-in-progress: this module is exercised by unit tests before it is +// wired into public Serde entrypoints. + +use crate::{ + Error, ErrorPathSegment, Node, NodeValue, Number, Result, Span, Tag, TaggedNode, + error::utf8_error_span, + key_identity::{DuplicateKeyTracker, check_duplicate_with_tracker_at_depth_limit}, + parse::{ + Event, EventMeta, ScalarStyle, merge_policy_for_schema, parse_scalar_with_schema, + schema_for_directives, + }, + schema::{LoadOptions, Schema}, +}; +use serde::de::{ + self, DeserializeOwned, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, + VariantAccess, Visitor, +}; +use std::{collections::HashMap, io::Read, marker::PhantomData}; + +pub(crate) fn from_str_with_options<'de, T>(input: &'de str, options: LoadOptions) -> Result +where + T: serde::Deserialize<'de>, +{ + let configured_schema = options.selected_schema(); + let replay_budget = options.alias_expansion_budget(input.len()); + let events = crate::parse::EventStream::from_str_with_options(input, options)? 
+ .collect::>>()?; + let mut source = EventSource::new(input, events, configured_schema, replay_budget); + source.enter_stream()?; + source.enter_document()?; + let value = T::deserialize(EventNodeDeserializer { + source: &mut source, + })?; + source.finish_document()?; + match source.peek() { + Some(Event::StreamEnd) => Ok(value), + Some(Event::DocumentStart { .. }) => Err(Error::data( + "expected single YAML document, found multiple documents", + None, + )), + Some(event) => Err(unexpected_event("stream end", event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } +} + +pub(crate) fn from_documents_str_with_options( + input: &str, + options: LoadOptions, +) -> Result> +where + T: DeserializeOwned, +{ + document_iter_str_with_options(input, options)?.collect() +} + +pub(crate) fn document_iter_str_with_options<'de, T>( + input: &'de str, + options: LoadOptions, +) -> Result> +where + T: serde::Deserialize<'de>, +{ + let configured_schema = options.selected_schema(); + let replay_budget = options.alias_expansion_budget(input.len()); + Ok(EventDocumentIter { + input, + frames: EventDocumentFrames::from_str_with_options(input, options)?, + configured_schema, + replay_budget, + _marker: PhantomData, + }) +} + +pub(crate) fn document_iter_slice_with_options<'de, T>( + input: &'de [u8], + options: LoadOptions, +) -> Result> +where + T: serde::Deserialize<'de>, +{ + options.check_input_len(input.len())?; + let input = std::str::from_utf8(input) + .map_err(|err| Error::encoding("input is not valid UTF-8", utf8_error_span(input, err)))?; + document_iter_str_with_options(input, options) +} + +pub(crate) fn document_iter_reader_with_options( + reader: R, + options: LoadOptions, +) -> Result> +where + T: DeserializeOwned, + R: Read, +{ + let bytes = crate::de::read_to_end_with_options(reader, options)?; + let input = String::from_utf8(bytes).map_err(|err| { + Error::encoding( + "input is not valid UTF-8", + utf8_error_span(err.as_bytes(), 
err.utf8_error()), + ) + })?; + let configured_schema = options.selected_schema(); + let replay_budget = options.alias_expansion_budget(input.len()); + let frames = EventDocumentFrames::from_str_with_options(&input, options)?; + Ok(OwnedEventDocumentIter { + input, + frames, + configured_schema, + replay_budget, + _marker: PhantomData, + }) +} + +pub(crate) struct EventDocumentIter<'de, T> { + input: &'de str, + frames: EventDocumentFrames, + configured_schema: Schema, + replay_budget: usize, + _marker: PhantomData, +} + +impl<'de, T> Iterator for EventDocumentIter<'de, T> +where + T: serde::Deserialize<'de>, +{ + type Item = Result; + + fn next(&mut self) -> Option { + let (index, frame) = self.frames.next_frame()?; + Some( + frame + .and_then(|events| { + deserialize_document_frame( + self.input, + events, + self.configured_schema, + self.replay_budget, + ) + }) + .map_err(|error| error.with_document_index(index)), + ) + } +} + +pub(crate) struct OwnedEventDocumentIter { + input: String, + frames: EventDocumentFrames, + configured_schema: Schema, + replay_budget: usize, + _marker: PhantomData, +} + +impl Iterator for OwnedEventDocumentIter +where + T: DeserializeOwned, +{ + type Item = Result; + + fn next(&mut self) -> Option { + let (index, frame) = self.frames.next_frame()?; + Some( + frame + .and_then(|events| { + deserialize_document_frame( + &self.input, + events, + self.configured_schema, + self.replay_budget, + ) + }) + .map_err(|error| error.with_document_index(index)), + ) + } +} + +struct EventDocumentFrames { + events: crate::parse::EventStream, + started: bool, + finished: bool, + index: usize, +} + +impl EventDocumentFrames { + fn from_str_with_options(input: &str, options: LoadOptions) -> Result { + Ok(Self { + events: crate::parse::EventStream::from_str_with_options(input, options)?, + started: false, + finished: false, + index: 0, + }) + } + + fn next_frame(&mut self) -> Option<(usize, Result>)> { + if self.finished { + return None; + } + let 
index = self.index; + if let Err(error) = self.enter_stream() { + self.finished = true; + return Some((index, Err(error))); + } + + match self.events.next() { + Some(Ok(Event::StreamEnd)) => { + self.finished = true; + None + } + Some(Ok(start @ Event::DocumentStart { .. })) => { + Some((index, self.collect_document_frame(start))) + } + Some(Ok(event)) => { + self.finished = true; + Some(( + index, + Err(unexpected_event("document start or stream end", &event)), + )) + } + Some(Err(error)) => { + self.finished = true; + Some((index, Err(error))) + } + None => { + self.finished = true; + None + } + } + } + + fn enter_stream(&mut self) -> Result<()> { + if self.started { + return Ok(()); + } + self.started = true; + match self.events.next() { + Some(Ok(Event::StreamStart)) => Ok(()), + Some(Ok(event)) => Err(unexpected_event("stream start", &event)), + Some(Err(error)) => Err(error), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } + } + + fn collect_document_frame(&mut self, start: Event) -> Result> { + let mut frame = Vec::new(); + frame.push(Event::StreamStart); + frame.push(start); + loop { + match self.events.next() { + Some(Ok(event)) => { + let end = matches!(event, Event::DocumentEnd { .. 
}); + frame.push(event); + if end { + frame.push(Event::StreamEnd); + self.index += 1; + return Ok(frame); + } + } + Some(Err(error)) => { + self.finished = true; + return Err(error); + } + None => { + self.finished = true; + return Err(Error::data("unexpected end of YAML event stream", None)); + } + } + } + } +} + +fn deserialize_document_frame<'de, T>( + input: &'de str, + events: Vec, + configured_schema: Schema, + replay_budget: usize, +) -> Result +where + T: serde::Deserialize<'de>, +{ + let mut source = EventSource::new(input, events, configured_schema, replay_budget); + source.enter_stream()?; + source.enter_document()?; + let value = T::deserialize(EventNodeDeserializer { + source: &mut source, + })?; + source.finish_document()?; + match source.peek() { + Some(Event::StreamEnd) => Ok(value), + Some(event) => Err(unexpected_event("stream end", event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } +} + +struct EventSource<'de> { + input: &'de str, + events: Vec, + pos: usize, + configured_schema: Schema, + schema: Schema, + anchors: HashMap>, + inject: Vec, + replayed_events: usize, + replay_budget: usize, +} + +struct InjectedEvents { + anchor: String, + events: Vec, + pos: usize, +} + +impl<'de> EventSource<'de> { + fn new( + input: &'de str, + events: Vec, + configured_schema: Schema, + replay_budget: usize, + ) -> Self { + Self { + input, + events, + pos: 0, + configured_schema, + schema: configured_schema, + anchors: HashMap::new(), + inject: Vec::new(), + replayed_events: 0, + replay_budget, + } + } + + fn peek(&self) -> Option<&Event> { + if let Some(frame) = self.inject.last() + && frame.pos < frame.events.len() + { + return frame.events.get(frame.pos); + } + self.events.get(self.pos) + } + + fn next(&mut self) -> Result { + loop { + let event = self.next_raw()?; + if let Event::Alias { anchor } = event { + self.inject_alias(anchor.name, anchor.span)?; + continue; + } + return Ok(event); + } + } + + fn next_raw(&mut 
self) -> Result { + if let Some(event) = self.next_injected_event() { + return Ok(event); + } + + let pos = self.pos; + let event = self + .events + .get(pos) + .cloned() + .ok_or_else(|| Error::data("unexpected end of YAML event stream", None))?; + self.record_anchor_at(pos, &event)?; + self.pos += 1; + Ok(event) + } + + fn resolve_aliases_until_non_alias(&mut self) -> Result<()> { + while matches!(self.peek(), Some(Event::Alias { .. })) { + let Event::Alias { anchor } = self.next_raw()? else { + unreachable!("peek observed an alias"); + }; + self.inject_alias(anchor.name, anchor.span)?; + } + Ok(()) + } + + fn next_injected_event(&mut self) -> Option { + loop { + let frame = self.inject.last_mut()?; + if frame.pos < frame.events.len() { + let event = frame.events[frame.pos].clone(); + frame.pos += 1; + if frame.pos == frame.events.len() { + self.inject.pop(); + } + return Some(event); + } + self.inject.pop(); + } + } + + fn record_anchor_at(&mut self, pos: usize, event: &Event) -> Result<()> { + let Some(name) = event_anchor_name(event) else { + return Ok(()); + }; + let end = skip_node_in(&self.events, pos)?; + self.anchors + .insert(name.to_string(), self.events[pos..end].to_vec()); + Ok(()) + } + + fn inject_alias(&mut self, name: String, span: Span) -> Result<()> { + if self.inject.iter().any(|frame| frame.anchor == name) { + return Err(Error::reference( + format!("recursive alias `{name}` is not supported"), + span, + )); + } + let events = self + .anchors + .get(&name) + .cloned() + .ok_or_else(|| Error::reference(format!("unknown anchor `{name}`"), span))?; + self.replayed_events = self.replayed_events.saturating_add(events.len()); + if self.replayed_events > self.replay_budget { + return Err(Error::limit("alias event replay limit exceeded", span)); + } + self.inject.push(InjectedEvents { + anchor: name, + events, + pos: 0, + }); + Ok(()) + } + + fn enter_stream(&mut self) -> Result<()> { + match self.next()? 
{ + Event::StreamStart => Ok(()), + event => Err(unexpected_event("stream start", &event)), + } + } + + fn enter_document(&mut self) -> Result<()> { + match self.next()? { + Event::DocumentStart { directives, .. } => { + self.anchors.clear(); + self.inject.clear(); + self.replayed_events = 0; + self.schema = schema_for_directives(self.configured_schema, &directives); + Ok(()) + } + event => Err(unexpected_event("document start", &event)), + } + } + + fn finish_document(&mut self) -> Result<()> { + match self.next()? { + Event::DocumentEnd { .. } => Ok(()), + event => Err(unexpected_event("document end", &event)), + } + } + + fn scalar_from_event( + &self, + value: String, + style: ScalarStyle, + meta: &EventMeta, + span: Span, + ) -> Result { + if let Some(tag) = &meta.tag { + let tag = &tag.tag; + let tag_span = meta.tag.as_ref().expect("tag checked").span; + if tag.is_yaml_core("str") { + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::String(value), span), + )); + } + if tag.is_yaml_core("int") { + let number = crate::de::parse_explicit_core_int_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Number(number), span).with_scalar_source(value), + )); + } + if tag.is_yaml_core("float") { + let number = crate::de::parse_explicit_core_float_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Number(number), span).with_scalar_source(value), + )); + } + if tag.is_yaml_core("bool") { + let value = crate::de::parse_explicit_core_bool_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Bool(value), span), + )); + } + if tag.is_yaml_core("null") { + crate::de::parse_explicit_core_null_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Null, span), + )); + } + let inner = self.untagged_scalar_from_event(value, style, span)?; + if 
tag.is_non_specific() { + return Ok(non_specific_event_node(span_union(tag_span, span), inner)); + } + return Ok(Node::new( + NodeValue::Tagged(Box::new(TaggedNode { + tag: tag.clone(), + tag_span, + value: inner, + })), + span_union(tag_span, span), + )); + } + self.untagged_scalar_from_event(value, style, span) + } + + fn untagged_scalar_from_event( + &self, + value: String, + style: ScalarStyle, + span: Span, + ) -> Result { + match style { + ScalarStyle::Plain => parse_scalar_with_schema(&value, span, self.schema), + ScalarStyle::SingleQuoted + | ScalarStyle::DoubleQuoted + | ScalarStyle::Literal + | ScalarStyle::Folded => Ok(Node::new(NodeValue::String(value), span)), + } + } + + fn take_scalar(&mut self) -> Result { + match self.next()? { + Event::Scalar { + value, + style, + meta, + span, + } => self.scalar_from_event(value, style, &meta, span), + Event::Alias { anchor } => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + event => Err(unexpected_event("scalar", &event)), + } + } + + fn skip_node(&mut self) -> Result<()> { + self.resolve_aliases_until_non_alias()?; + match self.peek().cloned() { + Some(Event::Scalar { .. }) => { + self.next()?; + Ok(()) + } + Some(Event::Alias { anchor }) => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + Some(Event::SequenceStart { .. }) => { + self.next()?; + loop { + if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { + self.next()?; + return Ok(()); + } + self.skip_node()?; + } + } + Some(Event::MappingStart { .. }) => { + if self.next_mapping_has_merge_key()? { + let mut node = self.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.schema))?; + self.skip_node_raw()?; + return Ok(()); + } + self.validate_next_mapping_duplicates()?; + self.next()?; + loop { + if matches!(self.peek(), Some(Event::MappingEnd { .. 
})) { + self.next()?; + return Ok(()); + } + self.skip_node()?; + self.skip_node()?; + } + } + Some(event) => Err(unexpected_event("node", &event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } + } + + fn skip_node_raw(&mut self) -> Result<()> { + match self.next()? { + Event::Scalar { .. } => Ok(()), + Event::Alias { anchor } => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + Event::SequenceStart { .. } => loop { + if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { + self.next()?; + return Ok(()); + } + self.skip_node_raw()?; + }, + Event::MappingStart { .. } => loop { + if matches!(self.peek(), Some(Event::MappingEnd { .. })) { + self.next()?; + return Ok(()); + } + self.skip_node_raw()?; + self.skip_node_raw()?; + }, + event => Err(unexpected_event("node", &event)), + } + } + + fn materialize_current_node_for_merge(&self) -> Result { + let (events, pos) = self.current_events_and_pos(); + let mut scan_anchors = self.anchors.clone(); + let mut replayed_events = 0usize; + let (node, next) = self.node_at_for_key( + events, + pos, + &mut scan_anchors, + &mut Vec::new(), + &mut replayed_events, + true, + )?; + let expected = skip_node_in(events, pos)?; + if next != expected { + return Err(Error::data( + "unterminated merge materialization event stream", + None, + )); + } + Ok(node) + } + + fn next_mapping_has_merge_key(&self) -> Result { + let (events, start) = self.current_events_and_pos(); + let Some(Event::MappingStart { .. }) = events.get(start) else { + return Ok(false); + }; + let mut pos = start + 1; + let mut scan_anchors = self.anchors.clone(); + let mut replayed_events = 0usize; + while let Some(event) = events.get(pos) { + if matches!(event, Event::MappingEnd { .. 
}) { + return Ok(false); + } + let (key, next_pos) = self.node_at_for_key( + events, + pos, + &mut scan_anchors, + &mut Vec::new(), + &mut replayed_events, + true, + )?; + if node_is_merge_key(&key) { + return Ok(true); + } + pos = next_pos; + pos = scan_anchors_in(events, pos, &mut scan_anchors)?; + } + Err(Error::data("unterminated mapping event stream", None)) + } + + fn validate_next_mapping_duplicates(&self) -> Result<()> { + let (events, start) = self.current_events_and_pos(); + let Some(Event::MappingStart { .. }) = events.get(start) else { + return Ok(()); + }; + let mut pos = start + 1; + let mut seen = DuplicateKeyTracker::new(); + let mut scan_anchors = self.anchors.clone(); + let mut replayed_events = 0usize; + while let Some(event) = events.get(pos) { + if matches!(event, Event::MappingEnd { .. }) { + return Ok(()); + } + if let Some((key, next_pos)) = + self.mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events)? + { + if node_is_merge_key(&key) { + return Err(Error::data( + "event-backed merge-key expansion is not implemented", + Some(key.span), + )); + } + check_duplicate_with_tracker_at_depth_limit(&mut seen, &key, 1, None)?; + pos = next_pos; + } else { + pos = scan_anchors_in(events, pos, &mut scan_anchors)?; + } + pos = scan_anchors_in(events, pos, &mut scan_anchors)?; + } + Err(Error::data("unterminated mapping event stream", None)) + } + + fn current_events_and_pos(&self) -> (&[Event], usize) { + if let Some(frame) = self.inject.last() + && frame.pos < frame.events.len() + { + return (&frame.events, frame.pos); + } + (&self.events, self.pos) + } + + fn mapping_key_at( + &self, + events: &[Event], + pos: usize, + scan_anchors: &mut HashMap>, + replayed_events: &mut usize, + ) -> Result> { + if let Some(name) = events.get(pos).and_then(event_anchor_name) { + let end = skip_node_in(events, pos)?; + scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); + } + match events.get(pos) { + Some(Event::Scalar { .. 
}) + | Some(Event::Alias { .. }) + | Some(Event::SequenceStart { .. }) + | Some(Event::MappingStart { .. }) => self + .node_at_for_key( + events, + pos, + scan_anchors, + &mut Vec::new(), + replayed_events, + false, + ) + .map(|(node, next)| Some((node, next))), + Some(_) | None => Ok(None), + } + } + + fn scalar_key_at(&self, pos: usize) -> Result> { + self.scalar_key_at_in(&self.events, pos) + } + + fn scalar_key_at_in(&self, events: &[Event], pos: usize) -> Result> { + let Some(Event::Scalar { + value, + style, + meta, + span, + }) = events.get(pos) + else { + return Ok(None); + }; + self.scalar_from_event(value.clone(), *style, meta, *span) + .map(|node| Some((node, pos + 1))) + } + + fn scalar_key_node_from_event( + &self, + value: String, + style: ScalarStyle, + meta: &EventMeta, + span: Span, + ) -> Result { + let Some(tag) = &meta.tag else { + return self.scalar_from_event(value, style, meta, span); + }; + let inner = if tag.tag.is_yaml_core("int") { + Node::new( + NodeValue::Number(crate::de::parse_explicit_core_int_text(&value, Some(span))?), + span, + ) + } else if tag.tag.is_yaml_core("float") { + Node::new( + NodeValue::Number(crate::de::parse_explicit_core_float_text( + &value, + Some(span), + )?), + span, + ) + } else if tag.tag.is_yaml_core("bool") { + Node::new( + NodeValue::Bool(crate::de::parse_explicit_core_bool_text( + &value, + Some(span), + )?), + span, + ) + } else if tag.tag.is_yaml_core("null") { + crate::de::parse_explicit_core_null_text(&value, Some(span))?; + Node::new(NodeValue::Null, span) + } else { + let _ = style; + Node::new(NodeValue::String(value), span) + }; + Ok(tagged_key_node(tag.tag.clone(), tag.span, inner)) + } + + fn node_at_for_key( + &self, + events: &[Event], + pos: usize, + scan_anchors: &mut HashMap>, + active_aliases: &mut Vec, + replayed_events: &mut usize, + allow_merge_key: bool, + ) -> Result<(Node, usize)> { + let Some(event) = events.get(pos) else { + return Err(Error::data("unexpected end of YAML event 
stream", None)); + }; + if let Some(name) = event_anchor_name(event) { + let end = skip_node_in(events, pos)?; + scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); + } + + match event { + Event::Scalar { + value, + style, + meta, + span, + } => self + .scalar_key_node_from_event(value.clone(), *style, meta, *span) + .map(|node| (node, pos + 1)), + Event::Alias { anchor } => { + let name = &anchor.name; + if active_aliases.iter().any(|active| active == name) { + return Err(Error::reference( + format!("recursive alias `{name}` is not supported"), + anchor.span, + )); + } + let target = scan_anchors.get(name).cloned().ok_or_else(|| { + Error::reference(format!("unknown anchor `{name}`"), anchor.span) + })?; + *replayed_events = replayed_events.saturating_add(target.len()); + if *replayed_events > self.replay_budget { + return Err(Error::limit( + "alias event replay limit exceeded", + anchor.span, + )); + } + active_aliases.push(name.clone()); + let (mut node, end) = self.node_at_for_key( + &target, + 0, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + )?; + active_aliases.pop(); + if end != target.len() { + return Err(Error::data("unterminated alias key event subtree", None)); + } + node.span = anchor.span; + Ok((node, pos + 1)) + } + Event::SequenceStart { meta, span, .. } => { + let mut items = Vec::new(); + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::SequenceEnd { span: end_span }) => { + let node = + Node::new(NodeValue::Sequence(items), span_union(*span, *end_span)); + return Ok((apply_event_tag(meta, node), next + 1)); + } + Some(_) => { + let (item, after_item) = self.node_at_for_key( + events, + next, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + )?; + items.push(item); + next = after_item; + } + None => { + return Err(Error::data("unterminated sequence event stream", None)); + } + } + } + } + Event::MappingStart { meta, span, .. 
} => { + let mut entries = Vec::new(); + let mut seen = DuplicateKeyTracker::new(); + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::MappingEnd { span: end_span }) => { + let node = Node::new( + NodeValue::Mapping(entries), + span_union(*span, *end_span), + ); + return Ok((apply_event_tag(meta, node), next + 1)); + } + Some(_) => { + let (key, after_key) = self.node_at_for_key( + events, + next, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + )?; + if !allow_merge_key && node_is_merge_key(&key) { + return Err(Error::data( + "event-backed merge-key expansion is not implemented", + Some(key.span), + )); + } + if !(allow_merge_key + && self.schema.is_legacy_compatible() + && node_is_merge_key(&key)) + { + check_duplicate_with_tracker_at_depth_limit( + &mut seen, &key, 1, None, + )?; + } + let (value, after_value) = self.node_at_for_key( + events, + after_key, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + )?; + entries.push((key, value)); + next = after_value; + } + None => return Err(Error::data("unterminated mapping event stream", None)), + } + } + } + event => Err(unexpected_event("node", event)), + } + } +} + +fn skip_node_in(events: &[Event], pos: usize) -> Result { + match events + .get(pos) + .ok_or_else(|| Error::data("unexpected end of YAML event stream", None))? + { + Event::Scalar { .. } | Event::Alias { .. } => Ok(pos + 1), + Event::SequenceStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::SequenceEnd { .. }) => return Ok(next + 1), + Some(_) => next = skip_node_in(events, next)?, + None => return Err(Error::data("unterminated sequence event stream", None)), + } + } + } + Event::MappingStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::MappingEnd { .. 
}) => return Ok(next + 1), + Some(_) => { + next = skip_node_in(events, next)?; + next = skip_node_in(events, next)?; + } + None => return Err(Error::data("unterminated mapping event stream", None)), + } + } + } + event => Err(unexpected_event("node", event)), + } +} + +fn span_union(start: Span, end: Span) -> Span { + Span::new(start.start, end.end, start.line, start.column) +} + +fn tagged_key_node(tag: crate::Tag, tag_span: Span, value: Node) -> Node { + let span = span_union(tag_span, value.span); + Node::new( + NodeValue::Tagged(Box::new(TaggedNode { + tag, + tag_span, + value, + })), + span, + ) +} + +fn apply_event_tag(meta: &EventMeta, node: Node) -> Node { + let Some(tag) = &meta.tag else { + return node; + }; + if tag.tag.is_non_specific() { + non_specific_event_node(span_union(tag.span, node.span), node) + } else { + tagged_key_node(tag.tag.clone(), tag.span, node) + } +} + +fn non_specific_event_node(span: Span, mut node: Node) -> Node { + node.span = span; + match &node.value { + NodeValue::Sequence(_) + | NodeValue::Mapping(_) + | NodeValue::String(_) + | NodeValue::Tagged(_) => node, + NodeValue::Null | NodeValue::Bool(_) | NodeValue::Number(_) => { + let source = node + .scalar_source() + .map(|source| source.raw().to_string()) + .unwrap_or_default(); + Node::new(NodeValue::String(source.clone()), span).with_scalar_source(source) + } + } +} + +fn node_is_merge_key(key: &Node) -> bool { + match &key.value { + NodeValue::String(_) => key.as_str() == Some("<<"), + NodeValue::Tagged(tagged) if tagged.tag.is_yaml_core("merge") => { + tagged.value.as_str() == Some("<<") + } + _ => false, + } +} + +fn scan_anchors_in( + events: &[Event], + pos: usize, + anchors: &mut HashMap>, +) -> Result { + let Some(event) = events.get(pos) else { + return Err(Error::data("unexpected end of YAML event stream", None)); + }; + if let Some(name) = event_anchor_name(event) { + let end = skip_node_in(events, pos)?; + anchors.insert(name.to_string(), 
events[pos..end].to_vec()); + } + match event { + Event::Scalar { .. } | Event::Alias { .. } => Ok(pos + 1), + Event::SequenceStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::SequenceEnd { .. }) => return Ok(next + 1), + Some(_) => next = scan_anchors_in(events, next, anchors)?, + None => return Err(Error::data("unterminated sequence event stream", None)), + } + } + } + Event::MappingStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::MappingEnd { .. }) => return Ok(next + 1), + Some(_) => { + next = scan_anchors_in(events, next, anchors)?; + next = scan_anchors_in(events, next, anchors)?; + } + None => return Err(Error::data("unterminated mapping event stream", None)), + } + } + } + event => Err(unexpected_event("node", event)), + } +} + +fn event_anchor_name(event: &Event) -> Option<&str> { + match event { + Event::Scalar { meta, .. } + | Event::SequenceStart { meta, .. } + | Event::MappingStart { meta, .. } => { + meta.anchor.as_ref().map(|anchor| anchor.name.as_str()) + } + Event::StreamStart + | Event::StreamEnd + | Event::DocumentStart { .. } + | Event::DocumentEnd { .. } + | Event::SequenceEnd { .. } + | Event::MappingEnd { .. } + | Event::Alias { .. 
} => None, + } +} + +struct EventNodeDeserializer<'a, 'de> { + source: &'a mut EventSource<'de>, +} + +impl<'de> EventNodeDeserializer<'_, 'de> { + fn deserialize_prepared_current_node(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_any(PreparedNodeDeserializer { node }, visitor) + } + + fn deserialize_prepared_current_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node }, visitor) + } + + fn deserialize_prepared_current_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor) + } +} + +impl<'de> de::Deserializer<'de> for EventNodeDeserializer<'_, 'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + match self.source.peek() { + Some(Event::Scalar { .. }) => { + let node = self.source.take_scalar()?; + visit_scalar_any(&node, self.source.input, visitor) + } + Some(Event::SequenceStart { meta, .. }) | Some(Event::MappingStart { meta, .. }) + if meta.tag.is_some() => + { + self.deserialize_prepared_current_node(visitor) + } + Some(Event::SequenceStart { .. }) => self.deserialize_seq(visitor), + Some(Event::MappingStart { .. 
}) => self.deserialize_map(visitor), + Some(Event::Alias { anchor }) => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + Some(event) => Err(unexpected_event("node", event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Bool(value) => with_span(visitor.visit_bool(value), node.span), + _ => Err(type_error("bool", &node)), + } + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_i64_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_u64_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let 
node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_i128_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_u128_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_f64(visitor) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_f64_number(number, node.span, visitor), + _ => Err(type_error("number", &node)), + } + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let value = prepared_string_target_text(&node).ok_or_else(|| type_error("char", &node))?; + let mut chars = value.chars(); + match (chars.next(), chars.next()) { + (Some(ch), None) => with_span(visitor.visit_char(ch), node.span), + _ => Err(type_error("char", &node)), + } + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let value = string_target_text(&node).ok_or_else(|| type_error("string", &node))?; + if let Some(borrowed) = borrowed_event_str(self.source.input, node.span, value) { + return visitor.visit_borrowed_str(borrowed); + } + visitor.visit_str(value) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let value = string_target_text(&node).ok_or_else(|| type_error("string", &node))?; + visitor.visit_string(value.to_string()) + } + + fn deserialize_bytes(self, 
_visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + Err(type_error("bytes", &node)) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + if self.source.peek_is_null_scalar()? { + self.source.take_scalar()?; + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Null => visitor.visit_unit(), + _ => Err(type_error("unit/null", &node)), + } + } + + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + if self + .source + .peek_has_yaml_core_tag(&["set", "omap", "pairs"]) + { + return self.deserialize_prepared_current_seq(visitor); + } + match self.source.next()? { + Event::SequenceStart { .. 
} => visitor.visit_seq(EventSeqAccess { + source: self.source, + index: 0, + }), + event => Err(unexpected_event("sequence", &event)), + } + } + + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + if self.source.peek_has_yaml_core_tag(&["omap"]) { + return self.deserialize_prepared_current_map(visitor); + } + if self.source.next_mapping_has_merge_key()? { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + return de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor); + } + self.source.validate_next_mapping_duplicates()?; + match self.source.next()? { + Event::MappingStart { .. 
} => visitor.visit_map(EventMapAccess { + source: self.source, + value: None, + }), + event => Err(unexpected_event("mapping", &event)), + } + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let Some(value) = prepared_string_target_text(&node) else { + return Err(type_error("enum string", &node)); + }; + visitor.visit_enum(value.to_string().into_deserializer()) + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.skip_node()?; + visitor.visit_unit() + } +} + +impl EventSource<'_> { + fn peek_has_yaml_core_tag(&self, suffixes: &[&str]) -> bool { + match self.peek() { + Some(Event::SequenceStart { meta, .. }) | Some(Event::MappingStart { meta, .. }) => { + meta.tag + .as_ref() + .is_some_and(|tag| suffixes.iter().any(|suffix| tag.tag.is_yaml_core(suffix))) + } + _ => false, + } + } + + fn peek_is_null_scalar(&self) -> Result { + let Some(Event::Scalar { + value, + style, + meta, + span, + }) = self.peek() + else { + return Ok(false); + }; + let node = self.scalar_from_event(value.clone(), *style, meta, *span)?; + Ok(prepared_is_null_node(&node)) + } +} + +struct EventSeqAccess<'a, 'de> { + source: &'a mut EventSource<'de>, + index: usize, +} + +impl<'de> SeqAccess<'de> for EventSeqAccess<'_, 'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + if matches!(self.source.peek(), Some(Event::SequenceEnd { .. 
})) { + self.source.next()?; + return Ok(None); + } + let index = self.index; + self.index += 1; + seed.deserialize(EventNodeDeserializer { + source: self.source, + }) + .map(Some) + .map_err(|error| error.prepend_path_segment(ErrorPathSegment::Index(index))) + } +} + +struct EventMapAccess<'a, 'de> { + source: &'a mut EventSource<'de>, + value: Option, +} + +impl<'de> MapAccess<'de> for EventMapAccess<'_, 'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + if matches!(self.source.peek(), Some(Event::MappingEnd { .. })) { + self.source.next()?; + return Ok(None); + } + let (events, pos) = self.source.current_events_and_pos(); + let mut scan_anchors = self.source.anchors.clone(); + let mut replayed_events = 0usize; + let segment = self + .source + .mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events)? + .map(|(node, _)| path_segment_for_node(&node)) + .unwrap_or(ErrorPathSegment::ComplexKey); + self.value = Some(segment.clone()); + seed.deserialize(EventNodeDeserializer { + source: self.source, + }) + .map(Some) + .map_err(|error| error.with_path_segment_if_empty(segment)) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + let segment = self + .value + .take() + .ok_or_else(|| Error::data("value requested before key", None))?; + seed.deserialize(EventNodeDeserializer { + source: self.source, + }) + .map_err(|error| error.prepend_path_segment(segment)) + } +} + +struct PreparedNodeDeserializer { + node: Node, +} + +struct PreparedSeqAccess { + items: std::vec::IntoIter, + index: usize, +} + +impl<'de> SeqAccess<'de> for PreparedSeqAccess { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + let Some(node) = self.items.next() else { + return Ok(None); + }; + let index = self.index; + self.index += 1; + seed.deserialize(PreparedNodeDeserializer { node }) + .map(Some) + 
.map_err(|error| error.prepend_path_segment(ErrorPathSegment::Index(index))) + } +} + +struct PreparedMapAccess { + entries: std::vec::IntoIter<(Node, Node)>, + value: Option<(Node, ErrorPathSegment)>, +} + +impl<'de> MapAccess<'de> for PreparedMapAccess { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + let Some((key, value)) = self.entries.next() else { + return Ok(None); + }; + let segment = path_segment_for_node(&key); + self.value = Some((value, segment.clone())); + seed.deserialize(PreparedNodeDeserializer { node: key }) + .map(Some) + .map_err(|error| error.with_path_segment_if_empty(segment)) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + let (node, segment) = self + .value + .take() + .ok_or_else(|| Error::data("value requested before key", None))?; + seed.deserialize(PreparedNodeDeserializer { node }) + .map_err(|error| error.prepend_path_segment(segment)) + } +} + +impl<'de> de::Deserializer<'de> for PreparedNodeDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let span = self.node.span; + match self.node.value { + NodeValue::Null => visitor.visit_unit(), + NodeValue::Bool(value) => visitor.visit_bool(value), + NodeValue::Number(number) => visit_any_number(number, span, visitor), + NodeValue::String(value) => visitor.visit_string(value), + NodeValue::Sequence(items) => visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }), + NodeValue::Mapping(entries) => visitor.visit_map(PreparedMapAccess { + entries: entries.into_iter(), + value: None, + }), + NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { + tag: tagged.tag, + value: tagged.value, + }), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + 
NodeValue::Bool(value) => with_span(visitor.visit_bool(value), node.span), + _ => Err(type_error("bool", &node)), + } + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_i64_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_u64_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_i128_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_u128_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn 
deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_f64(visitor) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_f64_number(number, node.span, visitor), + _ => Err(type_error("number", &node)), + } + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + let value = prepared_string_target_text(&node).ok_or_else(|| type_error("char", &node))?; + let mut chars = value.chars(); + match (chars.next(), chars.next()) { + (Some(ch), None) => with_span(visitor.visit_char(ch), node.span), + _ => Err(type_error("char", &node)), + } + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + let value = + prepared_string_target_text(&node).ok_or_else(|| type_error("string", &node))?; + visitor.visit_string(value.to_string()) + } + + fn deserialize_bytes(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + Err(type_error("bytes", &node)) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if prepared_is_null_node(&self.node) { + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Null => visitor.visit_unit(), + _ => Err(type_error("unit/null", &node)), + } + } + + fn deserialize_unit_struct(self, _name: &'static 
str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if yaml11_set_entries_node(&self.node)?.is_some() { + let entries = take_yaml11_set_entries_node(self.node).expect("checked explicit !!set"); + let items = yaml11_set_key_nodes(entries)?; + return visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }); + } + if yaml11_pair_items_node(&self.node, "omap")?.is_some() { + let items = + take_yaml11_pair_items_node(self.node, "omap").expect("checked explicit !!omap"); + let items = yaml11_pair_sequence_nodes(items, "omap")?; + return visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }); + } + if yaml11_pair_items_node(&self.node, "pairs")?.is_some() { + let items = + take_yaml11_pair_items_node(self.node, "pairs").expect("checked explicit !!pairs"); + let items = yaml11_pair_sequence_nodes(items, "pairs")?; + return visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }); + } + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Sequence(items) => visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }), + _ => Err(type_error("sequence", &node)), + } + } + + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if let Some(items) = yaml11_pair_items_node(&self.node, "omap")? 
{ + validate_yaml11_omap_node_keys(items)?; + let items = + take_yaml11_pair_items_node(self.node, "omap").expect("checked explicit !!omap"); + let entries = yaml11_pair_entries(items, "omap")?; + return visitor.visit_map(PreparedMapAccess { + entries: entries.into_iter(), + value: None, + }); + } + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Mapping(entries) => visitor.visit_map(PreparedMapAccess { + entries: entries.into_iter(), + value: None, + }), + _ => Err(type_error("mapping", &node)), + } + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + match self.node.value { + NodeValue::String(variant) => visitor.visit_enum(variant.into_deserializer()), + NodeValue::Mapping(entries) if entries.len() == 1 => { + let mut entries = entries.into_iter(); + let (key, value) = entries.next().expect("length checked"); + visitor.visit_enum(PreparedEnumDeserializer { + key, + value: Some(value), + }) + } + NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { + tag: tagged.tag, + value: tagged.value, + }), + _ => Err(type_error("enum string or single-key mapping", &self.node)), + } + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } +} + +struct PreparedEnumDeserializer { + key: Node, + value: Option, +} + +impl<'de> EnumAccess<'de> for PreparedEnumDeserializer { + type Error = Error; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + let variant 
= seed.deserialize(PreparedNodeDeserializer { + node: self.key.clone(), + })?; + Ok((variant, self)) + } +} + +impl<'de> VariantAccess<'de> for PreparedEnumDeserializer { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + match self.value { + None => Ok(()), + Some(node) if matches!(node.value, NodeValue::Null) => Ok(()), + Some(node) => Err(type_error("unit enum variant", &node)), + } + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + let node = self + .value + .ok_or_else(|| Error::data("newtype variant requires a value", None))?; + seed.deserialize(PreparedNodeDeserializer { node }) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self + .value + .ok_or_else(|| Error::data("tuple variant requires a value", None))?; + de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node }, visitor) + } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self + .value + .ok_or_else(|| Error::data("struct variant requires a value", None))?; + de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor) + } +} + +struct PreparedTaggedEnumDeserializer { + tag: Tag, + value: Node, +} + +impl<'de> EnumAccess<'de> for PreparedTaggedEnumDeserializer { + type Error = Error; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + let variant = + seed.deserialize(self.tag.serde_variant().into_owned().into_deserializer())?; + Ok((variant, self)) + } +} + +impl<'de> VariantAccess<'de> for PreparedTaggedEnumDeserializer { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + if prepared_is_null_node(&self.value) { + Ok(()) + } else { + Err(type_error("unit enum variant", &self.value)) + } + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: DeserializeSeed<'de>, 
+ { + seed.deserialize(PreparedNodeDeserializer { node: self.value }) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node: self.value }, visitor) + } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_map(PreparedNodeDeserializer { node: self.value }, visitor) + } +} + +fn explicit_core_tagged_node<'a>(mut node: &'a Node, suffix: &str) -> Option<&'a Node> { + while let NodeValue::Tagged(tagged) = &node.value { + if tagged.tag.is_yaml_core(suffix) { + return Some(&tagged.value); + } + node = &tagged.value; + } + None +} + +fn take_explicit_core_tagged_node(mut node: Node, suffix: &str) -> Option { + loop { + match node.value { + NodeValue::Tagged(tagged) if tagged.tag.is_yaml_core(suffix) => { + return Some(tagged.value); + } + NodeValue::Tagged(tagged) => node = tagged.value, + _ => return None, + } + } +} + +fn yaml11_set_entries_node(node: &Node) -> Result> { + let Some(value) = explicit_core_tagged_node(node, "set") else { + return Ok(None); + }; + match &value.value { + NodeValue::Mapping(entries) => Ok(Some(entries)), + _ => Err(type_error("mapping for explicit !!set", value)), + } +} + +fn take_yaml11_set_entries_node(node: Node) -> Option> { + let value = take_explicit_core_tagged_node(node, "set")?; + match value.value { + NodeValue::Mapping(entries) => Some(entries), + _ => None, + } +} + +fn yaml11_pair_items_node<'a>(node: &'a Node, suffix: &'static str) -> Result> { + let Some(value) = explicit_core_tagged_node(node, suffix) else { + return Ok(None); + }; + match &value.value { + NodeValue::Sequence(items) => Ok(Some(items)), + _ => Err(Error::data( + format!("expected sequence for explicit !!{suffix}"), + Some(value.span), + )), + } +} + +fn take_yaml11_pair_items_node(node: Node, suffix: &'static str) -> Option> { + let value = 
take_explicit_core_tagged_node(node, suffix)?; + match value.value { + NodeValue::Sequence(items) => Some(items), + _ => None, + } +} + +fn validate_yaml11_omap_node_keys(items: &[Node]) -> Result<()> { + let mut seen = DuplicateKeyTracker::new(); + for item in items { + let (key, _) = yaml11_singleton_pair_node(item, "omap")?; + check_duplicate_with_tracker_at_depth_limit( + &mut seen, + key, + 1, + Some(crate::schema::DEFAULT_MAX_NESTING_DEPTH), + )?; + } + Ok(()) +} + +fn yaml11_set_key_nodes(entries: Vec<(Node, Node)>) -> Result> { + entries + .into_iter() + .map(|(key, value)| { + ensure_yaml11_set_null_node(&value)?; + Ok(key) + }) + .collect() +} + +fn ensure_yaml11_set_null_node(value: &Node) -> Result<()> { + if prepared_is_null_node(value) { + Ok(()) + } else { + Err(Error::data( + "expected explicit !!set entry value to be null", + Some(value.span), + )) + } +} + +fn yaml11_pair_sequence_nodes(items: Vec, suffix: &'static str) -> Result> { + items + .into_iter() + .map(|item| { + let span = item.span; + let (key, value) = take_yaml11_singleton_pair_node(item, suffix)?; + Ok(Node::new(NodeValue::Sequence(vec![key, value]), span)) + }) + .collect() +} + +fn yaml11_pair_entries(items: Vec, suffix: &'static str) -> Result> { + items + .into_iter() + .map(|item| take_yaml11_singleton_pair_node(item, suffix)) + .collect() +} + +fn yaml11_singleton_pair_node<'a>( + node: &'a Node, + suffix: &'static str, +) -> Result<(&'a Node, &'a Node)> { + let node = prepared_untag_node(node); + match &node.value { + NodeValue::Mapping(entries) if entries.len() == 1 => Ok((&entries[0].0, &entries[0].1)), + NodeValue::Mapping(_) => Err(Error::data( + format!("expected explicit !!{suffix} entry to contain exactly one pair"), + Some(node.span), + )), + _ => Err(Error::data( + format!("expected single-pair mapping entry for explicit !!{suffix}"), + Some(node.span), + )), + } +} + +fn take_yaml11_singleton_pair_node(node: Node, suffix: &'static str) -> Result<(Node, Node)> { + 
let node = prepared_untag_node_owned(node); + match node.value { + NodeValue::Mapping(entries) if entries.len() == 1 => { + let mut entries = entries.into_iter(); + entries.next().ok_or_else(|| { + Error::data( + "internal: singleton mapping lost its entry", + Some(node.span), + ) + }) + } + NodeValue::Mapping(_) => Err(Error::data( + format!("expected explicit !!{suffix} entry to contain exactly one pair"), + Some(node.span), + )), + _ => Err(Error::data( + format!("expected single-pair mapping entry for explicit !!{suffix}"), + Some(node.span), + )), + } +} + +fn prepared_untag_node(mut node: &Node) -> &Node { + while let NodeValue::Tagged(tagged) = &node.value { + node = &tagged.value; + } + node +} + +fn prepared_untag_node_owned(node: Node) -> Node { + let Node { + value, + span, + source, + } = node; + match value { + NodeValue::Tagged(tagged) => prepared_untag_node_owned(tagged.value), + value => Node { + value, + span, + source, + }, + } +} + +fn prepared_is_null_node(node: &Node) -> bool { + match &node.value { + NodeValue::Null => true, + NodeValue::Tagged(tagged) => prepared_is_null_node(&tagged.value), + _ => false, + } +} + +fn prepared_string_target_text(node: &Node) -> Option<&str> { + match &node.value { + NodeValue::Tagged(tagged) => prepared_string_target_text(&tagged.value), + _ => string_target_text(node), + } +} + +fn visit_scalar_any<'de, V>(node: &Node, input: &'de str, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match &node.value { + NodeValue::Null => visitor.visit_unit(), + NodeValue::Bool(value) => visitor.visit_bool(*value), + NodeValue::Number(number) => visit_any_number(*number, node.span, visitor), + NodeValue::String(value) => { + if let Some(borrowed) = borrowed_event_str(input, node.span, value) { + visitor.visit_borrowed_str(borrowed) + } else { + visitor.visit_str(value) + } + } + NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { + tag: tagged.tag.clone(), + value: tagged.value.clone(), + 
}), + NodeValue::Sequence(_) | NodeValue::Mapping(_) => Err(type_error("scalar", node)), + } +} + +fn string_target_text(node: &Node) -> Option<&str> { + match &node.value { + NodeValue::String(value) => Some(value), + NodeValue::Null => Some("null"), + NodeValue::Bool(value) => Some(if *value { "true" } else { "false" }), + NodeValue::Number(_) => node.scalar_source().map(|source| source.raw()), + NodeValue::Tagged(tagged) => string_target_text(&tagged.value), + NodeValue::Sequence(_) | NodeValue::Mapping(_) => None, + } +} + +fn borrowed_event_str<'de>(input: &'de str, span: Span, value: &str) -> Option<&'de str> { + let raw = input.get(span.start..span.end)?; + if raw == value { + return Some(raw); + } + let quote = raw.chars().next()?; + if !matches!(quote, '"' | '\'') || !raw.ends_with(quote) || raw.len() < 2 { + return None; + } + let inner = &raw[quote.len_utf8()..raw.len() - quote.len_utf8()]; + (inner == value).then_some(inner) +} + +fn path_segment_for_node(node: &Node) -> ErrorPathSegment { + match &node.value { + NodeValue::String(value) => ErrorPathSegment::Key(value.clone()), + NodeValue::Bool(value) => ErrorPathSegment::ScalarKey(value.to_string()), + NodeValue::Number(number) => ErrorPathSegment::ScalarKey(number.to_string()), + NodeValue::Null => ErrorPathSegment::ScalarKey("null".to_string()), + NodeValue::Sequence(_) | NodeValue::Mapping(_) | NodeValue::Tagged(_) => { + ErrorPathSegment::ComplexKey + } + } +} + +fn with_span(result: Result, span: Span) -> Result { + result.map_err(|error| error.with_span_if_missing(span)) +} + +fn visit_i64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => match i64::try_from(value) { + Ok(value) => with_span(visitor.visit_i64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for i64", + Some(span), + )), + }, + Number::Unsigned(value) => match i64::try_from(value) { + Ok(value) => 
with_span(visitor.visit_i64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for i64", + Some(span), + )), + }, + Number::Float(_) => Err(Error::data("expected integer, found float", Some(span))), + } +} + +fn visit_u64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) if value >= 0 => match u64::try_from(value) { + Ok(value) => with_span(visitor.visit_u64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for u64", + Some(span), + )), + }, + Number::Unsigned(value) => match u64::try_from(value) { + Ok(value) => with_span(visitor.visit_u64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for u64", + Some(span), + )), + }, + Number::Integer(_) => Err(Error::data( + "expected unsigned integer, found integer", + Some(span), + )), + Number::Float(_) => Err(Error::data( + "expected unsigned integer, found float", + Some(span), + )), + } +} + +fn visit_i128_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => with_span(visitor.visit_i128(value), span), + Number::Unsigned(value) => match i128::try_from(value) { + Ok(value) => with_span(visitor.visit_i128(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for i128", + Some(span), + )), + }, + Number::Float(_) => Err(Error::data("expected integer, found float", Some(span))), + } +} + +fn visit_u128_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) if value >= 0 => { + let value = u128::try_from(value).expect("non-negative i128 fits u128"); + with_span(visitor.visit_u128(value), span) + } + Number::Unsigned(value) => with_span(visitor.visit_u128(value), span), + Number::Integer(_) => Err(Error::data( + "expected unsigned integer, found integer", + Some(span), + 
)), + Number::Float(_) => Err(Error::data( + "expected unsigned integer, found float", + Some(span), + )), + } +} + +fn visit_f64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => with_span(visitor.visit_f64(value as f64), span), + Number::Unsigned(value) => with_span(visitor.visit_f64(value as f64), span), + Number::Float(value) => with_span(visitor.visit_f64(value), span), + } +} + +fn visit_any_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => match i64::try_from(value) { + Ok(value) => with_span(visitor.visit_i64(value), span), + Err(_) => with_span(visitor.visit_i128(value), span), + }, + Number::Unsigned(value) => match u64::try_from(value) { + Ok(value) => with_span(visitor.visit_u64(value), span), + Err(_) => with_span(visitor.visit_u128(value), span), + }, + Number::Float(value) => with_span(visitor.visit_f64(value), span), + } +} + +fn type_error(expected: &'static str, node: &Node) -> Error { + Error::data( + format!("expected {expected}, found {}", kind_name(&node.value)), + Some(node.span), + ) +} + +fn kind_name(value: &NodeValue) -> &'static str { + match value { + NodeValue::Null => "null", + NodeValue::Bool(_) => "bool", + NodeValue::Number(Number::Integer(_)) => "integer", + NodeValue::Number(Number::Unsigned(_)) => "unsigned integer", + NodeValue::Number(Number::Float(_)) => "float", + NodeValue::String(_) => "string", + NodeValue::Sequence(_) => "sequence", + NodeValue::Mapping(_) => "mapping", + NodeValue::Tagged(_) => "tagged value", + } +} + +fn unexpected_event(expected: &'static str, event: &Event) -> Error { + Error::data( + format!("expected {expected}, found {}", event_kind(event)), + event_span(event), + ) +} + +fn event_kind(event: &Event) -> &'static str { + match event { + Event::StreamStart => "stream start", + Event::StreamEnd => "stream end", + 
Event::DocumentStart { .. } => "document start", + Event::DocumentEnd { .. } => "document end", + Event::SequenceStart { .. } => "sequence start", + Event::SequenceEnd { .. } => "sequence end", + Event::MappingStart { .. } => "mapping start", + Event::MappingEnd { .. } => "mapping end", + Event::Alias { .. } => "alias", + Event::Scalar { .. } => "scalar", + } +} + +fn event_span(event: &Event) -> Option { + match event { + Event::DocumentStart { span, .. } + | Event::DocumentEnd { span, .. } + | Event::SequenceStart { span, .. } + | Event::SequenceEnd { span } + | Event::MappingStart { span, .. } + | Event::MappingEnd { span } + | Event::Scalar { span, .. } => Some(*span), + Event::Alias { anchor } => Some(anchor.span), + Event::StreamStart | Event::StreamEnd => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde::{Deserialize, de::IgnoredAny}; + use std::collections::{BTreeMap, BTreeSet}; + use std::io::{self, Cursor, Read}; + + struct FailingAfterPrefixReader { + prefix: Cursor>, + } + + impl FailingAfterPrefixReader { + fn new(prefix: &[u8]) -> Self { + Self { + prefix: Cursor::new(prefix.to_vec()), + } + } + } + + impl Read for FailingAfterPrefixReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let read = self.prefix.read(buf)?; + if read == 0 { + Err(io::Error::other("stream interrupted")) + } else { + Ok(read) + } + } + } + + #[derive(Debug, Deserialize, PartialEq)] + struct EventConfig<'a> { + name: &'a str, + ports: Vec, + enabled: bool, + labels: BTreeMap, + optional: Option, + } + + #[derive(Debug, Deserialize, PartialEq)] + struct OwnedEventConfig { + name: String, + ports: Vec, + enabled: bool, + labels: BTreeMap, + optional: Option, + } + + #[derive(Debug, Deserialize, PartialEq)] + struct ExplicitCoreScalars { + string_null: String, + optional_string_null: Option, + string_bool: String, + yes: bool, + off: bool, + maybe: Option, + unit: (), + } + + #[derive(Debug, Deserialize, PartialEq)] + struct ExplicitCoreNumbers { 
+ integer: i64, + unsigned: u64, + float: f64, + } + + #[derive(Debug, Deserialize, PartialEq)] + struct TargetMap { + target: BTreeMap, + } + + #[derive(Debug, Deserialize, PartialEq)] + struct TargetValueMap { + target: BTreeMap, + } + + #[derive(Debug, Deserialize, PartialEq)] + struct KnownOnly { + name: String, + } + + fn assert_value_tagged_key( + mapping: &crate::Value, + expected_tag: crate::Tag, + expected_key: &str, + expected_value: &str, + ) { + let mapping = mapping.as_mapping().expect("mapping value"); + assert!( + mapping.iter().any(|(key, value)| { + matches!(key, crate::Value::Tagged(tagged) + if tagged.tag == expected_tag + && tagged.value.as_str() == Some(expected_key) + && value.as_str() == Some(expected_value)) + }), + "expected tagged key {expected_tag:?} {expected_key:?}: {expected_value:?}" + ); + } + + #[test] + fn event_deserializer_reads_typed_structs() { + let input = "\ +name: api +ports: [80, 443] +enabled: true +labels: + tier: backend + release: stable +optional: null +"; + + let parsed: EventConfig<'_> = + from_str_with_options(input, LoadOptions::new()).expect("event-backed typed config"); + assert_eq!(parsed.name, "api"); + assert!(std::ptr::eq(parsed.name.as_ptr(), input[6..9].as_ptr())); + assert_eq!(parsed.ports, vec![80, 443]); + assert!(parsed.enabled); + assert_eq!(parsed.labels["tier"], "backend"); + assert_eq!(parsed.labels["release"], "stable"); + assert_eq!(parsed.optional, None); + } + + #[test] + fn event_deserializer_rejects_duplicate_scalar_keys() { + let input = "labels:\n tier: backend\n tier: worker\n"; + let error = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect_err("event-backed duplicate keys reject"); + assert!(error.to_string().contains("duplicate mapping key")); + } + + #[test] + fn event_deserializer_rejects_duplicate_sequence_alias_mapping_keys() { + let input = "seq: &seq [a, b]\nroot: {? *seq : first, ? 
[a, b] : second}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("event-backed alias-expanded sequence keys reject"); + + assert!(error.to_string().contains("duplicate mapping key")); + } + + #[test] + fn event_deserializer_rejects_duplicate_mapping_alias_keys_order_insensitively() { + let input = "base: &base {a: 1, b: 2}\nroot: {? *base : first, ? {b: 2, a: 1} : second}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("event-backed alias-expanded mapping keys reject"); + + assert!(error.to_string().contains("duplicate mapping key")); + } + + #[test] + fn event_deserializer_accepts_distinct_complex_alias_mapping_keys() { + let input = "seq: &seq [a, b]\nroot: {? *seq : first, ? [a, c] : second}\n"; + + from_str_with_options::(input, LoadOptions::new()) + .expect("distinct complex alias keys pass duplicate preflight"); + } + + #[test] + fn event_deserializer_rejects_recursive_alias_mapping_keys() { + let input = "root: {? &self [*self] : value}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("recursive alias key rejects"); + + assert!(error.to_string().contains("recursive alias")); + } + + #[test] + fn event_deserializer_rejects_complex_alias_mapping_keys_over_budget() { + let input = "seq: &seq [a, b]\nroot: {? 
*seq : first}\n"; + let error = from_str_with_options::( + input, + LoadOptions::new().max_alias_expansion_nodes(1), + ) + .expect_err("complex alias key replay budget rejects"); + + assert!( + error + .to_string() + .contains("alias event replay limit exceeded") + ); + } + + #[test] + fn event_deserializer_expands_merge_keys() { + let input = "\ +base: &base + retries: 3 + command: deploy +target: + <<: *base + command: smoke +"; + let parsed = + from_str_with_options::(input, LoadOptions::new()).expect("merge keys"); + + assert_eq!(parsed.target["retries"], "3"); + assert_eq!(parsed.target["command"], "smoke"); + } + + #[test] + fn event_deserializer_expands_merge_lists_with_earlier_sources_winning() { + let input = "\ +base1: &base1 {a: one, shared: first} +base2: &base2 {b: two, shared: second} +target: {<<: [*base1, *base2], local: ok} +"; + let parsed = + from_str_with_options::(input, LoadOptions::new()).expect("merge list"); + + assert_eq!(parsed.target["a"], "one"); + assert_eq!(parsed.target["b"], "two"); + assert_eq!(parsed.target["shared"], "first"); + assert_eq!(parsed.target["local"], "ok"); + } + + #[test] + fn event_deserializer_expands_explicit_merge_tag_keys() { + let input = "\ +%TAG !m! tag:yaml.org,2002: +--- +base: &base {a: one, shared: base} +tagged: {!!merge <<: *base, shared: tagged} +canonical: {! 
<<: *base, shared: canonical} +handle: {!m!merge <<: *base, shared: handle} +"; + let parsed = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect("explicit merge tag keys"); + + for (key, expected_shared) in [ + ("tagged", "tagged"), + ("canonical", "canonical"), + ("handle", "handle"), + ] { + assert_eq!(parsed[key]["a"], "one"); + assert_eq!(parsed[key]["shared"], expected_shared); + } + } + + #[test] + fn event_deserializer_keeps_explicit_string_merge_key_literal() { + let input = "base: &base {!!str <<: literal, a: one}\ntarget: {<<: *base}\n"; + let parsed = from_str_with_options::(input, LoadOptions::new()) + .expect("explicit string merge key stays literal"); + + assert_eq!(parsed.target["a"], "one"); + assert_eq!(parsed.target["<<"], "literal"); + } + + #[test] + fn event_deserializer_reports_invalid_merge_payloads() { + let input = "target: {<<: scalar}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("invalid merge payload rejects"); + + assert!( + error + .to_string() + .contains("expected a mapping or list of mappings for merging"), + "{error}" + ); + } + + #[test] + fn event_deserializer_skips_valid_merge_maps_for_ignored_values() { + let input = "base: &base {a: one}\nname: app\nignored: {<<: *base, b: two}\n"; + let parsed = from_str_with_options::(input, LoadOptions::new()) + .expect("unknown merge-bearing field is skipped"); + + assert_eq!(parsed.name, "app"); + from_str_with_options::(input, LoadOptions::new()) + .expect("ignored-any skips merge-bearing maps"); + } + + #[test] + fn event_deserializer_rejects_invalid_merge_payloads_in_ignored_values() { + let input = "name: app\nignored: {<<: scalar}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("strict invalid merge payload rejects while skipping"); + + assert!( + error + .to_string() + .contains("expected a mapping or list of mappings for merging"), + "{error}" + ); + } + + #[test] + fn 
event_deserializer_yaml11_skips_literal_merge_payload_in_ignored_value() { + let input = "%YAML 1.1\n---\nname: app\nignored: {<<: scalar, keep: value}\n"; + let parsed = + from_str_with_options::(input, LoadOptions::yaml_version_directive()) + .expect("directive-driven YAML 1.1 literal merge payload is skipped"); + + assert_eq!(parsed.name, "app"); + } + + #[test] + fn event_deserializer_rejects_repeated_merge_keys_by_default() { + let input = "\ +first: &first {shared: first} +second: &second {shared: second} +target: + <<: *first + !!merge <<: *second +"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("default repeated merge keys reject"); + + assert!(error.to_string().contains("duplicate mapping key `<<`")); + } + + #[test] + fn event_deserializer_yaml11_recovers_repeated_merge_keys() { + let input = "\ +first: &first {shared: first, retries: 3} +second: &second {shared: second, timeout: 10} +target: + <<: *first + ! <<: *second + keep: value +"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) + .expect("YAML 1.1 repeated merge keys recover"); + + assert_eq!(parsed.target["shared"], "second"); + assert_eq!(parsed.target["retries"], "3"); + assert_eq!(parsed.target["timeout"], "10"); + assert_eq!(parsed.target["keep"], "value"); + } + + #[test] + fn event_deserializer_yaml11_keeps_scalar_merge_payload_literal() { + let input = "\ +target: + <<: scalar + keep: value +"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) + .expect("YAML 1.1 scalar merge payload stays literal"); + + assert_eq!(parsed.target["<<"], "scalar"); + assert_eq!(parsed.target["keep"], "value"); + } + + #[test] + fn event_deserializer_yaml11_keeps_mixed_invalid_merge_list_literal() { + let input = "\ +base: &base {a: 1} +target: + <<: [*base, scalar] + keep: value +"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) + .expect("YAML 1.1 mixed invalid merge list stays literal"); + let merge = 
&parsed.target["<<"]; + let merge = merge.as_sequence().expect("literal merge list"); + + assert_eq!(merge[0]["a"].as_u64(), Some(1)); + assert_eq!(merge[1].as_str(), Some("scalar")); + assert_eq!(parsed.target["keep"].as_str(), Some("value")); + } + + #[test] + fn event_deserializer_reads_explicit_core_scalar_tags() { + let input = "\ +string_null: !!str null +optional_string_null: !!str null +string_bool: !!str true +yes: !!bool YES +off: !!bool off +maybe: !!null null +unit: !!null ~ +"; + let parsed = + from_str_with_options::(input, LoadOptions::new()).unwrap(); + + assert_eq!( + parsed, + ExplicitCoreScalars { + string_null: "null".to_string(), + optional_string_null: Some("null".to_string()), + string_bool: "true".to_string(), + yes: true, + off: false, + maybe: None, + unit: (), + } + ); + } + + #[test] + fn event_deserializer_reads_explicit_core_numeric_tags() { + let input = "integer: !!int \"42\"\nunsigned: !!int 0x2A\nfloat: !!float \"1.5\"\n"; + let parsed = + from_str_with_options::(input, LoadOptions::new()).unwrap(); + + assert_eq!( + parsed, + ExplicitCoreNumbers { + integer: 42, + unsigned: 42, + float: 1.5, + } + ); + } + + #[test] + fn event_deserializer_explicit_tags_follow_directive_schema() { + let parsed = from_str_with_options::( + "%YAML 1.1\n--- !!bool YES\n", + LoadOptions::yaml_version_directive(), + ) + .expect("directive-driven explicit bool"); + + assert!(parsed); + } + + #[test] + fn event_deserializer_rejects_invalid_explicit_core_scalar_tags() { + let bool_error = from_str_with_options::("!!bool maybe\n", LoadOptions::new()) + .expect_err("invalid explicit bool"); + assert!( + bool_error + .to_string() + .contains("failed to parse explicit !!bool scalar"), + "{bool_error}" + ); + + let str_error = from_str_with_options::("!!str 7\n", LoadOptions::new()) + .expect_err("explicit string does not coerce to integer"); + assert!(str_error.to_string().contains("expected integer")); + } + + #[test] + fn 
event_deserializer_retains_tagged_scalars_for_value_and_unwraps_typed_strings() { + let value = from_str_with_options::("!Thing tagged\n", LoadOptions::new()) + .expect("custom tagged scalar value"); + let tagged = value.as_tagged().expect("custom tag retained"); + + assert_eq!(tagged.tag, crate::Tag::new("Thing")); + assert_eq!(tagged.value.as_str(), Some("tagged")); + + let typed = from_str_with_options::("!Thing tagged\n", LoadOptions::new()) + .expect("typed string unwraps custom tag"); + assert_eq!(typed, "tagged"); + + let explicit = from_str_with_options::("!!str null\n", LoadOptions::new()) + .expect("explicit core string tag value"); + let tagged = explicit.as_tagged().expect("explicit core tag retained"); + assert_eq!(tagged.tag, crate::Tag::new("!!str")); + assert_eq!(tagged.value.as_str(), Some("null")); + } + + #[test] + fn event_deserializer_retains_tagged_collections_for_value_and_unwraps_typed_targets() { + #[derive(Debug, Deserialize, PartialEq)] + struct TaggedCollections { + seq: Vec, + map: BTreeMap, + } + + let input = "seq: !Seq [a, b]\nmap: !Map {k: v}\n"; + let value = + from_str_with_options::(input, LoadOptions::new()).expect("value"); + + let sequence = value["seq"].as_tagged().expect("sequence tag retained"); + assert_eq!(sequence.tag, crate::Tag::new("Seq")); + assert_eq!( + sequence + .value + .as_sequence() + .expect("sequence payload") + .len(), + 2 + ); + assert_eq!(sequence.value[0].as_str(), Some("a")); + assert_eq!(sequence.value[1].as_str(), Some("b")); + + let mapping = value["map"].as_tagged().expect("mapping tag retained"); + assert_eq!(mapping.tag, crate::Tag::new("Map")); + assert_eq!(mapping.value["k"].as_str(), Some("v")); + + let typed = from_str_with_options::(input, LoadOptions::new()) + .expect("typed collections unwrap tags"); + assert_eq!( + typed, + TaggedCollections { + seq: vec!["a".to_string(), "b".to_string()], + map: BTreeMap::from([("k".to_string(), "v".to_string())]), + } + ); + + let top_value = 
from_str_with_options::("!Seq [a, b]\n", LoadOptions::new()) + .expect("top-level tagged sequence value"); + let tagged = top_value.as_tagged().expect("top-level tag retained"); + assert_eq!(tagged.tag, crate::Tag::new("Seq")); + assert_eq!(tagged.value[1].as_str(), Some("b")); + + let top_typed = from_str_with_options::>("!Seq [a, b]\n", LoadOptions::new()) + .expect("top-level typed sequence unwraps tag"); + assert_eq!(top_typed, ["a", "b"]); + } + + #[test] + fn event_deserializer_projects_yaml11_collection_tags_for_typed_targets() { + let set = from_str_with_options::>( + "!!set\n? alpha\n? beta\n", + LoadOptions::new(), + ) + .expect("typed !!set"); + assert_eq!( + set, + BTreeSet::from(["alpha".to_string(), "beta".to_string()]) + ); + + let omap_pairs = from_str_with_options::>( + "!!omap\n- first: 1\n- second: 2\n", + LoadOptions::new(), + ) + .expect("typed !!omap pair sequence"); + assert_eq!( + omap_pairs, + vec![("first".to_string(), 1), ("second".to_string(), 2)] + ); + + let omap_map = from_str_with_options::>( + "!!omap\n- second: 2\n- first: 1\n", + LoadOptions::new(), + ) + .expect("typed !!omap map"); + assert_eq!( + omap_map, + BTreeMap::from([("first".to_string(), 1), ("second".to_string(), 2)]) + ); + + let pairs = from_str_with_options::>( + "!!pairs\n- repeat: 1\n- repeat: 2\n", + LoadOptions::new(), + ) + .expect("typed !!pairs preserves duplicate keys"); + assert_eq!( + pairs, + vec![("repeat".to_string(), 1), ("repeat".to_string(), 2)] + ); + } + + #[test] + fn event_deserializer_rejects_lossy_yaml11_collection_tag_shapes() { + let duplicate = from_str_with_options::>( + "!!omap\n- z: 1\n- a: 2\n- z: 3\n", + LoadOptions::new(), + ) + .expect_err("typed !!omap map rejects duplicate keys"); + assert!(duplicate.to_string().contains("duplicate mapping key `z`")); + + let set_error = + from_str_with_options::>("!!set {alpha: true}\n", LoadOptions::new()) + .expect_err("typed !!set rejects non-null values"); + assert!( + set_error + .to_string() 
+ .contains("expected explicit !!set entry value to be null"), + "{set_error}" + ); + + let omap_error = from_str_with_options::>( + "!!omap\n- {a: 1, b: 2}\n", + LoadOptions::new(), + ) + .expect_err("typed !!omap rejects multi-pair entries"); + assert!( + omap_error + .to_string() + .contains("expected explicit !!omap entry to contain exactly one pair"), + "{omap_error}" + ); + + let pairs_error = + from_str_with_options::>("!!pairs\n- scalar\n", LoadOptions::new()) + .expect_err("typed !!pairs rejects scalar entries"); + assert!( + pairs_error + .to_string() + .contains("expected single-pair mapping entry for explicit !!pairs"), + "{pairs_error}" + ); + } + + #[test] + fn event_deserializer_retains_tagged_merge_maps_for_value_and_unwraps_typed_targets() { + let input = "base: &base {a: one}\ntarget: !Thing {<<: *base, b: two}\n"; + let value = from_str_with_options::(input, LoadOptions::new()) + .expect("tagged merge map value"); + let tagged = value["target"].as_tagged().expect("target tag retained"); + + assert_eq!(tagged.tag, crate::Tag::new("Thing")); + assert_eq!(tagged.value["a"].as_str(), Some("one")); + assert_eq!(tagged.value["b"].as_str(), Some("two")); + + let typed = from_str_with_options::(input, LoadOptions::new()) + .expect("typed tagged merge map unwraps tag"); + assert_eq!(typed.target["a"], "one"); + assert_eq!(typed.target["b"], "two"); + } + + #[test] + fn event_deserializer_retains_tagged_literal_merge_keys_without_expansion() { + let input = "\ +custom: {!Thing <<: literal, image: app:custom} +string: {!!str <<: literal, image: app:string} +"; + let value = + from_str_with_options::(input, LoadOptions::new()).expect("tagged keys"); + + assert_value_tagged_key(&value["custom"], crate::Tag::new("Thing"), "<<", "literal"); + assert_value_tagged_key(&value["string"], crate::Tag::new("!!str"), "<<", "literal"); + assert_eq!(value["custom"]["image"].as_str(), Some("app:custom")); + assert_eq!(value["string"]["image"].as_str(), 
Some("app:string")); + + let typed = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect("typed maps unwrap tagged literal keys"); + assert_eq!(typed["custom"]["<<"], "literal"); + assert_eq!(typed["string"]["<<"], "literal"); + } + + #[test] + fn event_deserializer_replays_acyclic_scalar_aliases() { + let input = "base: &base api\nservice: *base\n"; + let parsed = from_str_with_options::>(input, LoadOptions::new()) + .expect("event-backed scalar alias replay"); + + assert_eq!(parsed["base"], "api"); + assert_eq!(parsed["service"], "api"); + } + + #[test] + fn event_deserializer_replays_acyclic_sequence_aliases() { + let input = "base: &base [api, worker]\nservice: *base\n"; + let parsed = + from_str_with_options::>>(input, LoadOptions::new()) + .expect("event-backed sequence alias replay"); + + assert_eq!(parsed["base"], ["api", "worker"]); + assert_eq!(parsed["service"], ["api", "worker"]); + } + + #[test] + fn event_deserializer_validates_alias_expanded_mapping_values() { + let input = "base: &base {a: one, b: two}\ntarget: *base\n"; + let parsed = + from_str_with_options::(input, LoadOptions::new()).expect("mapping alias"); + + assert_eq!(parsed.target["a"], "one"); + assert_eq!(parsed.target["b"], "two"); + } + + #[test] + fn event_deserializer_replays_scalar_alias_mapping_keys() { + let input = "root: {anchor: &svc service, ? *svc : api}\n"; + let parsed = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect("event-backed scalar alias mapping key replay"); + + assert_eq!(parsed["root"]["anchor"], "service"); + assert_eq!(parsed["root"]["service"], "api"); + } + + #[test] + fn event_deserializer_rejects_duplicate_alias_mapping_keys() { + let input = "root: {? &name name : api, ? 
*name : worker}\n"; + let error = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect_err("event-backed alias-expanded duplicate keys reject"); + assert!(error.to_string().contains("duplicate mapping key")); + } + + #[test] + fn event_deserializer_rejects_alias_replay_over_budget() { + let input = "base: &base api\nservice: *base\n"; + let error = from_str_with_options::>( + input, + LoadOptions::new().max_alias_expansion_nodes(0), + ) + .expect_err("event-backed alias replay budget rejects"); + + assert!( + error + .to_string() + .contains("alias event replay limit exceeded") + ); + } + + #[test] + fn event_deserializer_rejects_duplicate_keys_in_ignored_mappings() { + let input = "base: &base {a: one, a: two}\ntarget: *base\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("ignored anchor source duplicate keys reject"); + + assert!(error.to_string().contains("duplicate mapping key")); + } + + #[test] + fn event_deserializer_reads_multiple_documents() { + let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [8080]\nenabled: false\nlabels:\n tier: job\noptional: note\n"; + let parsed: Vec = + from_documents_str_with_options(input, LoadOptions::new()) + .expect("event-backed document stream"); + + assert_eq!(parsed.len(), 2); + assert_eq!(parsed[0].name, "api"); + assert_eq!(parsed[0].ports, vec![80]); + assert_eq!(parsed[1].name, "worker"); + assert_eq!(parsed[1].ports, vec![8080]); + assert_eq!(parsed[1].labels["tier"], "job"); + assert_eq!(parsed[1].optional.as_deref(), Some("note")); + } + + #[test] + fn event_document_iterator_yields_borrowed_typed_documents() { + let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [8080]\nenabled: false\nlabels: {}\noptional: null\n"; + let mut iter = document_iter_str_with_options::>(input, LoadOptions::new()) + .expect("event-backed document iterator"); + + 
let first = iter.next().expect("first document").expect("first parses"); + assert_eq!(first.name, "api"); + assert!(std::ptr::eq(first.name.as_ptr(), input[10..13].as_ptr())); + + let second = iter + .next() + .expect("second document") + .expect("second parses"); + assert_eq!(second.name, "worker"); + let worker_offset = input.find("worker").expect("worker text in input"); + assert!(std::ptr::eq( + second.name.as_ptr(), + input[worker_offset..worker_offset + "worker".len()].as_ptr() + )); + assert!(iter.next().is_none()); + } + + #[test] + fn event_document_iterator_continues_after_typed_document_error() { + let input = "\ +--- +name: api +ports: [80] +enabled: true +labels: {} +optional: null +--- +name: bad +ports: [70000] +enabled: true +labels: {} +optional: null +--- +name: worker +ports: [8080] +enabled: false +labels: {} +optional: null +"; + let mut iter = + document_iter_str_with_options::(input, LoadOptions::new()) + .expect("event-backed document iterator"); + + let first = iter.next().expect("first document").expect("first parses"); + assert_eq!(first.name, "api"); + + let error = iter + .next() + .expect("second document") + .expect_err("second document has typed range error"); + assert_eq!(error.document_index(), Some(1)); + assert!(error.to_string().contains("70000"), "{error}"); + + let third = iter.next().expect("third document").expect("third parses"); + assert_eq!(third.name, "worker"); + assert!(iter.next().is_none()); + } + + #[test] + fn event_document_iterator_defers_later_parse_error_and_then_stops() { + let input = "---\nname: one\n---\n:\tbad\n---\nname: never\n"; + let mut iter = document_iter_str_with_options::(input, LoadOptions::new()) + .expect("event-backed document iterator"); + + let first = iter.next().expect("first document").expect("first parses"); + assert_eq!(first.name, "one"); + + let error = iter + .next() + .expect("second document item") + .expect_err("later parser error"); + assert_eq!(error.document_index(), Some(1)); 
+ assert_eq!(error.line(), Some(4)); + assert_eq!(error.column(), Some(2)); + assert!(iter.next().is_none()); + } + + #[test] + fn event_document_iterator_empty_stream_yields_no_documents() { + let mut iter = document_iter_str_with_options::("", LoadOptions::new()) + .expect("empty event-backed document iterator"); + + assert!(iter.next().is_none()); + let collected = from_documents_str_with_options::("", LoadOptions::new()) + .expect("empty document collection"); + assert!(collected.is_empty()); + } + + #[test] + fn event_document_iterator_slice_checks_utf8_and_input_limits() { + let invalid = match document_iter_slice_with_options::( + b"name: \xFF\n", + LoadOptions::new(), + ) { + Ok(_) => panic!("invalid UTF-8 should fail"), + Err(error) => error, + }; + assert!(invalid.to_string().contains("input is not valid UTF-8")); + + let limited = match document_iter_slice_with_options::( + b"name: app\n", + LoadOptions::new().max_input_bytes(4), + ) { + Ok(_) => panic!("input limit should fail"), + Err(error) => error, + }; + assert!( + limited + .to_string() + .contains("YAML input exceeds configured limit of 4 bytes") + ); + } + + #[test] + fn event_document_reader_iterator_uses_owned_input_and_preserves_merge_alias_semantics() { + let input = "\ +--- +base: &base {a: one} +target: {<<: *base, b: two} +--- +base: &base {a: three} +target: *base +"; + let docs = document_iter_reader_with_options::( + Cursor::new(input.as_bytes()), + LoadOptions::new(), + ) + .expect("reader-backed event iterator") + .collect::>>() + .expect("reader-backed documents"); + + assert_eq!(docs.len(), 2); + assert_eq!(docs[0].target["a"], "one"); + assert_eq!(docs[0].target["b"], "two"); + assert_eq!(docs[1].target["a"], "three"); + } + + #[test] + fn event_document_reader_iterator_reports_read_errors_before_iteration() { + let error = match document_iter_reader_with_options::( + FailingAfterPrefixReader::new(b"name: api\n"), + LoadOptions::new(), + ) { + Ok(_) => panic!("reader failure 
should reject iterator construction"), + Err(error) => error, + }; + + assert!(error.to_string().contains("failed to read YAML input")); + assert_eq!(error.location(), None); + } + + #[test] + fn event_deserializer_document_errors_carry_document_index() { + let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [70000]\nenabled: true\nlabels: {}\noptional: null\n"; + let error = from_documents_str_with_options::(input, LoadOptions::new()) + .expect_err("event-backed stream reports second document error"); + assert_eq!(error.document_index(), Some(1)); + } + + #[test] + fn event_deserializer_skips_ignored_any_without_materializing_values() { + let input = "root:\n - name: api\n ports: [80, 443]\n - nested:\n ok: true\n"; + IgnoredAny::deserialize(EventNodeDeserializer { + source: &mut EventSource::new( + input, + crate::parse::EventStream::from_str(input) + .expect("event stream") + .collect::>>() + .expect("events"), + Schema::Yaml12, + LoadOptions::new().alias_expansion_budget(input.len()), + ), + }) + .expect_err("raw stream markers must still be explicit"); + + from_str_with_options::(input, LoadOptions::new()).expect("ignored any"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 16b100e..b2bb279 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,7 @@ mod ast; mod de; mod emit; mod error; +mod event_de; mod key_identity; #[cfg(feature = "lossless")] pub mod lossless; @@ -44,6 +45,46 @@ pub mod stream { pub use crate::parse::{DocumentStream, EventStream}; } +#[doc(hidden)] +pub mod __unstable_event_serde { + use std::io::Read; + + use crate::{LoadOptions, Result}; + + pub fn from_documents_str(input: &str) -> Result> + where + T: serde::de::DeserializeOwned, + { + from_documents_str_with_options(input, LoadOptions::new()) + } + + pub fn from_documents_str_with_options(input: &str, options: LoadOptions) -> Result> + where + T: serde::de::DeserializeOwned, + { + 
crate::event_de::from_documents_str_with_options(input, options) + } + + pub fn from_documents_reader(reader: R) -> Result> + where + T: serde::de::DeserializeOwned, + R: Read, + { + from_documents_reader_with_options(reader, LoadOptions::new()) + } + + pub fn from_documents_reader_with_options( + reader: R, + options: LoadOptions, + ) -> Result> + where + T: serde::de::DeserializeOwned, + R: Read, + { + crate::event_de::document_iter_reader_with_options(reader, options)?.collect() + } +} + /// Mapping types and iterators for YAML [`Mapping`]. pub mod mapping { pub use crate::ast::{ diff --git a/src/parse.rs b/src/parse.rs index ee4df38..7705874 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -3741,7 +3741,10 @@ fn default_construction_schema(schema: Schema) -> Schema { } } -fn schema_for_directives(schema: Schema, directives: &EventDocumentDirectives) -> Schema { +pub(crate) fn schema_for_directives( + schema: Schema, + directives: &EventDocumentDirectives, +) -> Schema { match schema { Schema::YamlVersionDirective if directives @@ -3756,7 +3759,7 @@ fn schema_for_directives(schema: Schema, directives: &EventDocumentDirectives) - } } -fn merge_policy_for_schema(schema: Schema) -> MergePolicy { +pub(crate) fn merge_policy_for_schema(schema: Schema) -> MergePolicy { if schema.is_legacy_compatible() { MergePolicy::Yaml11Compatible } else { @@ -4660,7 +4663,7 @@ fn parse_scalar_with_span(text: &str, span: Span) -> Result { parse_scalar_with_schema(text, span, Schema::Yaml12) } -fn parse_scalar_with_schema(text: &str, span: Span, schema: Schema) -> Result { +pub(crate) fn parse_scalar_with_schema(text: &str, span: Span, schema: Schema) -> Result { parse_scalar_with_schema_and_source(text, span, schema, None) } From 0c28fed420a1f637a50a8fa18b01cb5bd3399c18 Mon Sep 17 00:00:00 2001 From: jskoiz <20649937+jskoiz@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:05:24 -1000 Subject: [PATCH 2/3] Bound event-backed alias depth and accept payload-carrying enum variants The 
event-backed deserializer expands aliases lazily while it walks, so the parser's literal nesting-depth check did not bound the expanded depth. A literally shallow document with a long alias chain recursed until the stack overflowed. Enforce the configured maximum nesting depth during alias expansion across the value walk, the ignored-node skip, and the key/merge materialization scan, matching the tree-backed alias handling, which rejects such documents instead of crashing. Also deserialize externally-tagged enum variants that carry a payload (newtype, tuple, and struct variants, plus tag-shorthand variants). The path previously accepted only bare-scalar unit variants and errored on every `{Variant: payload}` mapping. --- src/event_de.rs | 274 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 238 insertions(+), 36 deletions(-) diff --git a/src/event_de.rs b/src/event_de.rs index 1302941..76e55b9 100644 --- a/src/event_de.rs +++ b/src/event_de.rs @@ -24,9 +24,16 @@ where { let configured_schema = options.selected_schema(); let replay_budget = options.alias_expansion_budget(input.len()); + let max_nesting_depth = options.selected_max_nesting_depth(); let events = crate::parse::EventStream::from_str_with_options(input, options)? 
.collect::>>()?; - let mut source = EventSource::new(input, events, configured_schema, replay_budget); + let mut source = EventSource::new( + input, + events, + configured_schema, + replay_budget, + max_nesting_depth, + ); source.enter_stream()?; source.enter_document()?; let value = T::deserialize(EventNodeDeserializer { @@ -63,11 +70,13 @@ where { let configured_schema = options.selected_schema(); let replay_budget = options.alias_expansion_budget(input.len()); + let max_nesting_depth = options.selected_max_nesting_depth(); Ok(EventDocumentIter { input, frames: EventDocumentFrames::from_str_with_options(input, options)?, configured_schema, replay_budget, + max_nesting_depth, _marker: PhantomData, }) } @@ -102,12 +111,14 @@ where })?; let configured_schema = options.selected_schema(); let replay_budget = options.alias_expansion_budget(input.len()); + let max_nesting_depth = options.selected_max_nesting_depth(); let frames = EventDocumentFrames::from_str_with_options(&input, options)?; Ok(OwnedEventDocumentIter { input, frames, configured_schema, replay_budget, + max_nesting_depth, _marker: PhantomData, }) } @@ -117,6 +128,7 @@ pub(crate) struct EventDocumentIter<'de, T> { frames: EventDocumentFrames, configured_schema: Schema, replay_budget: usize, + max_nesting_depth: Option, _marker: PhantomData, } @@ -136,6 +148,7 @@ where events, self.configured_schema, self.replay_budget, + self.max_nesting_depth, ) }) .map_err(|error| error.with_document_index(index)), @@ -148,6 +161,7 @@ pub(crate) struct OwnedEventDocumentIter { frames: EventDocumentFrames, configured_schema: Schema, replay_budget: usize, + max_nesting_depth: Option, _marker: PhantomData, } @@ -167,6 +181,7 @@ where events, self.configured_schema, self.replay_budget, + self.max_nesting_depth, ) }) .map_err(|error| error.with_document_index(index)), @@ -273,11 +288,18 @@ fn deserialize_document_frame<'de, T>( events: Vec, configured_schema: Schema, replay_budget: usize, + max_nesting_depth: Option, ) -> 
Result where T: serde::Deserialize<'de>, { - let mut source = EventSource::new(input, events, configured_schema, replay_budget); + let mut source = EventSource::new( + input, + events, + configured_schema, + replay_budget, + max_nesting_depth, + ); source.enter_stream()?; source.enter_document()?; let value = T::deserialize(EventNodeDeserializer { @@ -301,6 +323,8 @@ struct EventSource<'de> { inject: Vec, replayed_events: usize, replay_budget: usize, + max_nesting_depth: Option, + depth: usize, } struct InjectedEvents { @@ -315,6 +339,7 @@ impl<'de> EventSource<'de> { events: Vec, configured_schema: Schema, replay_budget: usize, + max_nesting_depth: Option, ) -> Self { Self { input, @@ -326,7 +351,43 @@ impl<'de> EventSource<'de> { inject: Vec::new(), replayed_events: 0, replay_budget, + max_nesting_depth, + depth: 0, + } + } + + /// Records descent into a nested collection and enforces the configured + /// nesting-depth ceiling. The event-backed path expands aliases lazily as + /// it walks, so — unlike the tree-backed path's `AnchorTable::resolve` — the + /// parser's literal-depth check does not bound the *expanded* depth. Without + /// this guard a literally shallow document with a long alias chain recurses + /// until the stack overflows. Mirrors the tree-backed `depth > max` check. + fn enter_depth(&mut self, span: Span) -> Result<()> { + self.depth = self.depth.saturating_add(1); + if self.max_nesting_depth.is_some_and(|max| self.depth > max) { + return Err(Error::limit( + "maximum YAML nesting depth exceeded while expanding alias", + span, + )); + } + Ok(()) + } + + fn exit_depth(&mut self) { + self.depth = self.depth.saturating_sub(1); + } + + /// Same ceiling as [`enter_depth`], but for the read-only key/merge + /// materialization walk in [`node_at_for_key`], which threads an explicit + /// `depth` because it borrows `self` immutably. 
+ fn check_depth(&self, depth: usize, span: impl Into>) -> Result<()> { + if self.max_nesting_depth.is_some_and(|max| depth > max) { + return Err(Error::limit( + "maximum YAML nesting depth exceeded while expanding alias", + span, + )); } + Ok(()) } fn peek(&self) -> Option<&Event> { @@ -437,6 +498,7 @@ impl<'de> EventSource<'de> { self.anchors.clear(); self.inject.clear(); self.replayed_events = 0; + self.depth = 0; self.schema = schema_for_directives(self.configured_schema, &directives); Ok(()) } @@ -558,17 +620,19 @@ impl<'de> EventSource<'de> { "event-backed alias replay is not implemented", anchor.span, )), - Some(Event::SequenceStart { .. }) => { + Some(Event::SequenceStart { span, .. }) => { + self.enter_depth(span)?; self.next()?; loop { if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { self.next()?; + self.exit_depth(); return Ok(()); } self.skip_node()?; } } - Some(Event::MappingStart { .. }) => { + Some(Event::MappingStart { span, .. }) => { if self.next_mapping_has_merge_key()? { let mut node = self.materialize_current_node_for_merge()?; node.apply_merge_keys_with_policy(merge_policy_for_schema(self.schema))?; @@ -576,10 +640,12 @@ impl<'de> EventSource<'de> { return Ok(()); } self.validate_next_mapping_duplicates()?; + self.enter_depth(span)?; self.next()?; loop { if matches!(self.peek(), Some(Event::MappingEnd { .. })) { self.next()?; + self.exit_depth(); return Ok(()); } self.skip_node()?; @@ -598,21 +664,29 @@ impl<'de> EventSource<'de> { "event-backed alias replay is not implemented", anchor.span, )), - Event::SequenceStart { .. } => loop { - if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { - self.next()?; - return Ok(()); + Event::SequenceStart { span, .. } => { + self.enter_depth(span)?; + loop { + if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { + self.next()?; + self.exit_depth(); + return Ok(()); + } + self.skip_node_raw()?; } - self.skip_node_raw()?; - }, - Event::MappingStart { .. 
} => loop { - if matches!(self.peek(), Some(Event::MappingEnd { .. })) { - self.next()?; - return Ok(()); + } + Event::MappingStart { span, .. } => { + self.enter_depth(span)?; + loop { + if matches!(self.peek(), Some(Event::MappingEnd { .. })) { + self.next()?; + self.exit_depth(); + return Ok(()); + } + self.skip_node_raw()?; + self.skip_node_raw()?; } - self.skip_node_raw()?; - self.skip_node_raw()?; - }, + } event => Err(unexpected_event("node", &event)), } } @@ -628,6 +702,7 @@ impl<'de> EventSource<'de> { &mut Vec::new(), &mut replayed_events, true, + self.depth, )?; let expected = skip_node_in(events, pos)?; if next != expected { @@ -658,6 +733,7 @@ impl<'de> EventSource<'de> { &mut Vec::new(), &mut replayed_events, true, + self.depth, )?; if node_is_merge_key(&key) { return Ok(true); @@ -681,9 +757,13 @@ impl<'de> EventSource<'de> { if matches!(event, Event::MappingEnd { .. }) { return Ok(()); } - if let Some((key, next_pos)) = - self.mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events)? - { + if let Some((key, next_pos)) = self.mapping_key_at( + events, + pos, + &mut scan_anchors, + &mut replayed_events, + self.depth, + )? 
{ if node_is_merge_key(&key) { return Err(Error::data( "event-backed merge-key expansion is not implemented", @@ -715,6 +795,7 @@ impl<'de> EventSource<'de> { pos: usize, scan_anchors: &mut HashMap>, replayed_events: &mut usize, + depth: usize, ) -> Result> { if let Some(name) = events.get(pos).and_then(event_anchor_name) { let end = skip_node_in(events, pos)?; @@ -732,6 +813,7 @@ impl<'de> EventSource<'de> { &mut Vec::new(), replayed_events, false, + depth, ) .map(|(node, next)| Some((node, next))), Some(_) | None => Ok(None), @@ -797,6 +879,7 @@ impl<'de> EventSource<'de> { Ok(tagged_key_node(tag.tag.clone(), tag.span, inner)) } + #[allow(clippy::too_many_arguments)] fn node_at_for_key( &self, events: &[Event], @@ -805,10 +888,12 @@ impl<'de> EventSource<'de> { active_aliases: &mut Vec, replayed_events: &mut usize, allow_merge_key: bool, + depth: usize, ) -> Result<(Node, usize)> { let Some(event) = events.get(pos) else { return Err(Error::data("unexpected end of YAML event stream", None)); }; + self.check_depth(depth, event_span(event))?; if let Some(name) = event_anchor_name(event) { let end = skip_node_in(events, pos)?; scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); @@ -849,6 +934,7 @@ impl<'de> EventSource<'de> { active_aliases, replayed_events, allow_merge_key, + depth, )?; active_aliases.pop(); if end != target.len() { @@ -875,6 +961,7 @@ impl<'de> EventSource<'de> { active_aliases, replayed_events, allow_merge_key, + depth + 1, )?; items.push(item); next = after_item; @@ -906,6 +993,7 @@ impl<'de> EventSource<'de> { active_aliases, replayed_events, allow_merge_key, + depth + 1, )?; if !allow_merge_key && node_is_merge_key(&key) { return Err(Error::data( @@ -928,6 +1016,7 @@ impl<'de> EventSource<'de> { active_aliases, replayed_events, allow_merge_key, + depth + 1, )?; entries.push((key, value)); next = after_value; @@ -1365,10 +1454,15 @@ impl<'de> de::Deserializer<'de> for EventNodeDeserializer<'_, 'de> { return 
self.deserialize_prepared_current_seq(visitor); } match self.source.next()? { - Event::SequenceStart { .. } => visitor.visit_seq(EventSeqAccess { - source: self.source, - index: 0, - }), + Event::SequenceStart { span, .. } => { + self.source.enter_depth(span)?; + let value = visitor.visit_seq(EventSeqAccess { + source: &mut *self.source, + index: 0, + }); + self.source.exit_depth(); + value + } event => Err(unexpected_event("sequence", &event)), } } @@ -1408,10 +1502,15 @@ impl<'de> de::Deserializer<'de> for EventNodeDeserializer<'_, 'de> { } self.source.validate_next_mapping_duplicates()?; match self.source.next()? { - Event::MappingStart { .. } => visitor.visit_map(EventMapAccess { - source: self.source, - value: None, - }), + Event::MappingStart { span, .. } => { + self.source.enter_depth(span)?; + let value = visitor.visit_map(EventMapAccess { + source: &mut *self.source, + value: None, + }); + self.source.exit_depth(); + value + } event => Err(unexpected_event("mapping", &event)), } } @@ -1430,18 +1529,29 @@ impl<'de> de::Deserializer<'de> for EventNodeDeserializer<'_, 'de> { fn deserialize_enum( self, - _name: &'static str, - _variants: &'static [&'static str], + name: &'static str, + variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { - let node = self.source.take_scalar()?; - let Some(value) = prepared_string_target_text(&node) else { - return Err(type_error("enum string", &node)); - }; - visitor.visit_enum(value.to_string().into_deserializer()) + // Materialize the current node and reuse the tree-backed enum logic so + // the event path accepts the same forms as `de.rs`: bare-scalar unit + // variants, single-key `{Variant: payload}` mappings (newtype/tuple/ + // struct variants), and tag-shorthand variants. The previous + // scalar-only path rejected every externally-tagged variant that + // carried a payload. 
+ self.source.resolve_aliases_until_non_alias()?; + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_enum( + PreparedNodeDeserializer { node }, + name, + variants, + visitor, + ) } fn deserialize_identifier(self, visitor: V) -> Result @@ -1529,12 +1639,13 @@ impl<'de> MapAccess<'de> for EventMapAccess<'_, 'de> { self.source.next()?; return Ok(None); } + let depth = self.source.depth; let (events, pos) = self.source.current_events_and_pos(); let mut scan_anchors = self.source.anchors.clone(); let mut replayed_events = 0usize; let segment = self .source - .mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events)? + .mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events, depth)? .map(|(node, _)| path_segment_for_node(&node)) .unwrap_or(ErrorPathSegment::ComplexKey); self.value = Some(segment.clone()); @@ -3436,10 +3547,101 @@ target: *base .expect("events"), Schema::Yaml12, LoadOptions::new().alias_expansion_budget(input.len()), + LoadOptions::new().selected_max_nesting_depth(), ), }) .expect_err("raw stream markers must still be explicit"); from_str_with_options::(input, LoadOptions::new()).expect("ignored any"); } + + fn alias_depth_chain(levels: usize) -> String { + // A literally shallow document (max nesting depth 2) whose final anchor + // expands, via the alias chain, to a structure `levels` deep. + let mut input = String::from("- &n0 0\n"); + for k in 1..levels { + input.push_str(&format!("- &n{k} [*n{prev}]\n", prev = k - 1)); + } + input + } + + #[test] + fn event_deserializer_bounds_alias_expansion_depth() { + // The event-backed path expands aliases lazily while walking, so the + // parser's literal-depth check does not bound the expanded depth. 
Without + // an explicit ceiling this recurses until the stack overflows; it must + // instead reject, matching the tree-backed `AnchorTable::resolve` guard. + let input = alias_depth_chain(400); + let error = from_str_with_options::>(&input, LoadOptions::new()) + .expect_err("deep alias chain must hit the nesting-depth ceiling"); + assert!( + error.to_string().contains("nesting depth"), + "unexpected error: {error}" + ); + } + + #[test] + fn event_deserializer_allows_alias_chain_within_depth_limit() { + let input = alias_depth_chain(8); + let parsed = from_str_with_options::>(&input, LoadOptions::new()) + .expect("alias chain within the depth limit deserializes"); + assert_eq!(parsed.len(), 8); + } + + #[test] + fn event_deserializer_reads_map_form_enum_variants() { + // Externally-tagged enum variants carrying a payload — the forms the + // earlier scalar-only path rejected. Covers unit, newtype, tuple, and + // struct variants in one sequence. + #[derive(Debug, Deserialize, PartialEq)] + enum EventEnum { + Unit, + Newtype(u32), + Tuple(u8, u8), + Struct { width: u32, height: u32 }, + } + + let input = "\ +- Unit +- Newtype: 7 +- Tuple: [1, 2] +- Struct: + width: 3 + height: 4 +"; + let parsed: Vec = + from_str_with_options(input, LoadOptions::new()).expect("event-backed enum variants"); + assert_eq!( + parsed, + vec![ + EventEnum::Unit, + EventEnum::Newtype(7), + EventEnum::Tuple(1, 2), + EventEnum::Struct { + width: 3, + height: 4, + }, + ] + ); + } + + #[test] + fn event_deserializer_reads_map_form_enum_variant_through_alias() { + #[derive(Debug, Deserialize, PartialEq)] + enum Mode { + Tuned { level: u8 }, + } + + // The anchored definition and the alias must both resolve to the same + // map-form variant. 
+ let parsed = from_str_with_options::>( + "- &m {Tuned: {level: 9}}\n- *m\n", + LoadOptions::new(), + ) + .expect("aliased map-form enum variant"); + assert_eq!( + parsed, + vec![Mode::Tuned { level: 9 }, Mode::Tuned { level: 9 }] + ); + } } From b564fdd94fbcc25797443f3e9967c70d1146f5ee Mon Sep 17 00:00:00 2001 From: jskoiz <20649937+jskoiz@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:37:39 -1000 Subject: [PATCH 3/3] Split event-backed Serde implementation modules --- src/event_de.rs | 3647 ------------------------------------ src/event_de/mod.rs | 313 ++++ src/event_de/prepared.rs | 986 ++++++++++ src/event_de/serde_impl.rs | 499 +++++ src/event_de/source.rs | 875 +++++++++ src/event_de/tests.rs | 984 ++++++++++ 6 files changed, 3657 insertions(+), 3647 deletions(-) delete mode 100644 src/event_de.rs create mode 100644 src/event_de/mod.rs create mode 100644 src/event_de/prepared.rs create mode 100644 src/event_de/serde_impl.rs create mode 100644 src/event_de/source.rs create mode 100644 src/event_de/tests.rs diff --git a/src/event_de.rs b/src/event_de.rs deleted file mode 100644 index 76e55b9..0000000 --- a/src/event_de.rs +++ /dev/null @@ -1,3647 +0,0 @@ -#![allow(dead_code)] -// Compiled work-in-progress: this module is exercised by unit tests before it is -// wired into public Serde entrypoints. 
- -use crate::{ - Error, ErrorPathSegment, Node, NodeValue, Number, Result, Span, Tag, TaggedNode, - error::utf8_error_span, - key_identity::{DuplicateKeyTracker, check_duplicate_with_tracker_at_depth_limit}, - parse::{ - Event, EventMeta, ScalarStyle, merge_policy_for_schema, parse_scalar_with_schema, - schema_for_directives, - }, - schema::{LoadOptions, Schema}, -}; -use serde::de::{ - self, DeserializeOwned, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, - VariantAccess, Visitor, -}; -use std::{collections::HashMap, io::Read, marker::PhantomData}; - -pub(crate) fn from_str_with_options<'de, T>(input: &'de str, options: LoadOptions) -> Result -where - T: serde::Deserialize<'de>, -{ - let configured_schema = options.selected_schema(); - let replay_budget = options.alias_expansion_budget(input.len()); - let max_nesting_depth = options.selected_max_nesting_depth(); - let events = crate::parse::EventStream::from_str_with_options(input, options)? - .collect::>>()?; - let mut source = EventSource::new( - input, - events, - configured_schema, - replay_budget, - max_nesting_depth, - ); - source.enter_stream()?; - source.enter_document()?; - let value = T::deserialize(EventNodeDeserializer { - source: &mut source, - })?; - source.finish_document()?; - match source.peek() { - Some(Event::StreamEnd) => Ok(value), - Some(Event::DocumentStart { .. 
}) => Err(Error::data( - "expected single YAML document, found multiple documents", - None, - )), - Some(event) => Err(unexpected_event("stream end", event)), - None => Err(Error::data("unexpected end of YAML event stream", None)), - } -} - -pub(crate) fn from_documents_str_with_options( - input: &str, - options: LoadOptions, -) -> Result> -where - T: DeserializeOwned, -{ - document_iter_str_with_options(input, options)?.collect() -} - -pub(crate) fn document_iter_str_with_options<'de, T>( - input: &'de str, - options: LoadOptions, -) -> Result> -where - T: serde::Deserialize<'de>, -{ - let configured_schema = options.selected_schema(); - let replay_budget = options.alias_expansion_budget(input.len()); - let max_nesting_depth = options.selected_max_nesting_depth(); - Ok(EventDocumentIter { - input, - frames: EventDocumentFrames::from_str_with_options(input, options)?, - configured_schema, - replay_budget, - max_nesting_depth, - _marker: PhantomData, - }) -} - -pub(crate) fn document_iter_slice_with_options<'de, T>( - input: &'de [u8], - options: LoadOptions, -) -> Result> -where - T: serde::Deserialize<'de>, -{ - options.check_input_len(input.len())?; - let input = std::str::from_utf8(input) - .map_err(|err| Error::encoding("input is not valid UTF-8", utf8_error_span(input, err)))?; - document_iter_str_with_options(input, options) -} - -pub(crate) fn document_iter_reader_with_options( - reader: R, - options: LoadOptions, -) -> Result> -where - T: DeserializeOwned, - R: Read, -{ - let bytes = crate::de::read_to_end_with_options(reader, options)?; - let input = String::from_utf8(bytes).map_err(|err| { - Error::encoding( - "input is not valid UTF-8", - utf8_error_span(err.as_bytes(), err.utf8_error()), - ) - })?; - let configured_schema = options.selected_schema(); - let replay_budget = options.alias_expansion_budget(input.len()); - let max_nesting_depth = options.selected_max_nesting_depth(); - let frames = EventDocumentFrames::from_str_with_options(&input, 
options)?; - Ok(OwnedEventDocumentIter { - input, - frames, - configured_schema, - replay_budget, - max_nesting_depth, - _marker: PhantomData, - }) -} - -pub(crate) struct EventDocumentIter<'de, T> { - input: &'de str, - frames: EventDocumentFrames, - configured_schema: Schema, - replay_budget: usize, - max_nesting_depth: Option, - _marker: PhantomData, -} - -impl<'de, T> Iterator for EventDocumentIter<'de, T> -where - T: serde::Deserialize<'de>, -{ - type Item = Result; - - fn next(&mut self) -> Option { - let (index, frame) = self.frames.next_frame()?; - Some( - frame - .and_then(|events| { - deserialize_document_frame( - self.input, - events, - self.configured_schema, - self.replay_budget, - self.max_nesting_depth, - ) - }) - .map_err(|error| error.with_document_index(index)), - ) - } -} - -pub(crate) struct OwnedEventDocumentIter { - input: String, - frames: EventDocumentFrames, - configured_schema: Schema, - replay_budget: usize, - max_nesting_depth: Option, - _marker: PhantomData, -} - -impl Iterator for OwnedEventDocumentIter -where - T: DeserializeOwned, -{ - type Item = Result; - - fn next(&mut self) -> Option { - let (index, frame) = self.frames.next_frame()?; - Some( - frame - .and_then(|events| { - deserialize_document_frame( - &self.input, - events, - self.configured_schema, - self.replay_budget, - self.max_nesting_depth, - ) - }) - .map_err(|error| error.with_document_index(index)), - ) - } -} - -struct EventDocumentFrames { - events: crate::parse::EventStream, - started: bool, - finished: bool, - index: usize, -} - -impl EventDocumentFrames { - fn from_str_with_options(input: &str, options: LoadOptions) -> Result { - Ok(Self { - events: crate::parse::EventStream::from_str_with_options(input, options)?, - started: false, - finished: false, - index: 0, - }) - } - - fn next_frame(&mut self) -> Option<(usize, Result>)> { - if self.finished { - return None; - } - let index = self.index; - if let Err(error) = self.enter_stream() { - self.finished = true; - 
return Some((index, Err(error))); - } - - match self.events.next() { - Some(Ok(Event::StreamEnd)) => { - self.finished = true; - None - } - Some(Ok(start @ Event::DocumentStart { .. })) => { - Some((index, self.collect_document_frame(start))) - } - Some(Ok(event)) => { - self.finished = true; - Some(( - index, - Err(unexpected_event("document start or stream end", &event)), - )) - } - Some(Err(error)) => { - self.finished = true; - Some((index, Err(error))) - } - None => { - self.finished = true; - None - } - } - } - - fn enter_stream(&mut self) -> Result<()> { - if self.started { - return Ok(()); - } - self.started = true; - match self.events.next() { - Some(Ok(Event::StreamStart)) => Ok(()), - Some(Ok(event)) => Err(unexpected_event("stream start", &event)), - Some(Err(error)) => Err(error), - None => Err(Error::data("unexpected end of YAML event stream", None)), - } - } - - fn collect_document_frame(&mut self, start: Event) -> Result> { - let mut frame = Vec::new(); - frame.push(Event::StreamStart); - frame.push(start); - loop { - match self.events.next() { - Some(Ok(event)) => { - let end = matches!(event, Event::DocumentEnd { .. 
}); - frame.push(event); - if end { - frame.push(Event::StreamEnd); - self.index += 1; - return Ok(frame); - } - } - Some(Err(error)) => { - self.finished = true; - return Err(error); - } - None => { - self.finished = true; - return Err(Error::data("unexpected end of YAML event stream", None)); - } - } - } - } -} - -fn deserialize_document_frame<'de, T>( - input: &'de str, - events: Vec, - configured_schema: Schema, - replay_budget: usize, - max_nesting_depth: Option, -) -> Result -where - T: serde::Deserialize<'de>, -{ - let mut source = EventSource::new( - input, - events, - configured_schema, - replay_budget, - max_nesting_depth, - ); - source.enter_stream()?; - source.enter_document()?; - let value = T::deserialize(EventNodeDeserializer { - source: &mut source, - })?; - source.finish_document()?; - match source.peek() { - Some(Event::StreamEnd) => Ok(value), - Some(event) => Err(unexpected_event("stream end", event)), - None => Err(Error::data("unexpected end of YAML event stream", None)), - } -} - -struct EventSource<'de> { - input: &'de str, - events: Vec, - pos: usize, - configured_schema: Schema, - schema: Schema, - anchors: HashMap>, - inject: Vec, - replayed_events: usize, - replay_budget: usize, - max_nesting_depth: Option, - depth: usize, -} - -struct InjectedEvents { - anchor: String, - events: Vec, - pos: usize, -} - -impl<'de> EventSource<'de> { - fn new( - input: &'de str, - events: Vec, - configured_schema: Schema, - replay_budget: usize, - max_nesting_depth: Option, - ) -> Self { - Self { - input, - events, - pos: 0, - configured_schema, - schema: configured_schema, - anchors: HashMap::new(), - inject: Vec::new(), - replayed_events: 0, - replay_budget, - max_nesting_depth, - depth: 0, - } - } - - /// Records descent into a nested collection and enforces the configured - /// nesting-depth ceiling. 
The event-backed path expands aliases lazily as - /// it walks, so — unlike the tree-backed path's `AnchorTable::resolve` — the - /// parser's literal-depth check does not bound the *expanded* depth. Without - /// this guard a literally shallow document with a long alias chain recurses - /// until the stack overflows. Mirrors the tree-backed `depth > max` check. - fn enter_depth(&mut self, span: Span) -> Result<()> { - self.depth = self.depth.saturating_add(1); - if self.max_nesting_depth.is_some_and(|max| self.depth > max) { - return Err(Error::limit( - "maximum YAML nesting depth exceeded while expanding alias", - span, - )); - } - Ok(()) - } - - fn exit_depth(&mut self) { - self.depth = self.depth.saturating_sub(1); - } - - /// Same ceiling as [`enter_depth`], but for the read-only key/merge - /// materialization walk in [`node_at_for_key`], which threads an explicit - /// `depth` because it borrows `self` immutably. - fn check_depth(&self, depth: usize, span: impl Into>) -> Result<()> { - if self.max_nesting_depth.is_some_and(|max| depth > max) { - return Err(Error::limit( - "maximum YAML nesting depth exceeded while expanding alias", - span, - )); - } - Ok(()) - } - - fn peek(&self) -> Option<&Event> { - if let Some(frame) = self.inject.last() - && frame.pos < frame.events.len() - { - return frame.events.get(frame.pos); - } - self.events.get(self.pos) - } - - fn next(&mut self) -> Result { - loop { - let event = self.next_raw()?; - if let Event::Alias { anchor } = event { - self.inject_alias(anchor.name, anchor.span)?; - continue; - } - return Ok(event); - } - } - - fn next_raw(&mut self) -> Result { - if let Some(event) = self.next_injected_event() { - return Ok(event); - } - - let pos = self.pos; - let event = self - .events - .get(pos) - .cloned() - .ok_or_else(|| Error::data("unexpected end of YAML event stream", None))?; - self.record_anchor_at(pos, &event)?; - self.pos += 1; - Ok(event) - } - - fn resolve_aliases_until_non_alias(&mut self) -> Result<()> 
{ - while matches!(self.peek(), Some(Event::Alias { .. })) { - let Event::Alias { anchor } = self.next_raw()? else { - unreachable!("peek observed an alias"); - }; - self.inject_alias(anchor.name, anchor.span)?; - } - Ok(()) - } - - fn next_injected_event(&mut self) -> Option { - loop { - let frame = self.inject.last_mut()?; - if frame.pos < frame.events.len() { - let event = frame.events[frame.pos].clone(); - frame.pos += 1; - if frame.pos == frame.events.len() { - self.inject.pop(); - } - return Some(event); - } - self.inject.pop(); - } - } - - fn record_anchor_at(&mut self, pos: usize, event: &Event) -> Result<()> { - let Some(name) = event_anchor_name(event) else { - return Ok(()); - }; - let end = skip_node_in(&self.events, pos)?; - self.anchors - .insert(name.to_string(), self.events[pos..end].to_vec()); - Ok(()) - } - - fn inject_alias(&mut self, name: String, span: Span) -> Result<()> { - if self.inject.iter().any(|frame| frame.anchor == name) { - return Err(Error::reference( - format!("recursive alias `{name}` is not supported"), - span, - )); - } - let events = self - .anchors - .get(&name) - .cloned() - .ok_or_else(|| Error::reference(format!("unknown anchor `{name}`"), span))?; - self.replayed_events = self.replayed_events.saturating_add(events.len()); - if self.replayed_events > self.replay_budget { - return Err(Error::limit("alias event replay limit exceeded", span)); - } - self.inject.push(InjectedEvents { - anchor: name, - events, - pos: 0, - }); - Ok(()) - } - - fn enter_stream(&mut self) -> Result<()> { - match self.next()? { - Event::StreamStart => Ok(()), - event => Err(unexpected_event("stream start", &event)), - } - } - - fn enter_document(&mut self) -> Result<()> { - match self.next()? { - Event::DocumentStart { directives, .. 
} => { - self.anchors.clear(); - self.inject.clear(); - self.replayed_events = 0; - self.depth = 0; - self.schema = schema_for_directives(self.configured_schema, &directives); - Ok(()) - } - event => Err(unexpected_event("document start", &event)), - } - } - - fn finish_document(&mut self) -> Result<()> { - match self.next()? { - Event::DocumentEnd { .. } => Ok(()), - event => Err(unexpected_event("document end", &event)), - } - } - - fn scalar_from_event( - &self, - value: String, - style: ScalarStyle, - meta: &EventMeta, - span: Span, - ) -> Result { - if let Some(tag) = &meta.tag { - let tag = &tag.tag; - let tag_span = meta.tag.as_ref().expect("tag checked").span; - if tag.is_yaml_core("str") { - return Ok(tagged_key_node( - tag.clone(), - tag_span, - Node::new(NodeValue::String(value), span), - )); - } - if tag.is_yaml_core("int") { - let number = crate::de::parse_explicit_core_int_text(&value, Some(span))?; - return Ok(tagged_key_node( - tag.clone(), - tag_span, - Node::new(NodeValue::Number(number), span).with_scalar_source(value), - )); - } - if tag.is_yaml_core("float") { - let number = crate::de::parse_explicit_core_float_text(&value, Some(span))?; - return Ok(tagged_key_node( - tag.clone(), - tag_span, - Node::new(NodeValue::Number(number), span).with_scalar_source(value), - )); - } - if tag.is_yaml_core("bool") { - let value = crate::de::parse_explicit_core_bool_text(&value, Some(span))?; - return Ok(tagged_key_node( - tag.clone(), - tag_span, - Node::new(NodeValue::Bool(value), span), - )); - } - if tag.is_yaml_core("null") { - crate::de::parse_explicit_core_null_text(&value, Some(span))?; - return Ok(tagged_key_node( - tag.clone(), - tag_span, - Node::new(NodeValue::Null, span), - )); - } - let inner = self.untagged_scalar_from_event(value, style, span)?; - if tag.is_non_specific() { - return Ok(non_specific_event_node(span_union(tag_span, span), inner)); - } - return Ok(Node::new( - NodeValue::Tagged(Box::new(TaggedNode { - tag: tag.clone(), - 
tag_span, - value: inner, - })), - span_union(tag_span, span), - )); - } - self.untagged_scalar_from_event(value, style, span) - } - - fn untagged_scalar_from_event( - &self, - value: String, - style: ScalarStyle, - span: Span, - ) -> Result { - match style { - ScalarStyle::Plain => parse_scalar_with_schema(&value, span, self.schema), - ScalarStyle::SingleQuoted - | ScalarStyle::DoubleQuoted - | ScalarStyle::Literal - | ScalarStyle::Folded => Ok(Node::new(NodeValue::String(value), span)), - } - } - - fn take_scalar(&mut self) -> Result { - match self.next()? { - Event::Scalar { - value, - style, - meta, - span, - } => self.scalar_from_event(value, style, &meta, span), - Event::Alias { anchor } => Err(Error::reference( - "event-backed alias replay is not implemented", - anchor.span, - )), - event => Err(unexpected_event("scalar", &event)), - } - } - - fn skip_node(&mut self) -> Result<()> { - self.resolve_aliases_until_non_alias()?; - match self.peek().cloned() { - Some(Event::Scalar { .. }) => { - self.next()?; - Ok(()) - } - Some(Event::Alias { anchor }) => Err(Error::reference( - "event-backed alias replay is not implemented", - anchor.span, - )), - Some(Event::SequenceStart { span, .. }) => { - self.enter_depth(span)?; - self.next()?; - loop { - if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { - self.next()?; - self.exit_depth(); - return Ok(()); - } - self.skip_node()?; - } - } - Some(Event::MappingStart { span, .. }) => { - if self.next_mapping_has_merge_key()? { - let mut node = self.materialize_current_node_for_merge()?; - node.apply_merge_keys_with_policy(merge_policy_for_schema(self.schema))?; - self.skip_node_raw()?; - return Ok(()); - } - self.validate_next_mapping_duplicates()?; - self.enter_depth(span)?; - self.next()?; - loop { - if matches!(self.peek(), Some(Event::MappingEnd { .. 
})) { - self.next()?; - self.exit_depth(); - return Ok(()); - } - self.skip_node()?; - self.skip_node()?; - } - } - Some(event) => Err(unexpected_event("node", &event)), - None => Err(Error::data("unexpected end of YAML event stream", None)), - } - } - - fn skip_node_raw(&mut self) -> Result<()> { - match self.next()? { - Event::Scalar { .. } => Ok(()), - Event::Alias { anchor } => Err(Error::reference( - "event-backed alias replay is not implemented", - anchor.span, - )), - Event::SequenceStart { span, .. } => { - self.enter_depth(span)?; - loop { - if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { - self.next()?; - self.exit_depth(); - return Ok(()); - } - self.skip_node_raw()?; - } - } - Event::MappingStart { span, .. } => { - self.enter_depth(span)?; - loop { - if matches!(self.peek(), Some(Event::MappingEnd { .. })) { - self.next()?; - self.exit_depth(); - return Ok(()); - } - self.skip_node_raw()?; - self.skip_node_raw()?; - } - } - event => Err(unexpected_event("node", &event)), - } - } - - fn materialize_current_node_for_merge(&self) -> Result { - let (events, pos) = self.current_events_and_pos(); - let mut scan_anchors = self.anchors.clone(); - let mut replayed_events = 0usize; - let (node, next) = self.node_at_for_key( - events, - pos, - &mut scan_anchors, - &mut Vec::new(), - &mut replayed_events, - true, - self.depth, - )?; - let expected = skip_node_in(events, pos)?; - if next != expected { - return Err(Error::data( - "unterminated merge materialization event stream", - None, - )); - } - Ok(node) - } - - fn next_mapping_has_merge_key(&self) -> Result { - let (events, start) = self.current_events_and_pos(); - let Some(Event::MappingStart { .. }) = events.get(start) else { - return Ok(false); - }; - let mut pos = start + 1; - let mut scan_anchors = self.anchors.clone(); - let mut replayed_events = 0usize; - while let Some(event) = events.get(pos) { - if matches!(event, Event::MappingEnd { .. 
}) { - return Ok(false); - } - let (key, next_pos) = self.node_at_for_key( - events, - pos, - &mut scan_anchors, - &mut Vec::new(), - &mut replayed_events, - true, - self.depth, - )?; - if node_is_merge_key(&key) { - return Ok(true); - } - pos = next_pos; - pos = scan_anchors_in(events, pos, &mut scan_anchors)?; - } - Err(Error::data("unterminated mapping event stream", None)) - } - - fn validate_next_mapping_duplicates(&self) -> Result<()> { - let (events, start) = self.current_events_and_pos(); - let Some(Event::MappingStart { .. }) = events.get(start) else { - return Ok(()); - }; - let mut pos = start + 1; - let mut seen = DuplicateKeyTracker::new(); - let mut scan_anchors = self.anchors.clone(); - let mut replayed_events = 0usize; - while let Some(event) = events.get(pos) { - if matches!(event, Event::MappingEnd { .. }) { - return Ok(()); - } - if let Some((key, next_pos)) = self.mapping_key_at( - events, - pos, - &mut scan_anchors, - &mut replayed_events, - self.depth, - )? { - if node_is_merge_key(&key) { - return Err(Error::data( - "event-backed merge-key expansion is not implemented", - Some(key.span), - )); - } - check_duplicate_with_tracker_at_depth_limit(&mut seen, &key, 1, None)?; - pos = next_pos; - } else { - pos = scan_anchors_in(events, pos, &mut scan_anchors)?; - } - pos = scan_anchors_in(events, pos, &mut scan_anchors)?; - } - Err(Error::data("unterminated mapping event stream", None)) - } - - fn current_events_and_pos(&self) -> (&[Event], usize) { - if let Some(frame) = self.inject.last() - && frame.pos < frame.events.len() - { - return (&frame.events, frame.pos); - } - (&self.events, self.pos) - } - - fn mapping_key_at( - &self, - events: &[Event], - pos: usize, - scan_anchors: &mut HashMap>, - replayed_events: &mut usize, - depth: usize, - ) -> Result> { - if let Some(name) = events.get(pos).and_then(event_anchor_name) { - let end = skip_node_in(events, pos)?; - scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); - } - match 
events.get(pos) { - Some(Event::Scalar { .. }) - | Some(Event::Alias { .. }) - | Some(Event::SequenceStart { .. }) - | Some(Event::MappingStart { .. }) => self - .node_at_for_key( - events, - pos, - scan_anchors, - &mut Vec::new(), - replayed_events, - false, - depth, - ) - .map(|(node, next)| Some((node, next))), - Some(_) | None => Ok(None), - } - } - - fn scalar_key_at(&self, pos: usize) -> Result> { - self.scalar_key_at_in(&self.events, pos) - } - - fn scalar_key_at_in(&self, events: &[Event], pos: usize) -> Result> { - let Some(Event::Scalar { - value, - style, - meta, - span, - }) = events.get(pos) - else { - return Ok(None); - }; - self.scalar_from_event(value.clone(), *style, meta, *span) - .map(|node| Some((node, pos + 1))) - } - - fn scalar_key_node_from_event( - &self, - value: String, - style: ScalarStyle, - meta: &EventMeta, - span: Span, - ) -> Result { - let Some(tag) = &meta.tag else { - return self.scalar_from_event(value, style, meta, span); - }; - let inner = if tag.tag.is_yaml_core("int") { - Node::new( - NodeValue::Number(crate::de::parse_explicit_core_int_text(&value, Some(span))?), - span, - ) - } else if tag.tag.is_yaml_core("float") { - Node::new( - NodeValue::Number(crate::de::parse_explicit_core_float_text( - &value, - Some(span), - )?), - span, - ) - } else if tag.tag.is_yaml_core("bool") { - Node::new( - NodeValue::Bool(crate::de::parse_explicit_core_bool_text( - &value, - Some(span), - )?), - span, - ) - } else if tag.tag.is_yaml_core("null") { - crate::de::parse_explicit_core_null_text(&value, Some(span))?; - Node::new(NodeValue::Null, span) - } else { - let _ = style; - Node::new(NodeValue::String(value), span) - }; - Ok(tagged_key_node(tag.tag.clone(), tag.span, inner)) - } - - #[allow(clippy::too_many_arguments)] - fn node_at_for_key( - &self, - events: &[Event], - pos: usize, - scan_anchors: &mut HashMap>, - active_aliases: &mut Vec, - replayed_events: &mut usize, - allow_merge_key: bool, - depth: usize, - ) -> Result<(Node, 
usize)> { - let Some(event) = events.get(pos) else { - return Err(Error::data("unexpected end of YAML event stream", None)); - }; - self.check_depth(depth, event_span(event))?; - if let Some(name) = event_anchor_name(event) { - let end = skip_node_in(events, pos)?; - scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); - } - - match event { - Event::Scalar { - value, - style, - meta, - span, - } => self - .scalar_key_node_from_event(value.clone(), *style, meta, *span) - .map(|node| (node, pos + 1)), - Event::Alias { anchor } => { - let name = &anchor.name; - if active_aliases.iter().any(|active| active == name) { - return Err(Error::reference( - format!("recursive alias `{name}` is not supported"), - anchor.span, - )); - } - let target = scan_anchors.get(name).cloned().ok_or_else(|| { - Error::reference(format!("unknown anchor `{name}`"), anchor.span) - })?; - *replayed_events = replayed_events.saturating_add(target.len()); - if *replayed_events > self.replay_budget { - return Err(Error::limit( - "alias event replay limit exceeded", - anchor.span, - )); - } - active_aliases.push(name.clone()); - let (mut node, end) = self.node_at_for_key( - &target, - 0, - scan_anchors, - active_aliases, - replayed_events, - allow_merge_key, - depth, - )?; - active_aliases.pop(); - if end != target.len() { - return Err(Error::data("unterminated alias key event subtree", None)); - } - node.span = anchor.span; - Ok((node, pos + 1)) - } - Event::SequenceStart { meta, span, .. 
} => { - let mut items = Vec::new(); - let mut next = pos + 1; - loop { - match events.get(next) { - Some(Event::SequenceEnd { span: end_span }) => { - let node = - Node::new(NodeValue::Sequence(items), span_union(*span, *end_span)); - return Ok((apply_event_tag(meta, node), next + 1)); - } - Some(_) => { - let (item, after_item) = self.node_at_for_key( - events, - next, - scan_anchors, - active_aliases, - replayed_events, - allow_merge_key, - depth + 1, - )?; - items.push(item); - next = after_item; - } - None => { - return Err(Error::data("unterminated sequence event stream", None)); - } - } - } - } - Event::MappingStart { meta, span, .. } => { - let mut entries = Vec::new(); - let mut seen = DuplicateKeyTracker::new(); - let mut next = pos + 1; - loop { - match events.get(next) { - Some(Event::MappingEnd { span: end_span }) => { - let node = Node::new( - NodeValue::Mapping(entries), - span_union(*span, *end_span), - ); - return Ok((apply_event_tag(meta, node), next + 1)); - } - Some(_) => { - let (key, after_key) = self.node_at_for_key( - events, - next, - scan_anchors, - active_aliases, - replayed_events, - allow_merge_key, - depth + 1, - )?; - if !allow_merge_key && node_is_merge_key(&key) { - return Err(Error::data( - "event-backed merge-key expansion is not implemented", - Some(key.span), - )); - } - if !(allow_merge_key - && self.schema.is_legacy_compatible() - && node_is_merge_key(&key)) - { - check_duplicate_with_tracker_at_depth_limit( - &mut seen, &key, 1, None, - )?; - } - let (value, after_value) = self.node_at_for_key( - events, - after_key, - scan_anchors, - active_aliases, - replayed_events, - allow_merge_key, - depth + 1, - )?; - entries.push((key, value)); - next = after_value; - } - None => return Err(Error::data("unterminated mapping event stream", None)), - } - } - } - event => Err(unexpected_event("node", event)), - } - } -} - -fn skip_node_in(events: &[Event], pos: usize) -> Result { - match events - .get(pos) - .ok_or_else(|| 
Error::data("unexpected end of YAML event stream", None))? - { - Event::Scalar { .. } | Event::Alias { .. } => Ok(pos + 1), - Event::SequenceStart { .. } => { - let mut next = pos + 1; - loop { - match events.get(next) { - Some(Event::SequenceEnd { .. }) => return Ok(next + 1), - Some(_) => next = skip_node_in(events, next)?, - None => return Err(Error::data("unterminated sequence event stream", None)), - } - } - } - Event::MappingStart { .. } => { - let mut next = pos + 1; - loop { - match events.get(next) { - Some(Event::MappingEnd { .. }) => return Ok(next + 1), - Some(_) => { - next = skip_node_in(events, next)?; - next = skip_node_in(events, next)?; - } - None => return Err(Error::data("unterminated mapping event stream", None)), - } - } - } - event => Err(unexpected_event("node", event)), - } -} - -fn span_union(start: Span, end: Span) -> Span { - Span::new(start.start, end.end, start.line, start.column) -} - -fn tagged_key_node(tag: crate::Tag, tag_span: Span, value: Node) -> Node { - let span = span_union(tag_span, value.span); - Node::new( - NodeValue::Tagged(Box::new(TaggedNode { - tag, - tag_span, - value, - })), - span, - ) -} - -fn apply_event_tag(meta: &EventMeta, node: Node) -> Node { - let Some(tag) = &meta.tag else { - return node; - }; - if tag.tag.is_non_specific() { - non_specific_event_node(span_union(tag.span, node.span), node) - } else { - tagged_key_node(tag.tag.clone(), tag.span, node) - } -} - -fn non_specific_event_node(span: Span, mut node: Node) -> Node { - node.span = span; - match &node.value { - NodeValue::Sequence(_) - | NodeValue::Mapping(_) - | NodeValue::String(_) - | NodeValue::Tagged(_) => node, - NodeValue::Null | NodeValue::Bool(_) | NodeValue::Number(_) => { - let source = node - .scalar_source() - .map(|source| source.raw().to_string()) - .unwrap_or_default(); - Node::new(NodeValue::String(source.clone()), span).with_scalar_source(source) - } - } -} - -fn node_is_merge_key(key: &Node) -> bool { - match &key.value { - 
NodeValue::String(_) => key.as_str() == Some("<<"), - NodeValue::Tagged(tagged) if tagged.tag.is_yaml_core("merge") => { - tagged.value.as_str() == Some("<<") - } - _ => false, - } -} - -fn scan_anchors_in( - events: &[Event], - pos: usize, - anchors: &mut HashMap>, -) -> Result { - let Some(event) = events.get(pos) else { - return Err(Error::data("unexpected end of YAML event stream", None)); - }; - if let Some(name) = event_anchor_name(event) { - let end = skip_node_in(events, pos)?; - anchors.insert(name.to_string(), events[pos..end].to_vec()); - } - match event { - Event::Scalar { .. } | Event::Alias { .. } => Ok(pos + 1), - Event::SequenceStart { .. } => { - let mut next = pos + 1; - loop { - match events.get(next) { - Some(Event::SequenceEnd { .. }) => return Ok(next + 1), - Some(_) => next = scan_anchors_in(events, next, anchors)?, - None => return Err(Error::data("unterminated sequence event stream", None)), - } - } - } - Event::MappingStart { .. } => { - let mut next = pos + 1; - loop { - match events.get(next) { - Some(Event::MappingEnd { .. }) => return Ok(next + 1), - Some(_) => { - next = scan_anchors_in(events, next, anchors)?; - next = scan_anchors_in(events, next, anchors)?; - } - None => return Err(Error::data("unterminated mapping event stream", None)), - } - } - } - event => Err(unexpected_event("node", event)), - } -} - -fn event_anchor_name(event: &Event) -> Option<&str> { - match event { - Event::Scalar { meta, .. } - | Event::SequenceStart { meta, .. } - | Event::MappingStart { meta, .. } => { - meta.anchor.as_ref().map(|anchor| anchor.name.as_str()) - } - Event::StreamStart - | Event::StreamEnd - | Event::DocumentStart { .. } - | Event::DocumentEnd { .. } - | Event::SequenceEnd { .. } - | Event::MappingEnd { .. } - | Event::Alias { .. 
} => None, - } -} - -struct EventNodeDeserializer<'a, 'de> { - source: &'a mut EventSource<'de>, -} - -impl<'de> EventNodeDeserializer<'_, 'de> { - fn deserialize_prepared_current_node(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let mut node = self.source.materialize_current_node_for_merge()?; - node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; - self.source.skip_node_raw()?; - de::Deserializer::deserialize_any(PreparedNodeDeserializer { node }, visitor) - } - - fn deserialize_prepared_current_seq(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let mut node = self.source.materialize_current_node_for_merge()?; - node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; - self.source.skip_node_raw()?; - de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node }, visitor) - } - - fn deserialize_prepared_current_map(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let mut node = self.source.materialize_current_node_for_merge()?; - node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; - self.source.skip_node_raw()?; - de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor) - } -} - -impl<'de> de::Deserializer<'de> for EventNodeDeserializer<'_, 'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.source.resolve_aliases_until_non_alias()?; - match self.source.peek() { - Some(Event::Scalar { .. }) => { - let node = self.source.take_scalar()?; - visit_scalar_any(&node, self.source.input, visitor) - } - Some(Event::SequenceStart { meta, .. }) | Some(Event::MappingStart { meta, .. }) - if meta.tag.is_some() => - { - self.deserialize_prepared_current_node(visitor) - } - Some(Event::SequenceStart { .. }) => self.deserialize_seq(visitor), - Some(Event::MappingStart { .. 
}) => self.deserialize_map(visitor), - Some(Event::Alias { anchor }) => Err(Error::reference( - "event-backed alias replay is not implemented", - anchor.span, - )), - Some(event) => Err(unexpected_event("node", event)), - None => Err(Error::data("unexpected end of YAML event stream", None)), - } - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Bool(value) => with_span(visitor.visit_bool(value), node.span), - _ => Err(type_error("bool", &node)), - } - } - - fn deserialize_i8(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i16(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Number(number) => visit_i64_number(number, node.span, visitor), - _ => Err(type_error("integer", &node)), - } - } - - fn deserialize_u8(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_u64(visitor) - } - - fn deserialize_u16(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_u64(visitor) - } - - fn deserialize_u32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_u64(visitor) - } - - fn deserialize_u64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Number(number) => visit_u64_number(number, node.span, visitor), - _ => Err(type_error("unsigned integer", &node)), - } - } - - fn deserialize_i128(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let 
node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Number(number) => visit_i128_number(number, node.span, visitor), - _ => Err(type_error("integer", &node)), - } - } - - fn deserialize_u128(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Number(number) => visit_u128_number(number, node.span, visitor), - _ => Err(type_error("unsigned integer", &node)), - } - } - - fn deserialize_f32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_f64(visitor) - } - - fn deserialize_f64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Number(number) => visit_f64_number(number, node.span, visitor), - _ => Err(type_error("number", &node)), - } - } - - fn deserialize_char(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = self.source.take_scalar()?; - let value = prepared_string_target_text(&node).ok_or_else(|| type_error("char", &node))?; - let mut chars = value.chars(); - match (chars.next(), chars.next()) { - (Some(ch), None) => with_span(visitor.visit_char(ch), node.span), - _ => Err(type_error("char", &node)), - } - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = self.source.take_scalar()?; - let value = string_target_text(&node).ok_or_else(|| type_error("string", &node))?; - if let Some(borrowed) = borrowed_event_str(self.source.input, node.span, value) { - return visitor.visit_borrowed_str(borrowed); - } - visitor.visit_str(value) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = self.source.take_scalar()?; - let value = string_target_text(&node).ok_or_else(|| type_error("string", &node))?; - visitor.visit_string(value.to_string()) - } - - fn deserialize_bytes(self, 
_visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = self.source.take_scalar()?; - Err(type_error("bytes", &node)) - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_bytes(visitor) - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.source.resolve_aliases_until_non_alias()?; - if self.source.peek_is_null_scalar()? { - self.source.take_scalar()?; - visitor.visit_none() - } else { - visitor.visit_some(self) - } - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.source.take_scalar()?); - match node.value { - NodeValue::Null => visitor.visit_unit(), - _ => Err(type_error("unit/null", &node)), - } - } - - fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_unit(visitor) - } - - fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.source.resolve_aliases_until_non_alias()?; - if self - .source - .peek_has_yaml_core_tag(&["set", "omap", "pairs"]) - { - return self.deserialize_prepared_current_seq(visitor); - } - match self.source.next()? { - Event::SequenceStart { span, .. 
} => { - self.source.enter_depth(span)?; - let value = visitor.visit_seq(EventSeqAccess { - source: &mut *self.source, - index: 0, - }); - self.source.exit_depth(); - value - } - event => Err(unexpected_event("sequence", &event)), - } - } - - fn deserialize_tuple(self, _len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.source.resolve_aliases_until_non_alias()?; - if self.source.peek_has_yaml_core_tag(&["omap"]) { - return self.deserialize_prepared_current_map(visitor); - } - if self.source.next_mapping_has_merge_key()? { - let mut node = self.source.materialize_current_node_for_merge()?; - node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; - self.source.skip_node_raw()?; - return de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor); - } - self.source.validate_next_mapping_duplicates()?; - match self.source.next()? { - Event::MappingStart { span, .. 
} => { - self.source.enter_depth(span)?; - let value = visitor.visit_map(EventMapAccess { - source: &mut *self.source, - value: None, - }); - self.source.exit_depth(); - value - } - event => Err(unexpected_event("mapping", &event)), - } - } - - fn deserialize_struct( - self, - _name: &'static str, - _fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_map(visitor) - } - - fn deserialize_enum( - self, - name: &'static str, - variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - // Materialize the current node and reuse the tree-backed enum logic so - // the event path accepts the same forms as `de.rs`: bare-scalar unit - // variants, single-key `{Variant: payload}` mappings (newtype/tuple/ - // struct variants), and tag-shorthand variants. The previous - // scalar-only path rejected every externally-tagged variant that - // carried a payload. - self.source.resolve_aliases_until_non_alias()?; - let mut node = self.source.materialize_current_node_for_merge()?; - node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; - self.source.skip_node_raw()?; - de::Deserializer::deserialize_enum( - PreparedNodeDeserializer { node }, - name, - variants, - visitor, - ) - } - - fn deserialize_identifier(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_str(visitor) - } - - fn deserialize_ignored_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.source.skip_node()?; - visitor.visit_unit() - } -} - -impl EventSource<'_> { - fn peek_has_yaml_core_tag(&self, suffixes: &[&str]) -> bool { - match self.peek() { - Some(Event::SequenceStart { meta, .. }) | Some(Event::MappingStart { meta, .. 
}) => { - meta.tag - .as_ref() - .is_some_and(|tag| suffixes.iter().any(|suffix| tag.tag.is_yaml_core(suffix))) - } - _ => false, - } - } - - fn peek_is_null_scalar(&self) -> Result { - let Some(Event::Scalar { - value, - style, - meta, - span, - }) = self.peek() - else { - return Ok(false); - }; - let node = self.scalar_from_event(value.clone(), *style, meta, *span)?; - Ok(prepared_is_null_node(&node)) - } -} - -struct EventSeqAccess<'a, 'de> { - source: &'a mut EventSource<'de>, - index: usize, -} - -impl<'de> SeqAccess<'de> for EventSeqAccess<'_, 'de> { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result> - where - T: DeserializeSeed<'de>, - { - if matches!(self.source.peek(), Some(Event::SequenceEnd { .. })) { - self.source.next()?; - return Ok(None); - } - let index = self.index; - self.index += 1; - seed.deserialize(EventNodeDeserializer { - source: self.source, - }) - .map(Some) - .map_err(|error| error.prepend_path_segment(ErrorPathSegment::Index(index))) - } -} - -struct EventMapAccess<'a, 'de> { - source: &'a mut EventSource<'de>, - value: Option, -} - -impl<'de> MapAccess<'de> for EventMapAccess<'_, 'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result> - where - K: DeserializeSeed<'de>, - { - if matches!(self.source.peek(), Some(Event::MappingEnd { .. })) { - self.source.next()?; - return Ok(None); - } - let depth = self.source.depth; - let (events, pos) = self.source.current_events_and_pos(); - let mut scan_anchors = self.source.anchors.clone(); - let mut replayed_events = 0usize; - let segment = self - .source - .mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events, depth)? 
- .map(|(node, _)| path_segment_for_node(&node)) - .unwrap_or(ErrorPathSegment::ComplexKey); - self.value = Some(segment.clone()); - seed.deserialize(EventNodeDeserializer { - source: self.source, - }) - .map(Some) - .map_err(|error| error.with_path_segment_if_empty(segment)) - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - let segment = self - .value - .take() - .ok_or_else(|| Error::data("value requested before key", None))?; - seed.deserialize(EventNodeDeserializer { - source: self.source, - }) - .map_err(|error| error.prepend_path_segment(segment)) - } -} - -struct PreparedNodeDeserializer { - node: Node, -} - -struct PreparedSeqAccess { - items: std::vec::IntoIter, - index: usize, -} - -impl<'de> SeqAccess<'de> for PreparedSeqAccess { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result> - where - T: DeserializeSeed<'de>, - { - let Some(node) = self.items.next() else { - return Ok(None); - }; - let index = self.index; - self.index += 1; - seed.deserialize(PreparedNodeDeserializer { node }) - .map(Some) - .map_err(|error| error.prepend_path_segment(ErrorPathSegment::Index(index))) - } -} - -struct PreparedMapAccess { - entries: std::vec::IntoIter<(Node, Node)>, - value: Option<(Node, ErrorPathSegment)>, -} - -impl<'de> MapAccess<'de> for PreparedMapAccess { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result> - where - K: DeserializeSeed<'de>, - { - let Some((key, value)) = self.entries.next() else { - return Ok(None); - }; - let segment = path_segment_for_node(&key); - self.value = Some((value, segment.clone())); - seed.deserialize(PreparedNodeDeserializer { node: key }) - .map(Some) - .map_err(|error| error.with_path_segment_if_empty(segment)) - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - let (node, segment) = self - .value - .take() - .ok_or_else(|| Error::data("value requested before key", None))?; - 
seed.deserialize(PreparedNodeDeserializer { node }) - .map_err(|error| error.prepend_path_segment(segment)) - } -} - -impl<'de> de::Deserializer<'de> for PreparedNodeDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let span = self.node.span; - match self.node.value { - NodeValue::Null => visitor.visit_unit(), - NodeValue::Bool(value) => visitor.visit_bool(value), - NodeValue::Number(number) => visit_any_number(number, span, visitor), - NodeValue::String(value) => visitor.visit_string(value), - NodeValue::Sequence(items) => visitor.visit_seq(PreparedSeqAccess { - items: items.into_iter(), - index: 0, - }), - NodeValue::Mapping(entries) => visitor.visit_map(PreparedMapAccess { - entries: entries.into_iter(), - value: None, - }), - NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { - tag: tagged.tag, - value: tagged.value, - }), - } - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Bool(value) => with_span(visitor.visit_bool(value), node.span), - _ => Err(type_error("bool", &node)), - } - } - - fn deserialize_i8(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i16(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Number(number) => visit_i64_number(number, node.span, visitor), - _ => Err(type_error("integer", &node)), - } - } - - fn deserialize_u8(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_u64(visitor) - } - - fn 
deserialize_u16(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_u64(visitor) - } - - fn deserialize_u32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_u64(visitor) - } - - fn deserialize_u64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Number(number) => visit_u64_number(number, node.span, visitor), - _ => Err(type_error("unsigned integer", &node)), - } - } - - fn deserialize_i128(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Number(number) => visit_i128_number(number, node.span, visitor), - _ => Err(type_error("integer", &node)), - } - } - - fn deserialize_u128(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Number(number) => visit_u128_number(number, node.span, visitor), - _ => Err(type_error("unsigned integer", &node)), - } - } - - fn deserialize_f32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_f64(visitor) - } - - fn deserialize_f64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Number(number) => visit_f64_number(number, node.span, visitor), - _ => Err(type_error("number", &node)), - } - } - - fn deserialize_char(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - let value = prepared_string_target_text(&node).ok_or_else(|| type_error("char", &node))?; - let mut chars = value.chars(); - match (chars.next(), chars.next()) { - (Some(ch), None) => with_span(visitor.visit_char(ch), node.span), - _ => Err(type_error("char", &node)), - } - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - 
self.deserialize_string(visitor) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - let value = - prepared_string_target_text(&node).ok_or_else(|| type_error("string", &node))?; - visitor.visit_string(value.to_string()) - } - - fn deserialize_bytes(self, _visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - Err(type_error("bytes", &node)) - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_bytes(visitor) - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - if prepared_is_null_node(&self.node) { - visitor.visit_none() - } else { - visitor.visit_some(self) - } - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Null => visitor.visit_unit(), - _ => Err(type_error("unit/null", &node)), - } - } - - fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_unit(visitor) - } - - fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - if yaml11_set_entries_node(&self.node)?.is_some() { - let entries = take_yaml11_set_entries_node(self.node).expect("checked explicit !!set"); - let items = yaml11_set_key_nodes(entries)?; - return visitor.visit_seq(PreparedSeqAccess { - items: items.into_iter(), - index: 0, - }); - } - if yaml11_pair_items_node(&self.node, "omap")?.is_some() { - let items = - take_yaml11_pair_items_node(self.node, "omap").expect("checked explicit !!omap"); - let items = yaml11_pair_sequence_nodes(items, "omap")?; - return visitor.visit_seq(PreparedSeqAccess { - 
items: items.into_iter(), - index: 0, - }); - } - if yaml11_pair_items_node(&self.node, "pairs")?.is_some() { - let items = - take_yaml11_pair_items_node(self.node, "pairs").expect("checked explicit !!pairs"); - let items = yaml11_pair_sequence_nodes(items, "pairs")?; - return visitor.visit_seq(PreparedSeqAccess { - items: items.into_iter(), - index: 0, - }); - } - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Sequence(items) => visitor.visit_seq(PreparedSeqAccess { - items: items.into_iter(), - index: 0, - }), - _ => Err(type_error("sequence", &node)), - } - } - - fn deserialize_tuple(self, _len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - if let Some(items) = yaml11_pair_items_node(&self.node, "omap")? 
{ - validate_yaml11_omap_node_keys(items)?; - let items = - take_yaml11_pair_items_node(self.node, "omap").expect("checked explicit !!omap"); - let entries = yaml11_pair_entries(items, "omap")?; - return visitor.visit_map(PreparedMapAccess { - entries: entries.into_iter(), - value: None, - }); - } - let node = prepared_untag_node_owned(self.node); - match node.value { - NodeValue::Mapping(entries) => visitor.visit_map(PreparedMapAccess { - entries: entries.into_iter(), - value: None, - }), - _ => Err(type_error("mapping", &node)), - } - } - - fn deserialize_struct( - self, - _name: &'static str, - _fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_map(visitor) - } - - fn deserialize_enum( - self, - _name: &'static str, - _variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - match self.node.value { - NodeValue::String(variant) => visitor.visit_enum(variant.into_deserializer()), - NodeValue::Mapping(entries) if entries.len() == 1 => { - let mut entries = entries.into_iter(); - let (key, value) = entries.next().expect("length checked"); - visitor.visit_enum(PreparedEnumDeserializer { - key, - value: Some(value), - }) - } - NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { - tag: tagged.tag, - value: tagged.value, - }), - _ => Err(type_error("enum string or single-key mapping", &self.node)), - } - } - - fn deserialize_identifier(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_string(visitor) - } - - fn deserialize_ignored_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_unit() - } -} - -struct PreparedEnumDeserializer { - key: Node, - value: Option, -} - -impl<'de> EnumAccess<'de> for PreparedEnumDeserializer { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> - where - V: DeserializeSeed<'de>, - { - let variant 
= seed.deserialize(PreparedNodeDeserializer { - node: self.key.clone(), - })?; - Ok((variant, self)) - } -} - -impl<'de> VariantAccess<'de> for PreparedEnumDeserializer { - type Error = Error; - - fn unit_variant(self) -> Result<()> { - match self.value { - None => Ok(()), - Some(node) if matches!(node.value, NodeValue::Null) => Ok(()), - Some(node) => Err(type_error("unit enum variant", &node)), - } - } - - fn newtype_variant_seed(self, seed: T) -> Result - where - T: DeserializeSeed<'de>, - { - let node = self - .value - .ok_or_else(|| Error::data("newtype variant requires a value", None))?; - seed.deserialize(PreparedNodeDeserializer { node }) - } - - fn tuple_variant(self, _len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = self - .value - .ok_or_else(|| Error::data("tuple variant requires a value", None))?; - de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node }, visitor) - } - - fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result - where - V: Visitor<'de>, - { - let node = self - .value - .ok_or_else(|| Error::data("struct variant requires a value", None))?; - de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor) - } -} - -struct PreparedTaggedEnumDeserializer { - tag: Tag, - value: Node, -} - -impl<'de> EnumAccess<'de> for PreparedTaggedEnumDeserializer { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> - where - V: DeserializeSeed<'de>, - { - let variant = - seed.deserialize(self.tag.serde_variant().into_owned().into_deserializer())?; - Ok((variant, self)) - } -} - -impl<'de> VariantAccess<'de> for PreparedTaggedEnumDeserializer { - type Error = Error; - - fn unit_variant(self) -> Result<()> { - if prepared_is_null_node(&self.value) { - Ok(()) - } else { - Err(type_error("unit enum variant", &self.value)) - } - } - - fn newtype_variant_seed(self, seed: T) -> Result - where - T: DeserializeSeed<'de>, 
- { - seed.deserialize(PreparedNodeDeserializer { node: self.value }) - } - - fn tuple_variant(self, _len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node: self.value }, visitor) - } - - fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result - where - V: Visitor<'de>, - { - de::Deserializer::deserialize_map(PreparedNodeDeserializer { node: self.value }, visitor) - } -} - -fn explicit_core_tagged_node<'a>(mut node: &'a Node, suffix: &str) -> Option<&'a Node> { - while let NodeValue::Tagged(tagged) = &node.value { - if tagged.tag.is_yaml_core(suffix) { - return Some(&tagged.value); - } - node = &tagged.value; - } - None -} - -fn take_explicit_core_tagged_node(mut node: Node, suffix: &str) -> Option { - loop { - match node.value { - NodeValue::Tagged(tagged) if tagged.tag.is_yaml_core(suffix) => { - return Some(tagged.value); - } - NodeValue::Tagged(tagged) => node = tagged.value, - _ => return None, - } - } -} - -fn yaml11_set_entries_node(node: &Node) -> Result> { - let Some(value) = explicit_core_tagged_node(node, "set") else { - return Ok(None); - }; - match &value.value { - NodeValue::Mapping(entries) => Ok(Some(entries)), - _ => Err(type_error("mapping for explicit !!set", value)), - } -} - -fn take_yaml11_set_entries_node(node: Node) -> Option> { - let value = take_explicit_core_tagged_node(node, "set")?; - match value.value { - NodeValue::Mapping(entries) => Some(entries), - _ => None, - } -} - -fn yaml11_pair_items_node<'a>(node: &'a Node, suffix: &'static str) -> Result> { - let Some(value) = explicit_core_tagged_node(node, suffix) else { - return Ok(None); - }; - match &value.value { - NodeValue::Sequence(items) => Ok(Some(items)), - _ => Err(Error::data( - format!("expected sequence for explicit !!{suffix}"), - Some(value.span), - )), - } -} - -fn take_yaml11_pair_items_node(node: Node, suffix: &'static str) -> Option> { - let value = 
take_explicit_core_tagged_node(node, suffix)?; - match value.value { - NodeValue::Sequence(items) => Some(items), - _ => None, - } -} - -fn validate_yaml11_omap_node_keys(items: &[Node]) -> Result<()> { - let mut seen = DuplicateKeyTracker::new(); - for item in items { - let (key, _) = yaml11_singleton_pair_node(item, "omap")?; - check_duplicate_with_tracker_at_depth_limit( - &mut seen, - key, - 1, - Some(crate::schema::DEFAULT_MAX_NESTING_DEPTH), - )?; - } - Ok(()) -} - -fn yaml11_set_key_nodes(entries: Vec<(Node, Node)>) -> Result> { - entries - .into_iter() - .map(|(key, value)| { - ensure_yaml11_set_null_node(&value)?; - Ok(key) - }) - .collect() -} - -fn ensure_yaml11_set_null_node(value: &Node) -> Result<()> { - if prepared_is_null_node(value) { - Ok(()) - } else { - Err(Error::data( - "expected explicit !!set entry value to be null", - Some(value.span), - )) - } -} - -fn yaml11_pair_sequence_nodes(items: Vec, suffix: &'static str) -> Result> { - items - .into_iter() - .map(|item| { - let span = item.span; - let (key, value) = take_yaml11_singleton_pair_node(item, suffix)?; - Ok(Node::new(NodeValue::Sequence(vec![key, value]), span)) - }) - .collect() -} - -fn yaml11_pair_entries(items: Vec, suffix: &'static str) -> Result> { - items - .into_iter() - .map(|item| take_yaml11_singleton_pair_node(item, suffix)) - .collect() -} - -fn yaml11_singleton_pair_node<'a>( - node: &'a Node, - suffix: &'static str, -) -> Result<(&'a Node, &'a Node)> { - let node = prepared_untag_node(node); - match &node.value { - NodeValue::Mapping(entries) if entries.len() == 1 => Ok((&entries[0].0, &entries[0].1)), - NodeValue::Mapping(_) => Err(Error::data( - format!("expected explicit !!{suffix} entry to contain exactly one pair"), - Some(node.span), - )), - _ => Err(Error::data( - format!("expected single-pair mapping entry for explicit !!{suffix}"), - Some(node.span), - )), - } -} - -fn take_yaml11_singleton_pair_node(node: Node, suffix: &'static str) -> Result<(Node, Node)> { - 
let node = prepared_untag_node_owned(node); - match node.value { - NodeValue::Mapping(entries) if entries.len() == 1 => { - let mut entries = entries.into_iter(); - entries.next().ok_or_else(|| { - Error::data( - "internal: singleton mapping lost its entry", - Some(node.span), - ) - }) - } - NodeValue::Mapping(_) => Err(Error::data( - format!("expected explicit !!{suffix} entry to contain exactly one pair"), - Some(node.span), - )), - _ => Err(Error::data( - format!("expected single-pair mapping entry for explicit !!{suffix}"), - Some(node.span), - )), - } -} - -fn prepared_untag_node(mut node: &Node) -> &Node { - while let NodeValue::Tagged(tagged) = &node.value { - node = &tagged.value; - } - node -} - -fn prepared_untag_node_owned(node: Node) -> Node { - let Node { - value, - span, - source, - } = node; - match value { - NodeValue::Tagged(tagged) => prepared_untag_node_owned(tagged.value), - value => Node { - value, - span, - source, - }, - } -} - -fn prepared_is_null_node(node: &Node) -> bool { - match &node.value { - NodeValue::Null => true, - NodeValue::Tagged(tagged) => prepared_is_null_node(&tagged.value), - _ => false, - } -} - -fn prepared_string_target_text(node: &Node) -> Option<&str> { - match &node.value { - NodeValue::Tagged(tagged) => prepared_string_target_text(&tagged.value), - _ => string_target_text(node), - } -} - -fn visit_scalar_any<'de, V>(node: &Node, input: &'de str, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match &node.value { - NodeValue::Null => visitor.visit_unit(), - NodeValue::Bool(value) => visitor.visit_bool(*value), - NodeValue::Number(number) => visit_any_number(*number, node.span, visitor), - NodeValue::String(value) => { - if let Some(borrowed) = borrowed_event_str(input, node.span, value) { - visitor.visit_borrowed_str(borrowed) - } else { - visitor.visit_str(value) - } - } - NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { - tag: tagged.tag.clone(), - value: tagged.value.clone(), - 
}), - NodeValue::Sequence(_) | NodeValue::Mapping(_) => Err(type_error("scalar", node)), - } -} - -fn string_target_text(node: &Node) -> Option<&str> { - match &node.value { - NodeValue::String(value) => Some(value), - NodeValue::Null => Some("null"), - NodeValue::Bool(value) => Some(if *value { "true" } else { "false" }), - NodeValue::Number(_) => node.scalar_source().map(|source| source.raw()), - NodeValue::Tagged(tagged) => string_target_text(&tagged.value), - NodeValue::Sequence(_) | NodeValue::Mapping(_) => None, - } -} - -fn borrowed_event_str<'de>(input: &'de str, span: Span, value: &str) -> Option<&'de str> { - let raw = input.get(span.start..span.end)?; - if raw == value { - return Some(raw); - } - let quote = raw.chars().next()?; - if !matches!(quote, '"' | '\'') || !raw.ends_with(quote) || raw.len() < 2 { - return None; - } - let inner = &raw[quote.len_utf8()..raw.len() - quote.len_utf8()]; - (inner == value).then_some(inner) -} - -fn path_segment_for_node(node: &Node) -> ErrorPathSegment { - match &node.value { - NodeValue::String(value) => ErrorPathSegment::Key(value.clone()), - NodeValue::Bool(value) => ErrorPathSegment::ScalarKey(value.to_string()), - NodeValue::Number(number) => ErrorPathSegment::ScalarKey(number.to_string()), - NodeValue::Null => ErrorPathSegment::ScalarKey("null".to_string()), - NodeValue::Sequence(_) | NodeValue::Mapping(_) | NodeValue::Tagged(_) => { - ErrorPathSegment::ComplexKey - } - } -} - -fn with_span(result: Result, span: Span) -> Result { - result.map_err(|error| error.with_span_if_missing(span)) -} - -fn visit_i64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match number { - Number::Integer(value) => match i64::try_from(value) { - Ok(value) => with_span(visitor.visit_i64(value), span), - Err(_) => Err(Error::data( - "integer scalar is out of range for i64", - Some(span), - )), - }, - Number::Unsigned(value) => match i64::try_from(value) { - Ok(value) => 
with_span(visitor.visit_i64(value), span), - Err(_) => Err(Error::data( - "integer scalar is out of range for i64", - Some(span), - )), - }, - Number::Float(_) => Err(Error::data("expected integer, found float", Some(span))), - } -} - -fn visit_u64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match number { - Number::Integer(value) if value >= 0 => match u64::try_from(value) { - Ok(value) => with_span(visitor.visit_u64(value), span), - Err(_) => Err(Error::data( - "integer scalar is out of range for u64", - Some(span), - )), - }, - Number::Unsigned(value) => match u64::try_from(value) { - Ok(value) => with_span(visitor.visit_u64(value), span), - Err(_) => Err(Error::data( - "integer scalar is out of range for u64", - Some(span), - )), - }, - Number::Integer(_) => Err(Error::data( - "expected unsigned integer, found integer", - Some(span), - )), - Number::Float(_) => Err(Error::data( - "expected unsigned integer, found float", - Some(span), - )), - } -} - -fn visit_i128_number<'de, V>(number: Number, span: Span, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match number { - Number::Integer(value) => with_span(visitor.visit_i128(value), span), - Number::Unsigned(value) => match i128::try_from(value) { - Ok(value) => with_span(visitor.visit_i128(value), span), - Err(_) => Err(Error::data( - "integer scalar is out of range for i128", - Some(span), - )), - }, - Number::Float(_) => Err(Error::data("expected integer, found float", Some(span))), - } -} - -fn visit_u128_number<'de, V>(number: Number, span: Span, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match number { - Number::Integer(value) if value >= 0 => { - let value = u128::try_from(value).expect("non-negative i128 fits u128"); - with_span(visitor.visit_u128(value), span) - } - Number::Unsigned(value) => with_span(visitor.visit_u128(value), span), - Number::Integer(_) => Err(Error::data( - "expected unsigned integer, found integer", - Some(span), - 
)), - Number::Float(_) => Err(Error::data( - "expected unsigned integer, found float", - Some(span), - )), - } -} - -fn visit_f64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match number { - Number::Integer(value) => with_span(visitor.visit_f64(value as f64), span), - Number::Unsigned(value) => with_span(visitor.visit_f64(value as f64), span), - Number::Float(value) => with_span(visitor.visit_f64(value), span), - } -} - -fn visit_any_number<'de, V>(number: Number, span: Span, visitor: V) -> Result -where - V: Visitor<'de>, -{ - match number { - Number::Integer(value) => match i64::try_from(value) { - Ok(value) => with_span(visitor.visit_i64(value), span), - Err(_) => with_span(visitor.visit_i128(value), span), - }, - Number::Unsigned(value) => match u64::try_from(value) { - Ok(value) => with_span(visitor.visit_u64(value), span), - Err(_) => with_span(visitor.visit_u128(value), span), - }, - Number::Float(value) => with_span(visitor.visit_f64(value), span), - } -} - -fn type_error(expected: &'static str, node: &Node) -> Error { - Error::data( - format!("expected {expected}, found {}", kind_name(&node.value)), - Some(node.span), - ) -} - -fn kind_name(value: &NodeValue) -> &'static str { - match value { - NodeValue::Null => "null", - NodeValue::Bool(_) => "bool", - NodeValue::Number(Number::Integer(_)) => "integer", - NodeValue::Number(Number::Unsigned(_)) => "unsigned integer", - NodeValue::Number(Number::Float(_)) => "float", - NodeValue::String(_) => "string", - NodeValue::Sequence(_) => "sequence", - NodeValue::Mapping(_) => "mapping", - NodeValue::Tagged(_) => "tagged value", - } -} - -fn unexpected_event(expected: &'static str, event: &Event) -> Error { - Error::data( - format!("expected {expected}, found {}", event_kind(event)), - event_span(event), - ) -} - -fn event_kind(event: &Event) -> &'static str { - match event { - Event::StreamStart => "stream start", - Event::StreamEnd => "stream end", - 
Event::DocumentStart { .. } => "document start", - Event::DocumentEnd { .. } => "document end", - Event::SequenceStart { .. } => "sequence start", - Event::SequenceEnd { .. } => "sequence end", - Event::MappingStart { .. } => "mapping start", - Event::MappingEnd { .. } => "mapping end", - Event::Alias { .. } => "alias", - Event::Scalar { .. } => "scalar", - } -} - -fn event_span(event: &Event) -> Option { - match event { - Event::DocumentStart { span, .. } - | Event::DocumentEnd { span, .. } - | Event::SequenceStart { span, .. } - | Event::SequenceEnd { span } - | Event::MappingStart { span, .. } - | Event::MappingEnd { span } - | Event::Scalar { span, .. } => Some(*span), - Event::Alias { anchor } => Some(anchor.span), - Event::StreamStart | Event::StreamEnd => None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde::{Deserialize, de::IgnoredAny}; - use std::collections::{BTreeMap, BTreeSet}; - use std::io::{self, Cursor, Read}; - - struct FailingAfterPrefixReader { - prefix: Cursor>, - } - - impl FailingAfterPrefixReader { - fn new(prefix: &[u8]) -> Self { - Self { - prefix: Cursor::new(prefix.to_vec()), - } - } - } - - impl Read for FailingAfterPrefixReader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let read = self.prefix.read(buf)?; - if read == 0 { - Err(io::Error::other("stream interrupted")) - } else { - Ok(read) - } - } - } - - #[derive(Debug, Deserialize, PartialEq)] - struct EventConfig<'a> { - name: &'a str, - ports: Vec, - enabled: bool, - labels: BTreeMap, - optional: Option, - } - - #[derive(Debug, Deserialize, PartialEq)] - struct OwnedEventConfig { - name: String, - ports: Vec, - enabled: bool, - labels: BTreeMap, - optional: Option, - } - - #[derive(Debug, Deserialize, PartialEq)] - struct ExplicitCoreScalars { - string_null: String, - optional_string_null: Option, - string_bool: String, - yes: bool, - off: bool, - maybe: Option, - unit: (), - } - - #[derive(Debug, Deserialize, PartialEq)] - struct ExplicitCoreNumbers { 
- integer: i64, - unsigned: u64, - float: f64, - } - - #[derive(Debug, Deserialize, PartialEq)] - struct TargetMap { - target: BTreeMap, - } - - #[derive(Debug, Deserialize, PartialEq)] - struct TargetValueMap { - target: BTreeMap, - } - - #[derive(Debug, Deserialize, PartialEq)] - struct KnownOnly { - name: String, - } - - fn assert_value_tagged_key( - mapping: &crate::Value, - expected_tag: crate::Tag, - expected_key: &str, - expected_value: &str, - ) { - let mapping = mapping.as_mapping().expect("mapping value"); - assert!( - mapping.iter().any(|(key, value)| { - matches!(key, crate::Value::Tagged(tagged) - if tagged.tag == expected_tag - && tagged.value.as_str() == Some(expected_key) - && value.as_str() == Some(expected_value)) - }), - "expected tagged key {expected_tag:?} {expected_key:?}: {expected_value:?}" - ); - } - - #[test] - fn event_deserializer_reads_typed_structs() { - let input = "\ -name: api -ports: [80, 443] -enabled: true -labels: - tier: backend - release: stable -optional: null -"; - - let parsed: EventConfig<'_> = - from_str_with_options(input, LoadOptions::new()).expect("event-backed typed config"); - assert_eq!(parsed.name, "api"); - assert!(std::ptr::eq(parsed.name.as_ptr(), input[6..9].as_ptr())); - assert_eq!(parsed.ports, vec![80, 443]); - assert!(parsed.enabled); - assert_eq!(parsed.labels["tier"], "backend"); - assert_eq!(parsed.labels["release"], "stable"); - assert_eq!(parsed.optional, None); - } - - #[test] - fn event_deserializer_rejects_duplicate_scalar_keys() { - let input = "labels:\n tier: backend\n tier: worker\n"; - let error = from_str_with_options::>>( - input, - LoadOptions::new(), - ) - .expect_err("event-backed duplicate keys reject"); - assert!(error.to_string().contains("duplicate mapping key")); - } - - #[test] - fn event_deserializer_rejects_duplicate_sequence_alias_mapping_keys() { - let input = "seq: &seq [a, b]\nroot: {? *seq : first, ? 
[a, b] : second}\n"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("event-backed alias-expanded sequence keys reject"); - - assert!(error.to_string().contains("duplicate mapping key")); - } - - #[test] - fn event_deserializer_rejects_duplicate_mapping_alias_keys_order_insensitively() { - let input = "base: &base {a: 1, b: 2}\nroot: {? *base : first, ? {b: 2, a: 1} : second}\n"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("event-backed alias-expanded mapping keys reject"); - - assert!(error.to_string().contains("duplicate mapping key")); - } - - #[test] - fn event_deserializer_accepts_distinct_complex_alias_mapping_keys() { - let input = "seq: &seq [a, b]\nroot: {? *seq : first, ? [a, c] : second}\n"; - - from_str_with_options::(input, LoadOptions::new()) - .expect("distinct complex alias keys pass duplicate preflight"); - } - - #[test] - fn event_deserializer_rejects_recursive_alias_mapping_keys() { - let input = "root: {? &self [*self] : value}\n"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("recursive alias key rejects"); - - assert!(error.to_string().contains("recursive alias")); - } - - #[test] - fn event_deserializer_rejects_complex_alias_mapping_keys_over_budget() { - let input = "seq: &seq [a, b]\nroot: {? 
*seq : first}\n"; - let error = from_str_with_options::( - input, - LoadOptions::new().max_alias_expansion_nodes(1), - ) - .expect_err("complex alias key replay budget rejects"); - - assert!( - error - .to_string() - .contains("alias event replay limit exceeded") - ); - } - - #[test] - fn event_deserializer_expands_merge_keys() { - let input = "\ -base: &base - retries: 3 - command: deploy -target: - <<: *base - command: smoke -"; - let parsed = - from_str_with_options::(input, LoadOptions::new()).expect("merge keys"); - - assert_eq!(parsed.target["retries"], "3"); - assert_eq!(parsed.target["command"], "smoke"); - } - - #[test] - fn event_deserializer_expands_merge_lists_with_earlier_sources_winning() { - let input = "\ -base1: &base1 {a: one, shared: first} -base2: &base2 {b: two, shared: second} -target: {<<: [*base1, *base2], local: ok} -"; - let parsed = - from_str_with_options::(input, LoadOptions::new()).expect("merge list"); - - assert_eq!(parsed.target["a"], "one"); - assert_eq!(parsed.target["b"], "two"); - assert_eq!(parsed.target["shared"], "first"); - assert_eq!(parsed.target["local"], "ok"); - } - - #[test] - fn event_deserializer_expands_explicit_merge_tag_keys() { - let input = "\ -%TAG !m! tag:yaml.org,2002: ---- -base: &base {a: one, shared: base} -tagged: {!!merge <<: *base, shared: tagged} -canonical: {! 
<<: *base, shared: canonical} -handle: {!m!merge <<: *base, shared: handle} -"; - let parsed = from_str_with_options::>>( - input, - LoadOptions::new(), - ) - .expect("explicit merge tag keys"); - - for (key, expected_shared) in [ - ("tagged", "tagged"), - ("canonical", "canonical"), - ("handle", "handle"), - ] { - assert_eq!(parsed[key]["a"], "one"); - assert_eq!(parsed[key]["shared"], expected_shared); - } - } - - #[test] - fn event_deserializer_keeps_explicit_string_merge_key_literal() { - let input = "base: &base {!!str <<: literal, a: one}\ntarget: {<<: *base}\n"; - let parsed = from_str_with_options::(input, LoadOptions::new()) - .expect("explicit string merge key stays literal"); - - assert_eq!(parsed.target["a"], "one"); - assert_eq!(parsed.target["<<"], "literal"); - } - - #[test] - fn event_deserializer_reports_invalid_merge_payloads() { - let input = "target: {<<: scalar}\n"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("invalid merge payload rejects"); - - assert!( - error - .to_string() - .contains("expected a mapping or list of mappings for merging"), - "{error}" - ); - } - - #[test] - fn event_deserializer_skips_valid_merge_maps_for_ignored_values() { - let input = "base: &base {a: one}\nname: app\nignored: {<<: *base, b: two}\n"; - let parsed = from_str_with_options::(input, LoadOptions::new()) - .expect("unknown merge-bearing field is skipped"); - - assert_eq!(parsed.name, "app"); - from_str_with_options::(input, LoadOptions::new()) - .expect("ignored-any skips merge-bearing maps"); - } - - #[test] - fn event_deserializer_rejects_invalid_merge_payloads_in_ignored_values() { - let input = "name: app\nignored: {<<: scalar}\n"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("strict invalid merge payload rejects while skipping"); - - assert!( - error - .to_string() - .contains("expected a mapping or list of mappings for merging"), - "{error}" - ); - } - - #[test] - fn 
event_deserializer_yaml11_skips_literal_merge_payload_in_ignored_value() { - let input = "%YAML 1.1\n---\nname: app\nignored: {<<: scalar, keep: value}\n"; - let parsed = - from_str_with_options::(input, LoadOptions::yaml_version_directive()) - .expect("directive-driven YAML 1.1 literal merge payload is skipped"); - - assert_eq!(parsed.name, "app"); - } - - #[test] - fn event_deserializer_rejects_repeated_merge_keys_by_default() { - let input = "\ -first: &first {shared: first} -second: &second {shared: second} -target: - <<: *first - !!merge <<: *second -"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("default repeated merge keys reject"); - - assert!(error.to_string().contains("duplicate mapping key `<<`")); - } - - #[test] - fn event_deserializer_yaml11_recovers_repeated_merge_keys() { - let input = "\ -first: &first {shared: first, retries: 3} -second: &second {shared: second, timeout: 10} -target: - <<: *first - ! <<: *second - keep: value -"; - let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) - .expect("YAML 1.1 repeated merge keys recover"); - - assert_eq!(parsed.target["shared"], "second"); - assert_eq!(parsed.target["retries"], "3"); - assert_eq!(parsed.target["timeout"], "10"); - assert_eq!(parsed.target["keep"], "value"); - } - - #[test] - fn event_deserializer_yaml11_keeps_scalar_merge_payload_literal() { - let input = "\ -target: - <<: scalar - keep: value -"; - let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) - .expect("YAML 1.1 scalar merge payload stays literal"); - - assert_eq!(parsed.target["<<"], "scalar"); - assert_eq!(parsed.target["keep"], "value"); - } - - #[test] - fn event_deserializer_yaml11_keeps_mixed_invalid_merge_list_literal() { - let input = "\ -base: &base {a: 1} -target: - <<: [*base, scalar] - keep: value -"; - let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) - .expect("YAML 1.1 mixed invalid merge list stays literal"); - let merge = 
&parsed.target["<<"]; - let merge = merge.as_sequence().expect("literal merge list"); - - assert_eq!(merge[0]["a"].as_u64(), Some(1)); - assert_eq!(merge[1].as_str(), Some("scalar")); - assert_eq!(parsed.target["keep"].as_str(), Some("value")); - } - - #[test] - fn event_deserializer_reads_explicit_core_scalar_tags() { - let input = "\ -string_null: !!str null -optional_string_null: !!str null -string_bool: !!str true -yes: !!bool YES -off: !!bool off -maybe: !!null null -unit: !!null ~ -"; - let parsed = - from_str_with_options::(input, LoadOptions::new()).unwrap(); - - assert_eq!( - parsed, - ExplicitCoreScalars { - string_null: "null".to_string(), - optional_string_null: Some("null".to_string()), - string_bool: "true".to_string(), - yes: true, - off: false, - maybe: None, - unit: (), - } - ); - } - - #[test] - fn event_deserializer_reads_explicit_core_numeric_tags() { - let input = "integer: !!int \"42\"\nunsigned: !!int 0x2A\nfloat: !!float \"1.5\"\n"; - let parsed = - from_str_with_options::(input, LoadOptions::new()).unwrap(); - - assert_eq!( - parsed, - ExplicitCoreNumbers { - integer: 42, - unsigned: 42, - float: 1.5, - } - ); - } - - #[test] - fn event_deserializer_explicit_tags_follow_directive_schema() { - let parsed = from_str_with_options::( - "%YAML 1.1\n--- !!bool YES\n", - LoadOptions::yaml_version_directive(), - ) - .expect("directive-driven explicit bool"); - - assert!(parsed); - } - - #[test] - fn event_deserializer_rejects_invalid_explicit_core_scalar_tags() { - let bool_error = from_str_with_options::("!!bool maybe\n", LoadOptions::new()) - .expect_err("invalid explicit bool"); - assert!( - bool_error - .to_string() - .contains("failed to parse explicit !!bool scalar"), - "{bool_error}" - ); - - let str_error = from_str_with_options::("!!str 7\n", LoadOptions::new()) - .expect_err("explicit string does not coerce to integer"); - assert!(str_error.to_string().contains("expected integer")); - } - - #[test] - fn 
event_deserializer_retains_tagged_scalars_for_value_and_unwraps_typed_strings() { - let value = from_str_with_options::("!Thing tagged\n", LoadOptions::new()) - .expect("custom tagged scalar value"); - let tagged = value.as_tagged().expect("custom tag retained"); - - assert_eq!(tagged.tag, crate::Tag::new("Thing")); - assert_eq!(tagged.value.as_str(), Some("tagged")); - - let typed = from_str_with_options::("!Thing tagged\n", LoadOptions::new()) - .expect("typed string unwraps custom tag"); - assert_eq!(typed, "tagged"); - - let explicit = from_str_with_options::("!!str null\n", LoadOptions::new()) - .expect("explicit core string tag value"); - let tagged = explicit.as_tagged().expect("explicit core tag retained"); - assert_eq!(tagged.tag, crate::Tag::new("!!str")); - assert_eq!(tagged.value.as_str(), Some("null")); - } - - #[test] - fn event_deserializer_retains_tagged_collections_for_value_and_unwraps_typed_targets() { - #[derive(Debug, Deserialize, PartialEq)] - struct TaggedCollections { - seq: Vec, - map: BTreeMap, - } - - let input = "seq: !Seq [a, b]\nmap: !Map {k: v}\n"; - let value = - from_str_with_options::(input, LoadOptions::new()).expect("value"); - - let sequence = value["seq"].as_tagged().expect("sequence tag retained"); - assert_eq!(sequence.tag, crate::Tag::new("Seq")); - assert_eq!( - sequence - .value - .as_sequence() - .expect("sequence payload") - .len(), - 2 - ); - assert_eq!(sequence.value[0].as_str(), Some("a")); - assert_eq!(sequence.value[1].as_str(), Some("b")); - - let mapping = value["map"].as_tagged().expect("mapping tag retained"); - assert_eq!(mapping.tag, crate::Tag::new("Map")); - assert_eq!(mapping.value["k"].as_str(), Some("v")); - - let typed = from_str_with_options::(input, LoadOptions::new()) - .expect("typed collections unwrap tags"); - assert_eq!( - typed, - TaggedCollections { - seq: vec!["a".to_string(), "b".to_string()], - map: BTreeMap::from([("k".to_string(), "v".to_string())]), - } - ); - - let top_value = 
from_str_with_options::("!Seq [a, b]\n", LoadOptions::new()) - .expect("top-level tagged sequence value"); - let tagged = top_value.as_tagged().expect("top-level tag retained"); - assert_eq!(tagged.tag, crate::Tag::new("Seq")); - assert_eq!(tagged.value[1].as_str(), Some("b")); - - let top_typed = from_str_with_options::>("!Seq [a, b]\n", LoadOptions::new()) - .expect("top-level typed sequence unwraps tag"); - assert_eq!(top_typed, ["a", "b"]); - } - - #[test] - fn event_deserializer_projects_yaml11_collection_tags_for_typed_targets() { - let set = from_str_with_options::>( - "!!set\n? alpha\n? beta\n", - LoadOptions::new(), - ) - .expect("typed !!set"); - assert_eq!( - set, - BTreeSet::from(["alpha".to_string(), "beta".to_string()]) - ); - - let omap_pairs = from_str_with_options::>( - "!!omap\n- first: 1\n- second: 2\n", - LoadOptions::new(), - ) - .expect("typed !!omap pair sequence"); - assert_eq!( - omap_pairs, - vec![("first".to_string(), 1), ("second".to_string(), 2)] - ); - - let omap_map = from_str_with_options::>( - "!!omap\n- second: 2\n- first: 1\n", - LoadOptions::new(), - ) - .expect("typed !!omap map"); - assert_eq!( - omap_map, - BTreeMap::from([("first".to_string(), 1), ("second".to_string(), 2)]) - ); - - let pairs = from_str_with_options::>( - "!!pairs\n- repeat: 1\n- repeat: 2\n", - LoadOptions::new(), - ) - .expect("typed !!pairs preserves duplicate keys"); - assert_eq!( - pairs, - vec![("repeat".to_string(), 1), ("repeat".to_string(), 2)] - ); - } - - #[test] - fn event_deserializer_rejects_lossy_yaml11_collection_tag_shapes() { - let duplicate = from_str_with_options::>( - "!!omap\n- z: 1\n- a: 2\n- z: 3\n", - LoadOptions::new(), - ) - .expect_err("typed !!omap map rejects duplicate keys"); - assert!(duplicate.to_string().contains("duplicate mapping key `z`")); - - let set_error = - from_str_with_options::>("!!set {alpha: true}\n", LoadOptions::new()) - .expect_err("typed !!set rejects non-null values"); - assert!( - set_error - .to_string() 
- .contains("expected explicit !!set entry value to be null"), - "{set_error}" - ); - - let omap_error = from_str_with_options::>( - "!!omap\n- {a: 1, b: 2}\n", - LoadOptions::new(), - ) - .expect_err("typed !!omap rejects multi-pair entries"); - assert!( - omap_error - .to_string() - .contains("expected explicit !!omap entry to contain exactly one pair"), - "{omap_error}" - ); - - let pairs_error = - from_str_with_options::>("!!pairs\n- scalar\n", LoadOptions::new()) - .expect_err("typed !!pairs rejects scalar entries"); - assert!( - pairs_error - .to_string() - .contains("expected single-pair mapping entry for explicit !!pairs"), - "{pairs_error}" - ); - } - - #[test] - fn event_deserializer_retains_tagged_merge_maps_for_value_and_unwraps_typed_targets() { - let input = "base: &base {a: one}\ntarget: !Thing {<<: *base, b: two}\n"; - let value = from_str_with_options::(input, LoadOptions::new()) - .expect("tagged merge map value"); - let tagged = value["target"].as_tagged().expect("target tag retained"); - - assert_eq!(tagged.tag, crate::Tag::new("Thing")); - assert_eq!(tagged.value["a"].as_str(), Some("one")); - assert_eq!(tagged.value["b"].as_str(), Some("two")); - - let typed = from_str_with_options::(input, LoadOptions::new()) - .expect("typed tagged merge map unwraps tag"); - assert_eq!(typed.target["a"], "one"); - assert_eq!(typed.target["b"], "two"); - } - - #[test] - fn event_deserializer_retains_tagged_literal_merge_keys_without_expansion() { - let input = "\ -custom: {!Thing <<: literal, image: app:custom} -string: {!!str <<: literal, image: app:string} -"; - let value = - from_str_with_options::(input, LoadOptions::new()).expect("tagged keys"); - - assert_value_tagged_key(&value["custom"], crate::Tag::new("Thing"), "<<", "literal"); - assert_value_tagged_key(&value["string"], crate::Tag::new("!!str"), "<<", "literal"); - assert_eq!(value["custom"]["image"].as_str(), Some("app:custom")); - assert_eq!(value["string"]["image"].as_str(), 
Some("app:string")); - - let typed = from_str_with_options::>>( - input, - LoadOptions::new(), - ) - .expect("typed maps unwrap tagged literal keys"); - assert_eq!(typed["custom"]["<<"], "literal"); - assert_eq!(typed["string"]["<<"], "literal"); - } - - #[test] - fn event_deserializer_replays_acyclic_scalar_aliases() { - let input = "base: &base api\nservice: *base\n"; - let parsed = from_str_with_options::>(input, LoadOptions::new()) - .expect("event-backed scalar alias replay"); - - assert_eq!(parsed["base"], "api"); - assert_eq!(parsed["service"], "api"); - } - - #[test] - fn event_deserializer_replays_acyclic_sequence_aliases() { - let input = "base: &base [api, worker]\nservice: *base\n"; - let parsed = - from_str_with_options::>>(input, LoadOptions::new()) - .expect("event-backed sequence alias replay"); - - assert_eq!(parsed["base"], ["api", "worker"]); - assert_eq!(parsed["service"], ["api", "worker"]); - } - - #[test] - fn event_deserializer_validates_alias_expanded_mapping_values() { - let input = "base: &base {a: one, b: two}\ntarget: *base\n"; - let parsed = - from_str_with_options::(input, LoadOptions::new()).expect("mapping alias"); - - assert_eq!(parsed.target["a"], "one"); - assert_eq!(parsed.target["b"], "two"); - } - - #[test] - fn event_deserializer_replays_scalar_alias_mapping_keys() { - let input = "root: {anchor: &svc service, ? *svc : api}\n"; - let parsed = from_str_with_options::>>( - input, - LoadOptions::new(), - ) - .expect("event-backed scalar alias mapping key replay"); - - assert_eq!(parsed["root"]["anchor"], "service"); - assert_eq!(parsed["root"]["service"], "api"); - } - - #[test] - fn event_deserializer_rejects_duplicate_alias_mapping_keys() { - let input = "root: {? &name name : api, ? 
*name : worker}\n"; - let error = from_str_with_options::>>( - input, - LoadOptions::new(), - ) - .expect_err("event-backed alias-expanded duplicate keys reject"); - assert!(error.to_string().contains("duplicate mapping key")); - } - - #[test] - fn event_deserializer_rejects_alias_replay_over_budget() { - let input = "base: &base api\nservice: *base\n"; - let error = from_str_with_options::>( - input, - LoadOptions::new().max_alias_expansion_nodes(0), - ) - .expect_err("event-backed alias replay budget rejects"); - - assert!( - error - .to_string() - .contains("alias event replay limit exceeded") - ); - } - - #[test] - fn event_deserializer_rejects_duplicate_keys_in_ignored_mappings() { - let input = "base: &base {a: one, a: two}\ntarget: *base\n"; - let error = from_str_with_options::(input, LoadOptions::new()) - .expect_err("ignored anchor source duplicate keys reject"); - - assert!(error.to_string().contains("duplicate mapping key")); - } - - #[test] - fn event_deserializer_reads_multiple_documents() { - let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [8080]\nenabled: false\nlabels:\n tier: job\noptional: note\n"; - let parsed: Vec = - from_documents_str_with_options(input, LoadOptions::new()) - .expect("event-backed document stream"); - - assert_eq!(parsed.len(), 2); - assert_eq!(parsed[0].name, "api"); - assert_eq!(parsed[0].ports, vec![80]); - assert_eq!(parsed[1].name, "worker"); - assert_eq!(parsed[1].ports, vec![8080]); - assert_eq!(parsed[1].labels["tier"], "job"); - assert_eq!(parsed[1].optional.as_deref(), Some("note")); - } - - #[test] - fn event_document_iterator_yields_borrowed_typed_documents() { - let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [8080]\nenabled: false\nlabels: {}\noptional: null\n"; - let mut iter = document_iter_str_with_options::>(input, LoadOptions::new()) - .expect("event-backed document iterator"); - - 
let first = iter.next().expect("first document").expect("first parses"); - assert_eq!(first.name, "api"); - assert!(std::ptr::eq(first.name.as_ptr(), input[10..13].as_ptr())); - - let second = iter - .next() - .expect("second document") - .expect("second parses"); - assert_eq!(second.name, "worker"); - let worker_offset = input.find("worker").expect("worker text in input"); - assert!(std::ptr::eq( - second.name.as_ptr(), - input[worker_offset..worker_offset + "worker".len()].as_ptr() - )); - assert!(iter.next().is_none()); - } - - #[test] - fn event_document_iterator_continues_after_typed_document_error() { - let input = "\ ---- -name: api -ports: [80] -enabled: true -labels: {} -optional: null ---- -name: bad -ports: [70000] -enabled: true -labels: {} -optional: null ---- -name: worker -ports: [8080] -enabled: false -labels: {} -optional: null -"; - let mut iter = - document_iter_str_with_options::(input, LoadOptions::new()) - .expect("event-backed document iterator"); - - let first = iter.next().expect("first document").expect("first parses"); - assert_eq!(first.name, "api"); - - let error = iter - .next() - .expect("second document") - .expect_err("second document has typed range error"); - assert_eq!(error.document_index(), Some(1)); - assert!(error.to_string().contains("70000"), "{error}"); - - let third = iter.next().expect("third document").expect("third parses"); - assert_eq!(third.name, "worker"); - assert!(iter.next().is_none()); - } - - #[test] - fn event_document_iterator_defers_later_parse_error_and_then_stops() { - let input = "---\nname: one\n---\n:\tbad\n---\nname: never\n"; - let mut iter = document_iter_str_with_options::(input, LoadOptions::new()) - .expect("event-backed document iterator"); - - let first = iter.next().expect("first document").expect("first parses"); - assert_eq!(first.name, "one"); - - let error = iter - .next() - .expect("second document item") - .expect_err("later parser error"); - assert_eq!(error.document_index(), Some(1)); 
- assert_eq!(error.line(), Some(4)); - assert_eq!(error.column(), Some(2)); - assert!(iter.next().is_none()); - } - - #[test] - fn event_document_iterator_empty_stream_yields_no_documents() { - let mut iter = document_iter_str_with_options::("", LoadOptions::new()) - .expect("empty event-backed document iterator"); - - assert!(iter.next().is_none()); - let collected = from_documents_str_with_options::("", LoadOptions::new()) - .expect("empty document collection"); - assert!(collected.is_empty()); - } - - #[test] - fn event_document_iterator_slice_checks_utf8_and_input_limits() { - let invalid = match document_iter_slice_with_options::( - b"name: \xFF\n", - LoadOptions::new(), - ) { - Ok(_) => panic!("invalid UTF-8 should fail"), - Err(error) => error, - }; - assert!(invalid.to_string().contains("input is not valid UTF-8")); - - let limited = match document_iter_slice_with_options::( - b"name: app\n", - LoadOptions::new().max_input_bytes(4), - ) { - Ok(_) => panic!("input limit should fail"), - Err(error) => error, - }; - assert!( - limited - .to_string() - .contains("YAML input exceeds configured limit of 4 bytes") - ); - } - - #[test] - fn event_document_reader_iterator_uses_owned_input_and_preserves_merge_alias_semantics() { - let input = "\ ---- -base: &base {a: one} -target: {<<: *base, b: two} ---- -base: &base {a: three} -target: *base -"; - let docs = document_iter_reader_with_options::( - Cursor::new(input.as_bytes()), - LoadOptions::new(), - ) - .expect("reader-backed event iterator") - .collect::>>() - .expect("reader-backed documents"); - - assert_eq!(docs.len(), 2); - assert_eq!(docs[0].target["a"], "one"); - assert_eq!(docs[0].target["b"], "two"); - assert_eq!(docs[1].target["a"], "three"); - } - - #[test] - fn event_document_reader_iterator_reports_read_errors_before_iteration() { - let error = match document_iter_reader_with_options::( - FailingAfterPrefixReader::new(b"name: api\n"), - LoadOptions::new(), - ) { - Ok(_) => panic!("reader failure 
should reject iterator construction"), - Err(error) => error, - }; - - assert!(error.to_string().contains("failed to read YAML input")); - assert_eq!(error.location(), None); - } - - #[test] - fn event_deserializer_document_errors_carry_document_index() { - let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [70000]\nenabled: true\nlabels: {}\noptional: null\n"; - let error = from_documents_str_with_options::(input, LoadOptions::new()) - .expect_err("event-backed stream reports second document error"); - assert_eq!(error.document_index(), Some(1)); - } - - #[test] - fn event_deserializer_skips_ignored_any_without_materializing_values() { - let input = "root:\n - name: api\n ports: [80, 443]\n - nested:\n ok: true\n"; - IgnoredAny::deserialize(EventNodeDeserializer { - source: &mut EventSource::new( - input, - crate::parse::EventStream::from_str(input) - .expect("event stream") - .collect::>>() - .expect("events"), - Schema::Yaml12, - LoadOptions::new().alias_expansion_budget(input.len()), - LoadOptions::new().selected_max_nesting_depth(), - ), - }) - .expect_err("raw stream markers must still be explicit"); - - from_str_with_options::(input, LoadOptions::new()).expect("ignored any"); - } - - fn alias_depth_chain(levels: usize) -> String { - // A literally shallow document (max nesting depth 2) whose final anchor - // expands, via the alias chain, to a structure `levels` deep. - let mut input = String::from("- &n0 0\n"); - for k in 1..levels { - input.push_str(&format!("- &n{k} [*n{prev}]\n", prev = k - 1)); - } - input - } - - #[test] - fn event_deserializer_bounds_alias_expansion_depth() { - // The event-backed path expands aliases lazily while walking, so the - // parser's literal-depth check does not bound the expanded depth. Without - // an explicit ceiling this recurses until the stack overflows; it must - // instead reject, matching the tree-backed `AnchorTable::resolve` guard. 
- let input = alias_depth_chain(400); - let error = from_str_with_options::>(&input, LoadOptions::new()) - .expect_err("deep alias chain must hit the nesting-depth ceiling"); - assert!( - error.to_string().contains("nesting depth"), - "unexpected error: {error}" - ); - } - - #[test] - fn event_deserializer_allows_alias_chain_within_depth_limit() { - let input = alias_depth_chain(8); - let parsed = from_str_with_options::>(&input, LoadOptions::new()) - .expect("alias chain within the depth limit deserializes"); - assert_eq!(parsed.len(), 8); - } - - #[test] - fn event_deserializer_reads_map_form_enum_variants() { - // Externally-tagged enum variants carrying a payload — the forms the - // earlier scalar-only path rejected. Covers unit, newtype, tuple, and - // struct variants in one sequence. - #[derive(Debug, Deserialize, PartialEq)] - enum EventEnum { - Unit, - Newtype(u32), - Tuple(u8, u8), - Struct { width: u32, height: u32 }, - } - - let input = "\ -- Unit -- Newtype: 7 -- Tuple: [1, 2] -- Struct: - width: 3 - height: 4 -"; - let parsed: Vec = - from_str_with_options(input, LoadOptions::new()).expect("event-backed enum variants"); - assert_eq!( - parsed, - vec![ - EventEnum::Unit, - EventEnum::Newtype(7), - EventEnum::Tuple(1, 2), - EventEnum::Struct { - width: 3, - height: 4, - }, - ] - ); - } - - #[test] - fn event_deserializer_reads_map_form_enum_variant_through_alias() { - #[derive(Debug, Deserialize, PartialEq)] - enum Mode { - Tuned { level: u8 }, - } - - // The anchored definition and the alias must both resolve to the same - // map-form variant. 
- let parsed = from_str_with_options::>( - "- &m {Tuned: {level: 9}}\n- *m\n", - LoadOptions::new(), - ) - .expect("aliased map-form enum variant"); - assert_eq!( - parsed, - vec![Mode::Tuned { level: 9 }, Mode::Tuned { level: 9 }] - ); - } -} diff --git a/src/event_de/mod.rs b/src/event_de/mod.rs new file mode 100644 index 0000000..d382642 --- /dev/null +++ b/src/event_de/mod.rs @@ -0,0 +1,313 @@ +#![allow(dead_code)] +// Compiled work-in-progress: this module is exercised by unit tests before it is +// wired into public Serde entrypoints. + +use crate::{ + Error, ErrorPathSegment, Node, NodeValue, Number, Result, Span, Tag, TaggedNode, + error::utf8_error_span, + key_identity::{DuplicateKeyTracker, check_duplicate_with_tracker_at_depth_limit}, + parse::{ + Event, EventMeta, ScalarStyle, merge_policy_for_schema, parse_scalar_with_schema, + schema_for_directives, + }, + schema::{LoadOptions, Schema}, +}; +use serde::de::{ + self, DeserializeOwned, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, + VariantAccess, Visitor, +}; +use std::{collections::HashMap, io::Read, marker::PhantomData}; + +pub(crate) fn from_str_with_options<'de, T>(input: &'de str, options: LoadOptions) -> Result +where + T: serde::Deserialize<'de>, +{ + let configured_schema = options.selected_schema(); + let replay_budget = options.alias_expansion_budget(input.len()); + let max_nesting_depth = options.selected_max_nesting_depth(); + let events = crate::parse::EventStream::from_str_with_options(input, options)? + .collect::>>()?; + let mut source = EventSource::new( + input, + events, + configured_schema, + replay_budget, + max_nesting_depth, + ); + source.enter_stream()?; + source.enter_document()?; + let value = T::deserialize(EventNodeDeserializer { + source: &mut source, + })?; + source.finish_document()?; + match source.peek() { + Some(Event::StreamEnd) => Ok(value), + Some(Event::DocumentStart { .. 
}) => Err(Error::data( + "expected single YAML document, found multiple documents", + None, + )), + Some(event) => Err(unexpected_event("stream end", event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } +} + +pub(crate) fn from_documents_str_with_options( + input: &str, + options: LoadOptions, +) -> Result> +where + T: DeserializeOwned, +{ + document_iter_str_with_options(input, options)?.collect() +} + +pub(crate) fn document_iter_str_with_options<'de, T>( + input: &'de str, + options: LoadOptions, +) -> Result> +where + T: serde::Deserialize<'de>, +{ + let configured_schema = options.selected_schema(); + let replay_budget = options.alias_expansion_budget(input.len()); + let max_nesting_depth = options.selected_max_nesting_depth(); + Ok(EventDocumentIter { + input, + frames: EventDocumentFrames::from_str_with_options(input, options)?, + configured_schema, + replay_budget, + max_nesting_depth, + _marker: PhantomData, + }) +} + +pub(crate) fn document_iter_slice_with_options<'de, T>( + input: &'de [u8], + options: LoadOptions, +) -> Result> +where + T: serde::Deserialize<'de>, +{ + options.check_input_len(input.len())?; + let input = std::str::from_utf8(input) + .map_err(|err| Error::encoding("input is not valid UTF-8", utf8_error_span(input, err)))?; + document_iter_str_with_options(input, options) +} + +pub(crate) fn document_iter_reader_with_options( + reader: R, + options: LoadOptions, +) -> Result> +where + T: DeserializeOwned, + R: Read, +{ + let bytes = crate::de::read_to_end_with_options(reader, options)?; + let input = String::from_utf8(bytes).map_err(|err| { + Error::encoding( + "input is not valid UTF-8", + utf8_error_span(err.as_bytes(), err.utf8_error()), + ) + })?; + let configured_schema = options.selected_schema(); + let replay_budget = options.alias_expansion_budget(input.len()); + let max_nesting_depth = options.selected_max_nesting_depth(); + let frames = EventDocumentFrames::from_str_with_options(&input, 
options)?; + Ok(OwnedEventDocumentIter { + input, + frames, + configured_schema, + replay_budget, + max_nesting_depth, + _marker: PhantomData, + }) +} + +pub(crate) struct EventDocumentIter<'de, T> { + input: &'de str, + frames: EventDocumentFrames, + configured_schema: Schema, + replay_budget: usize, + max_nesting_depth: Option, + _marker: PhantomData, +} + +impl<'de, T> Iterator for EventDocumentIter<'de, T> +where + T: serde::Deserialize<'de>, +{ + type Item = Result; + + fn next(&mut self) -> Option { + let (index, frame) = self.frames.next_frame()?; + Some( + frame + .and_then(|events| { + deserialize_document_frame( + self.input, + events, + self.configured_schema, + self.replay_budget, + self.max_nesting_depth, + ) + }) + .map_err(|error| error.with_document_index(index)), + ) + } +} + +pub(crate) struct OwnedEventDocumentIter { + input: String, + frames: EventDocumentFrames, + configured_schema: Schema, + replay_budget: usize, + max_nesting_depth: Option, + _marker: PhantomData, +} + +impl Iterator for OwnedEventDocumentIter +where + T: DeserializeOwned, +{ + type Item = Result; + + fn next(&mut self) -> Option { + let (index, frame) = self.frames.next_frame()?; + Some( + frame + .and_then(|events| { + deserialize_document_frame( + &self.input, + events, + self.configured_schema, + self.replay_budget, + self.max_nesting_depth, + ) + }) + .map_err(|error| error.with_document_index(index)), + ) + } +} + +mod prepared; +mod serde_impl; +mod source; + +#[cfg(test)] +mod tests; + +use self::prepared::{event_span, unexpected_event}; +use self::serde_impl::EventNodeDeserializer; +use self::source::{EventDocumentFrames, EventSource, deserialize_document_frame, skip_node_in}; + +fn span_union(start: Span, end: Span) -> Span { + Span::new(start.start, end.end, start.line, start.column) +} + +fn tagged_key_node(tag: crate::Tag, tag_span: Span, value: Node) -> Node { + let span = span_union(tag_span, value.span); + Node::new( + NodeValue::Tagged(Box::new(TaggedNode { + 
tag, + tag_span, + value, + })), + span, + ) +} + +fn apply_event_tag(meta: &EventMeta, node: Node) -> Node { + let Some(tag) = &meta.tag else { + return node; + }; + if tag.tag.is_non_specific() { + non_specific_event_node(span_union(tag.span, node.span), node) + } else { + tagged_key_node(tag.tag.clone(), tag.span, node) + } +} + +fn non_specific_event_node(span: Span, mut node: Node) -> Node { + node.span = span; + match &node.value { + NodeValue::Sequence(_) + | NodeValue::Mapping(_) + | NodeValue::String(_) + | NodeValue::Tagged(_) => node, + NodeValue::Null | NodeValue::Bool(_) | NodeValue::Number(_) => { + let source = node + .scalar_source() + .map(|source| source.raw().to_string()) + .unwrap_or_default(); + Node::new(NodeValue::String(source.clone()), span).with_scalar_source(source) + } + } +} + +fn node_is_merge_key(key: &Node) -> bool { + match &key.value { + NodeValue::String(_) => key.as_str() == Some("<<"), + NodeValue::Tagged(tagged) if tagged.tag.is_yaml_core("merge") => { + tagged.value.as_str() == Some("<<") + } + _ => false, + } +} + +fn scan_anchors_in( + events: &[Event], + pos: usize, + anchors: &mut HashMap>, +) -> Result { + let Some(event) = events.get(pos) else { + return Err(Error::data("unexpected end of YAML event stream", None)); + }; + if let Some(name) = event_anchor_name(event) { + let end = skip_node_in(events, pos)?; + anchors.insert(name.to_string(), events[pos..end].to_vec()); + } + match event { + Event::Scalar { .. } | Event::Alias { .. } => Ok(pos + 1), + Event::SequenceStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::SequenceEnd { .. }) => return Ok(next + 1), + Some(_) => next = scan_anchors_in(events, next, anchors)?, + None => return Err(Error::data("unterminated sequence event stream", None)), + } + } + } + Event::MappingStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::MappingEnd { .. 
}) => return Ok(next + 1), + Some(_) => { + next = scan_anchors_in(events, next, anchors)?; + next = scan_anchors_in(events, next, anchors)?; + } + None => return Err(Error::data("unterminated mapping event stream", None)), + } + } + } + event => Err(unexpected_event("node", event)), + } +} + +fn event_anchor_name(event: &Event) -> Option<&str> { + match event { + Event::Scalar { meta, .. } + | Event::SequenceStart { meta, .. } + | Event::MappingStart { meta, .. } => { + meta.anchor.as_ref().map(|anchor| anchor.name.as_str()) + } + Event::StreamStart + | Event::StreamEnd + | Event::DocumentStart { .. } + | Event::DocumentEnd { .. } + | Event::SequenceEnd { .. } + | Event::MappingEnd { .. } + | Event::Alias { .. } => None, + } +} diff --git a/src/event_de/prepared.rs b/src/event_de/prepared.rs new file mode 100644 index 0000000..a669fd7 --- /dev/null +++ b/src/event_de/prepared.rs @@ -0,0 +1,986 @@ +use super::*; + +pub(super) struct PreparedNodeDeserializer { + pub(super) node: Node, +} + +struct PreparedSeqAccess { + items: std::vec::IntoIter, + index: usize, +} + +impl<'de> SeqAccess<'de> for PreparedSeqAccess { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + let Some(node) = self.items.next() else { + return Ok(None); + }; + let index = self.index; + self.index += 1; + seed.deserialize(PreparedNodeDeserializer { node }) + .map(Some) + .map_err(|error| error.prepend_path_segment(ErrorPathSegment::Index(index))) + } +} + +struct PreparedMapAccess { + entries: std::vec::IntoIter<(Node, Node)>, + value: Option<(Node, ErrorPathSegment)>, +} + +impl<'de> MapAccess<'de> for PreparedMapAccess { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + let Some((key, value)) = self.entries.next() else { + return Ok(None); + }; + let segment = path_segment_for_node(&key); + self.value = Some((value, segment.clone())); + 
seed.deserialize(PreparedNodeDeserializer { node: key }) + .map(Some) + .map_err(|error| error.with_path_segment_if_empty(segment)) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + let (node, segment) = self + .value + .take() + .ok_or_else(|| Error::data("value requested before key", None))?; + seed.deserialize(PreparedNodeDeserializer { node }) + .map_err(|error| error.prepend_path_segment(segment)) + } +} + +impl<'de> de::Deserializer<'de> for PreparedNodeDeserializer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let span = self.node.span; + match self.node.value { + NodeValue::Null => visitor.visit_unit(), + NodeValue::Bool(value) => visitor.visit_bool(value), + NodeValue::Number(number) => visit_any_number(number, span, visitor), + NodeValue::String(value) => visitor.visit_string(value), + NodeValue::Sequence(items) => visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }), + NodeValue::Mapping(entries) => visitor.visit_map(PreparedMapAccess { + entries: entries.into_iter(), + value: None, + }), + NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { + tag: tagged.tag, + value: tagged.value, + }), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Bool(value) => with_span(visitor.visit_bool(value), node.span), + _ => Err(type_error("bool", &node)), + } + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: 
Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_i64_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_u64_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_i128_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_u128_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_f64(visitor) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Number(number) => visit_f64_number(number, node.span, visitor), + _ => Err(type_error("number", &node)), + } + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + let value = 
prepared_string_target_text(&node).ok_or_else(|| type_error("char", &node))?; + let mut chars = value.chars(); + match (chars.next(), chars.next()) { + (Some(ch), None) => with_span(visitor.visit_char(ch), node.span), + _ => Err(type_error("char", &node)), + } + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + let value = + prepared_string_target_text(&node).ok_or_else(|| type_error("string", &node))?; + visitor.visit_string(value.to_string()) + } + + fn deserialize_bytes(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + Err(type_error("bytes", &node)) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if prepared_is_null_node(&self.node) { + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Null => visitor.visit_unit(), + _ => Err(type_error("unit/null", &node)), + } + } + + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if yaml11_set_entries_node(&self.node)?.is_some() { + let entries = take_yaml11_set_entries_node(self.node).expect("checked explicit !!set"); + let items = yaml11_set_key_nodes(entries)?; + return 
visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }); + } + if yaml11_pair_items_node(&self.node, "omap")?.is_some() { + let items = + take_yaml11_pair_items_node(self.node, "omap").expect("checked explicit !!omap"); + let items = yaml11_pair_sequence_nodes(items, "omap")?; + return visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }); + } + if yaml11_pair_items_node(&self.node, "pairs")?.is_some() { + let items = + take_yaml11_pair_items_node(self.node, "pairs").expect("checked explicit !!pairs"); + let items = yaml11_pair_sequence_nodes(items, "pairs")?; + return visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }); + } + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Sequence(items) => visitor.visit_seq(PreparedSeqAccess { + items: items.into_iter(), + index: 0, + }), + _ => Err(type_error("sequence", &node)), + } + } + + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if let Some(items) = yaml11_pair_items_node(&self.node, "omap")? 
{ + validate_yaml11_omap_node_keys(items)?; + let items = + take_yaml11_pair_items_node(self.node, "omap").expect("checked explicit !!omap"); + let entries = yaml11_pair_entries(items, "omap")?; + return visitor.visit_map(PreparedMapAccess { + entries: entries.into_iter(), + value: None, + }); + } + let node = prepared_untag_node_owned(self.node); + match node.value { + NodeValue::Mapping(entries) => visitor.visit_map(PreparedMapAccess { + entries: entries.into_iter(), + value: None, + }), + _ => Err(type_error("mapping", &node)), + } + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + match self.node.value { + NodeValue::String(variant) => visitor.visit_enum(variant.into_deserializer()), + NodeValue::Mapping(entries) if entries.len() == 1 => { + let mut entries = entries.into_iter(); + let (key, value) = entries.next().expect("length checked"); + visitor.visit_enum(PreparedEnumDeserializer { + key, + value: Some(value), + }) + } + NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { + tag: tagged.tag, + value: tagged.value, + }), + _ => Err(type_error("enum string or single-key mapping", &self.node)), + } + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } +} + +pub(super) struct PreparedEnumDeserializer { + key: Node, + value: Option, +} + +impl<'de> EnumAccess<'de> for PreparedEnumDeserializer { + type Error = Error; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + 
let variant = seed.deserialize(PreparedNodeDeserializer { + node: self.key.clone(), + })?; + Ok((variant, self)) + } +} + +impl<'de> VariantAccess<'de> for PreparedEnumDeserializer { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + match self.value { + None => Ok(()), + Some(node) if matches!(node.value, NodeValue::Null) => Ok(()), + Some(node) => Err(type_error("unit enum variant", &node)), + } + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + let node = self + .value + .ok_or_else(|| Error::data("newtype variant requires a value", None))?; + seed.deserialize(PreparedNodeDeserializer { node }) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self + .value + .ok_or_else(|| Error::data("tuple variant requires a value", None))?; + de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node }, visitor) + } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self + .value + .ok_or_else(|| Error::data("struct variant requires a value", None))?; + de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor) + } +} + +pub(super) struct PreparedTaggedEnumDeserializer { + tag: Tag, + value: Node, +} + +impl<'de> EnumAccess<'de> for PreparedTaggedEnumDeserializer { + type Error = Error; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + let variant = + seed.deserialize(self.tag.serde_variant().into_owned().into_deserializer())?; + Ok((variant, self)) + } +} + +impl<'de> VariantAccess<'de> for PreparedTaggedEnumDeserializer { + type Error = Error; + + fn unit_variant(self) -> Result<()> { + if prepared_is_null_node(&self.value) { + Ok(()) + } else { + Err(type_error("unit enum variant", &self.value)) + } + } + + fn newtype_variant_seed(self, seed: T) -> Result + where + 
T: DeserializeSeed<'de>, + { + seed.deserialize(PreparedNodeDeserializer { node: self.value }) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node: self.value }, visitor) + } + + fn struct_variant(self, _fields: &'static [&'static str], visitor: V) -> Result + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_map(PreparedNodeDeserializer { node: self.value }, visitor) + } +} + +pub(super) fn explicit_core_tagged_node<'a>(mut node: &'a Node, suffix: &str) -> Option<&'a Node> { + while let NodeValue::Tagged(tagged) = &node.value { + if tagged.tag.is_yaml_core(suffix) { + return Some(&tagged.value); + } + node = &tagged.value; + } + None +} + +pub(super) fn take_explicit_core_tagged_node(mut node: Node, suffix: &str) -> Option { + loop { + match node.value { + NodeValue::Tagged(tagged) if tagged.tag.is_yaml_core(suffix) => { + return Some(tagged.value); + } + NodeValue::Tagged(tagged) => node = tagged.value, + _ => return None, + } + } +} + +pub(super) fn yaml11_set_entries_node(node: &Node) -> Result> { + let Some(value) = explicit_core_tagged_node(node, "set") else { + return Ok(None); + }; + match &value.value { + NodeValue::Mapping(entries) => Ok(Some(entries)), + _ => Err(type_error("mapping for explicit !!set", value)), + } +} + +pub(super) fn take_yaml11_set_entries_node(node: Node) -> Option> { + let value = take_explicit_core_tagged_node(node, "set")?; + match value.value { + NodeValue::Mapping(entries) => Some(entries), + _ => None, + } +} + +pub(super) fn yaml11_pair_items_node<'a>( + node: &'a Node, + suffix: &'static str, +) -> Result> { + let Some(value) = explicit_core_tagged_node(node, suffix) else { + return Ok(None); + }; + match &value.value { + NodeValue::Sequence(items) => Ok(Some(items)), + _ => Err(Error::data( + format!("expected sequence for explicit !!{suffix}"), + Some(value.span), + )), + } +} + +pub(super) fn 
take_yaml11_pair_items_node(node: Node, suffix: &'static str) -> Option> { + let value = take_explicit_core_tagged_node(node, suffix)?; + match value.value { + NodeValue::Sequence(items) => Some(items), + _ => None, + } +} + +pub(super) fn validate_yaml11_omap_node_keys(items: &[Node]) -> Result<()> { + let mut seen = DuplicateKeyTracker::new(); + for item in items { + let (key, _) = yaml11_singleton_pair_node(item, "omap")?; + check_duplicate_with_tracker_at_depth_limit( + &mut seen, + key, + 1, + Some(crate::schema::DEFAULT_MAX_NESTING_DEPTH), + )?; + } + Ok(()) +} + +pub(super) fn yaml11_set_key_nodes(entries: Vec<(Node, Node)>) -> Result> { + entries + .into_iter() + .map(|(key, value)| { + ensure_yaml11_set_null_node(&value)?; + Ok(key) + }) + .collect() +} + +pub(super) fn ensure_yaml11_set_null_node(value: &Node) -> Result<()> { + if prepared_is_null_node(value) { + Ok(()) + } else { + Err(Error::data( + "expected explicit !!set entry value to be null", + Some(value.span), + )) + } +} + +pub(super) fn yaml11_pair_sequence_nodes( + items: Vec, + suffix: &'static str, +) -> Result> { + items + .into_iter() + .map(|item| { + let span = item.span; + let (key, value) = take_yaml11_singleton_pair_node(item, suffix)?; + Ok(Node::new(NodeValue::Sequence(vec![key, value]), span)) + }) + .collect() +} + +pub(super) fn yaml11_pair_entries( + items: Vec, + suffix: &'static str, +) -> Result> { + items + .into_iter() + .map(|item| take_yaml11_singleton_pair_node(item, suffix)) + .collect() +} + +pub(super) fn yaml11_singleton_pair_node<'a>( + node: &'a Node, + suffix: &'static str, +) -> Result<(&'a Node, &'a Node)> { + let node = prepared_untag_node(node); + match &node.value { + NodeValue::Mapping(entries) if entries.len() == 1 => Ok((&entries[0].0, &entries[0].1)), + NodeValue::Mapping(_) => Err(Error::data( + format!("expected explicit !!{suffix} entry to contain exactly one pair"), + Some(node.span), + )), + _ => Err(Error::data( + format!("expected single-pair 
mapping entry for explicit !!{suffix}"), + Some(node.span), + )), + } +} + +pub(super) fn take_yaml11_singleton_pair_node( + node: Node, + suffix: &'static str, +) -> Result<(Node, Node)> { + let node = prepared_untag_node_owned(node); + match node.value { + NodeValue::Mapping(entries) if entries.len() == 1 => { + let mut entries = entries.into_iter(); + entries.next().ok_or_else(|| { + Error::data( + "internal: singleton mapping lost its entry", + Some(node.span), + ) + }) + } + NodeValue::Mapping(_) => Err(Error::data( + format!("expected explicit !!{suffix} entry to contain exactly one pair"), + Some(node.span), + )), + _ => Err(Error::data( + format!("expected single-pair mapping entry for explicit !!{suffix}"), + Some(node.span), + )), + } +} + +pub(super) fn prepared_untag_node(mut node: &Node) -> &Node { + while let NodeValue::Tagged(tagged) = &node.value { + node = &tagged.value; + } + node +} + +pub(super) fn prepared_untag_node_owned(node: Node) -> Node { + let Node { + value, + span, + source, + } = node; + match value { + NodeValue::Tagged(tagged) => prepared_untag_node_owned(tagged.value), + value => Node { + value, + span, + source, + }, + } +} + +pub(super) fn prepared_is_null_node(node: &Node) -> bool { + match &node.value { + NodeValue::Null => true, + NodeValue::Tagged(tagged) => prepared_is_null_node(&tagged.value), + _ => false, + } +} + +pub(super) fn prepared_string_target_text(node: &Node) -> Option<&str> { + match &node.value { + NodeValue::Tagged(tagged) => prepared_string_target_text(&tagged.value), + _ => string_target_text(node), + } +} + +pub(super) fn visit_scalar_any<'de, V>(node: &Node, input: &'de str, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match &node.value { + NodeValue::Null => visitor.visit_unit(), + NodeValue::Bool(value) => visitor.visit_bool(*value), + NodeValue::Number(number) => visit_any_number(*number, node.span, visitor), + NodeValue::String(value) => { + if let Some(borrowed) = borrowed_event_str(input, 
node.span, value) { + visitor.visit_borrowed_str(borrowed) + } else { + visitor.visit_str(value) + } + } + NodeValue::Tagged(tagged) => visitor.visit_enum(PreparedTaggedEnumDeserializer { + tag: tagged.tag.clone(), + value: tagged.value.clone(), + }), + NodeValue::Sequence(_) | NodeValue::Mapping(_) => Err(type_error("scalar", node)), + } +} + +pub(super) fn string_target_text(node: &Node) -> Option<&str> { + match &node.value { + NodeValue::String(value) => Some(value), + NodeValue::Null => Some("null"), + NodeValue::Bool(value) => Some(if *value { "true" } else { "false" }), + NodeValue::Number(_) => node.scalar_source().map(|source| source.raw()), + NodeValue::Tagged(tagged) => string_target_text(&tagged.value), + NodeValue::Sequence(_) | NodeValue::Mapping(_) => None, + } +} + +pub(super) fn borrowed_event_str<'de>( + input: &'de str, + span: Span, + value: &str, +) -> Option<&'de str> { + let raw = input.get(span.start..span.end)?; + if raw == value { + return Some(raw); + } + let quote = raw.chars().next()?; + if !matches!(quote, '"' | '\'') || !raw.ends_with(quote) || raw.len() < 2 { + return None; + } + let inner = &raw[quote.len_utf8()..raw.len() - quote.len_utf8()]; + (inner == value).then_some(inner) +} + +pub(super) fn path_segment_for_node(node: &Node) -> ErrorPathSegment { + match &node.value { + NodeValue::String(value) => ErrorPathSegment::Key(value.clone()), + NodeValue::Bool(value) => ErrorPathSegment::ScalarKey(value.to_string()), + NodeValue::Number(number) => ErrorPathSegment::ScalarKey(number.to_string()), + NodeValue::Null => ErrorPathSegment::ScalarKey("null".to_string()), + NodeValue::Sequence(_) | NodeValue::Mapping(_) | NodeValue::Tagged(_) => { + ErrorPathSegment::ComplexKey + } + } +} + +pub(super) fn with_span(result: Result, span: Span) -> Result { + result.map_err(|error| error.with_span_if_missing(span)) +} + +pub(super) fn visit_i64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + 
match number { + Number::Integer(value) => match i64::try_from(value) { + Ok(value) => with_span(visitor.visit_i64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for i64", + Some(span), + )), + }, + Number::Unsigned(value) => match i64::try_from(value) { + Ok(value) => with_span(visitor.visit_i64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for i64", + Some(span), + )), + }, + Number::Float(_) => Err(Error::data("expected integer, found float", Some(span))), + } +} + +pub(super) fn visit_u64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) if value >= 0 => match u64::try_from(value) { + Ok(value) => with_span(visitor.visit_u64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for u64", + Some(span), + )), + }, + Number::Unsigned(value) => match u64::try_from(value) { + Ok(value) => with_span(visitor.visit_u64(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for u64", + Some(span), + )), + }, + Number::Integer(_) => Err(Error::data( + "expected unsigned integer, found integer", + Some(span), + )), + Number::Float(_) => Err(Error::data( + "expected unsigned integer, found float", + Some(span), + )), + } +} + +pub(super) fn visit_i128_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => with_span(visitor.visit_i128(value), span), + Number::Unsigned(value) => match i128::try_from(value) { + Ok(value) => with_span(visitor.visit_i128(value), span), + Err(_) => Err(Error::data( + "integer scalar is out of range for i128", + Some(span), + )), + }, + Number::Float(_) => Err(Error::data("expected integer, found float", Some(span))), + } +} + +pub(super) fn visit_u128_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + 
Number::Integer(value) if value >= 0 => { + let value = u128::try_from(value).expect("non-negative i128 fits u128"); + with_span(visitor.visit_u128(value), span) + } + Number::Unsigned(value) => with_span(visitor.visit_u128(value), span), + Number::Integer(_) => Err(Error::data( + "expected unsigned integer, found integer", + Some(span), + )), + Number::Float(_) => Err(Error::data( + "expected unsigned integer, found float", + Some(span), + )), + } +} + +pub(super) fn visit_f64_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => with_span(visitor.visit_f64(value as f64), span), + Number::Unsigned(value) => with_span(visitor.visit_f64(value as f64), span), + Number::Float(value) => with_span(visitor.visit_f64(value), span), + } +} + +pub(super) fn visit_any_number<'de, V>(number: Number, span: Span, visitor: V) -> Result +where + V: Visitor<'de>, +{ + match number { + Number::Integer(value) => match i64::try_from(value) { + Ok(value) => with_span(visitor.visit_i64(value), span), + Err(_) => with_span(visitor.visit_i128(value), span), + }, + Number::Unsigned(value) => match u64::try_from(value) { + Ok(value) => with_span(visitor.visit_u64(value), span), + Err(_) => with_span(visitor.visit_u128(value), span), + }, + Number::Float(value) => with_span(visitor.visit_f64(value), span), + } +} + +pub(super) fn type_error(expected: &'static str, node: &Node) -> Error { + Error::data( + format!("expected {expected}, found {}", kind_name(&node.value)), + Some(node.span), + ) +} + +pub(super) fn kind_name(value: &NodeValue) -> &'static str { + match value { + NodeValue::Null => "null", + NodeValue::Bool(_) => "bool", + NodeValue::Number(Number::Integer(_)) => "integer", + NodeValue::Number(Number::Unsigned(_)) => "unsigned integer", + NodeValue::Number(Number::Float(_)) => "float", + NodeValue::String(_) => "string", + NodeValue::Sequence(_) => "sequence", + NodeValue::Mapping(_) => 
"mapping", + NodeValue::Tagged(_) => "tagged value", + } +} + +pub(super) fn unexpected_event(expected: &'static str, event: &Event) -> Error { + Error::data( + format!("expected {expected}, found {}", event_kind(event)), + event_span(event), + ) +} + +pub(super) fn event_kind(event: &Event) -> &'static str { + match event { + Event::StreamStart => "stream start", + Event::StreamEnd => "stream end", + Event::DocumentStart { .. } => "document start", + Event::DocumentEnd { .. } => "document end", + Event::SequenceStart { .. } => "sequence start", + Event::SequenceEnd { .. } => "sequence end", + Event::MappingStart { .. } => "mapping start", + Event::MappingEnd { .. } => "mapping end", + Event::Alias { .. } => "alias", + Event::Scalar { .. } => "scalar", + } +} + +pub(super) fn event_span(event: &Event) -> Option { + match event { + Event::DocumentStart { span, .. } + | Event::DocumentEnd { span, .. } + | Event::SequenceStart { span, .. } + | Event::SequenceEnd { span } + | Event::MappingStart { span, .. } + | Event::MappingEnd { span } + | Event::Scalar { span, .. 
} => Some(*span), + Event::Alias { anchor } => Some(anchor.span), + Event::StreamStart | Event::StreamEnd => None, + } +} diff --git a/src/event_de/serde_impl.rs b/src/event_de/serde_impl.rs new file mode 100644 index 0000000..bb76439 --- /dev/null +++ b/src/event_de/serde_impl.rs @@ -0,0 +1,499 @@ +use super::prepared::*; +use super::source::EventSource; +use super::*; + +pub(super) struct EventNodeDeserializer<'a, 'de> { + pub(super) source: &'a mut EventSource<'de>, +} + +impl<'de> EventNodeDeserializer<'_, 'de> { + fn deserialize_prepared_current_node(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_any(PreparedNodeDeserializer { node }, visitor) + } + + fn deserialize_prepared_current_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_seq(PreparedNodeDeserializer { node }, visitor) + } + + fn deserialize_prepared_current_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor) + } +} + +impl<'de> de::Deserializer<'de> for EventNodeDeserializer<'_, 'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + match self.source.peek() { + Some(Event::Scalar { .. 
}) => { + let node = self.source.take_scalar()?; + visit_scalar_any(&node, self.source.input, visitor) + } + Some(Event::SequenceStart { meta, .. }) | Some(Event::MappingStart { meta, .. }) + if meta.tag.is_some() => + { + self.deserialize_prepared_current_node(visitor) + } + Some(Event::SequenceStart { .. }) => self.deserialize_seq(visitor), + Some(Event::MappingStart { .. }) => self.deserialize_map(visitor), + Some(Event::Alias { anchor }) => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + Some(event) => Err(unexpected_event("node", event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Bool(value) => with_span(visitor.visit_bool(value), node.span), + _ => Err(type_error("bool", &node)), + } + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_i64(visitor) + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_i64_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_u64(visitor) + } + + fn 
deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_u64_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_i128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_i128_number(number, node.span, visitor), + _ => Err(type_error("integer", &node)), + } + } + + fn deserialize_u128(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_u128_number(number, node.span, visitor), + _ => Err(type_error("unsigned integer", &node)), + } + } + + fn deserialize_f32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_f64(visitor) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Number(number) => visit_f64_number(number, node.span, visitor), + _ => Err(type_error("number", &node)), + } + } + + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let value = prepared_string_target_text(&node).ok_or_else(|| type_error("char", &node))?; + let mut chars = value.chars(); + match (chars.next(), chars.next()) { + (Some(ch), None) => with_span(visitor.visit_char(ch), node.span), + _ => Err(type_error("char", &node)), + } + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let value = string_target_text(&node).ok_or_else(|| type_error("string", &node))?; + if let Some(borrowed) = borrowed_event_str(self.source.input, 
node.span, value) { + return visitor.visit_borrowed_str(borrowed); + } + visitor.visit_str(value) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + let value = string_target_text(&node).ok_or_else(|| type_error("string", &node))?; + visitor.visit_string(value.to_string()) + } + + fn deserialize_bytes(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = self.source.take_scalar()?; + Err(type_error("bytes", &node)) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + if self.source.peek_is_null_scalar()? { + self.source.take_scalar()?; + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let node = prepared_untag_node_owned(self.source.take_scalar()?); + match node.value { + NodeValue::Null => visitor.visit_unit(), + _ => Err(type_error("unit/null", &node)), + } + } + + fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + if self + .source + .peek_has_yaml_core_tag(&["set", "omap", "pairs"]) + { + return self.deserialize_prepared_current_seq(visitor); + } + match self.source.next()? { + Event::SequenceStart { span, .. 
} => { + self.source.enter_depth(span)?; + let value = visitor.visit_seq(EventSeqAccess { + source: &mut *self.source, + index: 0, + }); + self.source.exit_depth(); + value + } + event => Err(unexpected_event("sequence", &event)), + } + } + + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.resolve_aliases_until_non_alias()?; + if self.source.peek_has_yaml_core_tag(&["omap"]) { + return self.deserialize_prepared_current_map(visitor); + } + if self.source.next_mapping_has_merge_key()? { + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + return de::Deserializer::deserialize_map(PreparedNodeDeserializer { node }, visitor); + } + self.source.validate_next_mapping_duplicates()?; + match self.source.next()? { + Event::MappingStart { span, .. 
} => { + self.source.enter_depth(span)?; + let value = visitor.visit_map(EventMapAccess { + source: &mut *self.source, + value: None, + }); + self.source.exit_depth(); + value + } + event => Err(unexpected_event("mapping", &event)), + } + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_enum( + self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + // Materialize the current node and reuse the tree-backed enum logic so + // the event path accepts the same forms as `de.rs`: bare-scalar unit + // variants, single-key `{Variant: payload}` mappings (newtype/tuple/ + // struct variants), and tag-shorthand variants. The previous + // scalar-only path rejected every externally-tagged variant that + // carried a payload. + self.source.resolve_aliases_until_non_alias()?; + let mut node = self.source.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.source.schema))?; + self.source.skip_node_raw()?; + de::Deserializer::deserialize_enum( + PreparedNodeDeserializer { node }, + name, + variants, + visitor, + ) + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.source.skip_node()?; + visitor.visit_unit() + } +} + +impl EventSource<'_> { + fn peek_has_yaml_core_tag(&self, suffixes: &[&str]) -> bool { + match self.peek() { + Some(Event::SequenceStart { meta, .. }) | Some(Event::MappingStart { meta, .. 
}) => { + meta.tag + .as_ref() + .is_some_and(|tag| suffixes.iter().any(|suffix| tag.tag.is_yaml_core(suffix))) + } + _ => false, + } + } + + fn peek_is_null_scalar(&self) -> Result { + let Some(Event::Scalar { + value, + style, + meta, + span, + }) = self.peek() + else { + return Ok(false); + }; + let node = self.scalar_from_event(value.clone(), *style, meta, *span)?; + Ok(prepared_is_null_node(&node)) + } +} + +struct EventSeqAccess<'a, 'de> { + source: &'a mut EventSource<'de>, + index: usize, +} + +impl<'de> SeqAccess<'de> for EventSeqAccess<'_, 'de> { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + if matches!(self.source.peek(), Some(Event::SequenceEnd { .. })) { + self.source.next()?; + return Ok(None); + } + let index = self.index; + self.index += 1; + seed.deserialize(EventNodeDeserializer { + source: self.source, + }) + .map(Some) + .map_err(|error| error.prepend_path_segment(ErrorPathSegment::Index(index))) + } +} + +struct EventMapAccess<'a, 'de> { + source: &'a mut EventSource<'de>, + value: Option, +} + +impl<'de> MapAccess<'de> for EventMapAccess<'_, 'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + if matches!(self.source.peek(), Some(Event::MappingEnd { .. })) { + self.source.next()?; + return Ok(None); + } + let depth = self.source.depth; + let (events, pos) = self.source.current_events_and_pos(); + let mut scan_anchors = self.source.anchors.clone(); + let mut replayed_events = 0usize; + let segment = self + .source + .mapping_key_at(events, pos, &mut scan_anchors, &mut replayed_events, depth)? 
+ .map(|(node, _)| path_segment_for_node(&node)) + .unwrap_or(ErrorPathSegment::ComplexKey); + self.value = Some(segment.clone()); + seed.deserialize(EventNodeDeserializer { + source: self.source, + }) + .map(Some) + .map_err(|error| error.with_path_segment_if_empty(segment)) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + let segment = self + .value + .take() + .ok_or_else(|| Error::data("value requested before key", None))?; + seed.deserialize(EventNodeDeserializer { + source: self.source, + }) + .map_err(|error| error.prepend_path_segment(segment)) + } +} diff --git a/src/event_de/source.rs b/src/event_de/source.rs new file mode 100644 index 0000000..d945510 --- /dev/null +++ b/src/event_de/source.rs @@ -0,0 +1,875 @@ +use super::*; + +pub(super) struct EventDocumentFrames { + events: crate::parse::EventStream, + started: bool, + finished: bool, + index: usize, +} + +impl EventDocumentFrames { + pub(super) fn from_str_with_options(input: &str, options: LoadOptions) -> Result { + Ok(Self { + events: crate::parse::EventStream::from_str_with_options(input, options)?, + started: false, + finished: false, + index: 0, + }) + } + + pub(super) fn next_frame(&mut self) -> Option<(usize, Result>)> { + if self.finished { + return None; + } + let index = self.index; + if let Err(error) = self.enter_stream() { + self.finished = true; + return Some((index, Err(error))); + } + + match self.events.next() { + Some(Ok(Event::StreamEnd)) => { + self.finished = true; + None + } + Some(Ok(start @ Event::DocumentStart { .. 
})) => { + Some((index, self.collect_document_frame(start))) + } + Some(Ok(event)) => { + self.finished = true; + Some(( + index, + Err(unexpected_event("document start or stream end", &event)), + )) + } + Some(Err(error)) => { + self.finished = true; + Some((index, Err(error))) + } + None => { + self.finished = true; + None + } + } + } + + pub(super) fn enter_stream(&mut self) -> Result<()> { + if self.started { + return Ok(()); + } + self.started = true; + match self.events.next() { + Some(Ok(Event::StreamStart)) => Ok(()), + Some(Ok(event)) => Err(unexpected_event("stream start", &event)), + Some(Err(error)) => Err(error), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } + } + + fn collect_document_frame(&mut self, start: Event) -> Result> { + let mut frame = Vec::new(); + frame.push(Event::StreamStart); + frame.push(start); + loop { + match self.events.next() { + Some(Ok(event)) => { + let end = matches!(event, Event::DocumentEnd { .. }); + frame.push(event); + if end { + frame.push(Event::StreamEnd); + self.index += 1; + return Ok(frame); + } + } + Some(Err(error)) => { + self.finished = true; + return Err(error); + } + None => { + self.finished = true; + return Err(Error::data("unexpected end of YAML event stream", None)); + } + } + } + } +} + +pub(super) fn deserialize_document_frame<'de, T>( + input: &'de str, + events: Vec, + configured_schema: Schema, + replay_budget: usize, + max_nesting_depth: Option, +) -> Result +where + T: serde::Deserialize<'de>, +{ + let mut source = EventSource::new( + input, + events, + configured_schema, + replay_budget, + max_nesting_depth, + ); + source.enter_stream()?; + source.enter_document()?; + let value = T::deserialize(EventNodeDeserializer { + source: &mut source, + })?; + source.finish_document()?; + match source.peek() { + Some(Event::StreamEnd) => Ok(value), + Some(event) => Err(unexpected_event("stream end", event)), + None => Err(Error::data("unexpected end of YAML event stream", 
None)), + } +} + +pub(super) struct EventSource<'de> { + pub(super) input: &'de str, + events: Vec, + pos: usize, + configured_schema: Schema, + pub(super) schema: Schema, + pub(super) anchors: HashMap>, + inject: Vec, + replayed_events: usize, + replay_budget: usize, + max_nesting_depth: Option, + pub(super) depth: usize, +} + +struct InjectedEvents { + anchor: String, + events: Vec, + pos: usize, +} + +impl<'de> EventSource<'de> { + pub(super) fn new( + input: &'de str, + events: Vec, + configured_schema: Schema, + replay_budget: usize, + max_nesting_depth: Option, + ) -> Self { + Self { + input, + events, + pos: 0, + configured_schema, + schema: configured_schema, + anchors: HashMap::new(), + inject: Vec::new(), + replayed_events: 0, + replay_budget, + max_nesting_depth, + depth: 0, + } + } + + /// Records descent into a nested collection and enforces the configured + /// nesting-depth ceiling. The event-backed path expands aliases lazily as + /// it walks, so — unlike the tree-backed path's `AnchorTable::resolve` — the + /// parser's literal-depth check does not bound the *expanded* depth. Without + /// this guard a literally shallow document with a long alias chain recurses + /// until the stack overflows. Mirrors the tree-backed `depth > max` check. + pub(super) fn enter_depth(&mut self, span: Span) -> Result<()> { + self.depth = self.depth.saturating_add(1); + if self.max_nesting_depth.is_some_and(|max| self.depth > max) { + return Err(Error::limit( + "maximum YAML nesting depth exceeded while expanding alias", + span, + )); + } + Ok(()) + } + + pub(super) fn exit_depth(&mut self) { + self.depth = self.depth.saturating_sub(1); + } + + /// Same ceiling as [`enter_depth`], but for the read-only key/merge + /// materialization walk in [`node_at_for_key`], which threads an explicit + /// `depth` because it borrows `self` immutably. 
+ fn check_depth(&self, depth: usize, span: impl Into>) -> Result<()> { + if self.max_nesting_depth.is_some_and(|max| depth > max) { + return Err(Error::limit( + "maximum YAML nesting depth exceeded while expanding alias", + span, + )); + } + Ok(()) + } + + pub(super) fn peek(&self) -> Option<&Event> { + if let Some(frame) = self.inject.last() + && frame.pos < frame.events.len() + { + return frame.events.get(frame.pos); + } + self.events.get(self.pos) + } + + pub(super) fn next(&mut self) -> Result { + loop { + let event = self.next_raw()?; + if let Event::Alias { anchor } = event { + self.inject_alias(anchor.name, anchor.span)?; + continue; + } + return Ok(event); + } + } + + fn next_raw(&mut self) -> Result { + if let Some(event) = self.next_injected_event() { + return Ok(event); + } + + let pos = self.pos; + let event = self + .events + .get(pos) + .cloned() + .ok_or_else(|| Error::data("unexpected end of YAML event stream", None))?; + self.record_anchor_at(pos, &event)?; + self.pos += 1; + Ok(event) + } + + pub(super) fn resolve_aliases_until_non_alias(&mut self) -> Result<()> { + while matches!(self.peek(), Some(Event::Alias { .. })) { + let Event::Alias { anchor } = self.next_raw()? 
else { + unreachable!("peek observed an alias"); + }; + self.inject_alias(anchor.name, anchor.span)?; + } + Ok(()) + } + + fn next_injected_event(&mut self) -> Option { + loop { + let frame = self.inject.last_mut()?; + if frame.pos < frame.events.len() { + let event = frame.events[frame.pos].clone(); + frame.pos += 1; + if frame.pos == frame.events.len() { + self.inject.pop(); + } + return Some(event); + } + self.inject.pop(); + } + } + + fn record_anchor_at(&mut self, pos: usize, event: &Event) -> Result<()> { + let Some(name) = event_anchor_name(event) else { + return Ok(()); + }; + let end = skip_node_in(&self.events, pos)?; + self.anchors + .insert(name.to_string(), self.events[pos..end].to_vec()); + Ok(()) + } + + fn inject_alias(&mut self, name: String, span: Span) -> Result<()> { + if self.inject.iter().any(|frame| frame.anchor == name) { + return Err(Error::reference( + format!("recursive alias `{name}` is not supported"), + span, + )); + } + let events = self + .anchors + .get(&name) + .cloned() + .ok_or_else(|| Error::reference(format!("unknown anchor `{name}`"), span))?; + self.replayed_events = self.replayed_events.saturating_add(events.len()); + if self.replayed_events > self.replay_budget { + return Err(Error::limit("alias event replay limit exceeded", span)); + } + self.inject.push(InjectedEvents { + anchor: name, + events, + pos: 0, + }); + Ok(()) + } + + pub(super) fn enter_stream(&mut self) -> Result<()> { + match self.next()? { + Event::StreamStart => Ok(()), + event => Err(unexpected_event("stream start", &event)), + } + } + + pub(super) fn enter_document(&mut self) -> Result<()> { + match self.next()? { + Event::DocumentStart { directives, .. 
} => { + self.anchors.clear(); + self.inject.clear(); + self.replayed_events = 0; + self.depth = 0; + self.schema = schema_for_directives(self.configured_schema, &directives); + Ok(()) + } + event => Err(unexpected_event("document start", &event)), + } + } + + pub(super) fn finish_document(&mut self) -> Result<()> { + match self.next()? { + Event::DocumentEnd { .. } => Ok(()), + event => Err(unexpected_event("document end", &event)), + } + } + + pub(super) fn scalar_from_event( + &self, + value: String, + style: ScalarStyle, + meta: &EventMeta, + span: Span, + ) -> Result { + if let Some(tag) = &meta.tag { + let tag = &tag.tag; + let tag_span = meta.tag.as_ref().expect("tag checked").span; + if tag.is_yaml_core("str") { + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::String(value), span), + )); + } + if tag.is_yaml_core("int") { + let number = crate::de::parse_explicit_core_int_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Number(number), span).with_scalar_source(value), + )); + } + if tag.is_yaml_core("float") { + let number = crate::de::parse_explicit_core_float_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Number(number), span).with_scalar_source(value), + )); + } + if tag.is_yaml_core("bool") { + let value = crate::de::parse_explicit_core_bool_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Bool(value), span), + )); + } + if tag.is_yaml_core("null") { + crate::de::parse_explicit_core_null_text(&value, Some(span))?; + return Ok(tagged_key_node( + tag.clone(), + tag_span, + Node::new(NodeValue::Null, span), + )); + } + let inner = self.untagged_scalar_from_event(value, style, span)?; + if tag.is_non_specific() { + return Ok(non_specific_event_node(span_union(tag_span, span), inner)); + } + return Ok(Node::new( + NodeValue::Tagged(Box::new(TaggedNode { + tag: 
tag.clone(), + tag_span, + value: inner, + })), + span_union(tag_span, span), + )); + } + self.untagged_scalar_from_event(value, style, span) + } + + fn untagged_scalar_from_event( + &self, + value: String, + style: ScalarStyle, + span: Span, + ) -> Result { + match style { + ScalarStyle::Plain => parse_scalar_with_schema(&value, span, self.schema), + ScalarStyle::SingleQuoted + | ScalarStyle::DoubleQuoted + | ScalarStyle::Literal + | ScalarStyle::Folded => Ok(Node::new(NodeValue::String(value), span)), + } + } + + pub(super) fn take_scalar(&mut self) -> Result { + match self.next()? { + Event::Scalar { + value, + style, + meta, + span, + } => self.scalar_from_event(value, style, &meta, span), + Event::Alias { anchor } => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + event => Err(unexpected_event("scalar", &event)), + } + } + + pub(super) fn skip_node(&mut self) -> Result<()> { + self.resolve_aliases_until_non_alias()?; + match self.peek().cloned() { + Some(Event::Scalar { .. }) => { + self.next()?; + Ok(()) + } + Some(Event::Alias { anchor }) => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + Some(Event::SequenceStart { span, .. }) => { + self.enter_depth(span)?; + self.next()?; + loop { + if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { + self.next()?; + self.exit_depth(); + return Ok(()); + } + self.skip_node()?; + } + } + Some(Event::MappingStart { span, .. }) => { + if self.next_mapping_has_merge_key()? { + let mut node = self.materialize_current_node_for_merge()?; + node.apply_merge_keys_with_policy(merge_policy_for_schema(self.schema))?; + self.skip_node_raw()?; + return Ok(()); + } + self.validate_next_mapping_duplicates()?; + self.enter_depth(span)?; + self.next()?; + loop { + if matches!(self.peek(), Some(Event::MappingEnd { .. 
})) { + self.next()?; + self.exit_depth(); + return Ok(()); + } + self.skip_node()?; + self.skip_node()?; + } + } + Some(event) => Err(unexpected_event("node", &event)), + None => Err(Error::data("unexpected end of YAML event stream", None)), + } + } + + pub(super) fn skip_node_raw(&mut self) -> Result<()> { + match self.next()? { + Event::Scalar { .. } => Ok(()), + Event::Alias { anchor } => Err(Error::reference( + "event-backed alias replay is not implemented", + anchor.span, + )), + Event::SequenceStart { span, .. } => { + self.enter_depth(span)?; + loop { + if matches!(self.peek(), Some(Event::SequenceEnd { .. })) { + self.next()?; + self.exit_depth(); + return Ok(()); + } + self.skip_node_raw()?; + } + } + Event::MappingStart { span, .. } => { + self.enter_depth(span)?; + loop { + if matches!(self.peek(), Some(Event::MappingEnd { .. })) { + self.next()?; + self.exit_depth(); + return Ok(()); + } + self.skip_node_raw()?; + self.skip_node_raw()?; + } + } + event => Err(unexpected_event("node", &event)), + } + } + + pub(super) fn materialize_current_node_for_merge(&self) -> Result { + let (events, pos) = self.current_events_and_pos(); + let mut scan_anchors = self.anchors.clone(); + let mut replayed_events = 0usize; + let (node, next) = self.node_at_for_key( + events, + pos, + &mut scan_anchors, + &mut Vec::new(), + &mut replayed_events, + true, + self.depth, + )?; + let expected = skip_node_in(events, pos)?; + if next != expected { + return Err(Error::data( + "unterminated merge materialization event stream", + None, + )); + } + Ok(node) + } + + pub(super) fn next_mapping_has_merge_key(&self) -> Result { + let (events, start) = self.current_events_and_pos(); + let Some(Event::MappingStart { .. }) = events.get(start) else { + return Ok(false); + }; + let mut pos = start + 1; + let mut scan_anchors = self.anchors.clone(); + let mut replayed_events = 0usize; + while let Some(event) = events.get(pos) { + if matches!(event, Event::MappingEnd { .. 
}) { + return Ok(false); + } + let (key, next_pos) = self.node_at_for_key( + events, + pos, + &mut scan_anchors, + &mut Vec::new(), + &mut replayed_events, + true, + self.depth, + )?; + if node_is_merge_key(&key) { + return Ok(true); + } + pos = next_pos; + pos = scan_anchors_in(events, pos, &mut scan_anchors)?; + } + Err(Error::data("unterminated mapping event stream", None)) + } + + pub(super) fn validate_next_mapping_duplicates(&self) -> Result<()> { + let (events, start) = self.current_events_and_pos(); + let Some(Event::MappingStart { .. }) = events.get(start) else { + return Ok(()); + }; + let mut pos = start + 1; + let mut seen = DuplicateKeyTracker::new(); + let mut scan_anchors = self.anchors.clone(); + let mut replayed_events = 0usize; + while let Some(event) = events.get(pos) { + if matches!(event, Event::MappingEnd { .. }) { + return Ok(()); + } + if let Some((key, next_pos)) = self.mapping_key_at( + events, + pos, + &mut scan_anchors, + &mut replayed_events, + self.depth, + )? 
{ + if node_is_merge_key(&key) { + return Err(Error::data( + "event-backed merge-key expansion is not implemented", + Some(key.span), + )); + } + check_duplicate_with_tracker_at_depth_limit(&mut seen, &key, 1, None)?; + pos = next_pos; + } else { + pos = scan_anchors_in(events, pos, &mut scan_anchors)?; + } + pos = scan_anchors_in(events, pos, &mut scan_anchors)?; + } + Err(Error::data("unterminated mapping event stream", None)) + } + + pub(super) fn current_events_and_pos(&self) -> (&[Event], usize) { + if let Some(frame) = self.inject.last() + && frame.pos < frame.events.len() + { + return (&frame.events, frame.pos); + } + (&self.events, self.pos) + } + + pub(super) fn mapping_key_at( + &self, + events: &[Event], + pos: usize, + scan_anchors: &mut HashMap>, + replayed_events: &mut usize, + depth: usize, + ) -> Result> { + if let Some(name) = events.get(pos).and_then(event_anchor_name) { + let end = skip_node_in(events, pos)?; + scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); + } + match events.get(pos) { + Some(Event::Scalar { .. }) + | Some(Event::Alias { .. }) + | Some(Event::SequenceStart { .. }) + | Some(Event::MappingStart { .. 
}) => self + .node_at_for_key( + events, + pos, + scan_anchors, + &mut Vec::new(), + replayed_events, + false, + depth, + ) + .map(|(node, next)| Some((node, next))), + Some(_) | None => Ok(None), + } + } + + fn scalar_key_at(&self, pos: usize) -> Result> { + self.scalar_key_at_in(&self.events, pos) + } + + fn scalar_key_at_in(&self, events: &[Event], pos: usize) -> Result> { + let Some(Event::Scalar { + value, + style, + meta, + span, + }) = events.get(pos) + else { + return Ok(None); + }; + self.scalar_from_event(value.clone(), *style, meta, *span) + .map(|node| Some((node, pos + 1))) + } + + fn scalar_key_node_from_event( + &self, + value: String, + style: ScalarStyle, + meta: &EventMeta, + span: Span, + ) -> Result { + let Some(tag) = &meta.tag else { + return self.scalar_from_event(value, style, meta, span); + }; + let inner = if tag.tag.is_yaml_core("int") { + Node::new( + NodeValue::Number(crate::de::parse_explicit_core_int_text(&value, Some(span))?), + span, + ) + } else if tag.tag.is_yaml_core("float") { + Node::new( + NodeValue::Number(crate::de::parse_explicit_core_float_text( + &value, + Some(span), + )?), + span, + ) + } else if tag.tag.is_yaml_core("bool") { + Node::new( + NodeValue::Bool(crate::de::parse_explicit_core_bool_text( + &value, + Some(span), + )?), + span, + ) + } else if tag.tag.is_yaml_core("null") { + crate::de::parse_explicit_core_null_text(&value, Some(span))?; + Node::new(NodeValue::Null, span) + } else { + let _ = style; + Node::new(NodeValue::String(value), span) + }; + Ok(tagged_key_node(tag.tag.clone(), tag.span, inner)) + } + + #[allow(clippy::too_many_arguments)] + fn node_at_for_key( + &self, + events: &[Event], + pos: usize, + scan_anchors: &mut HashMap>, + active_aliases: &mut Vec, + replayed_events: &mut usize, + allow_merge_key: bool, + depth: usize, + ) -> Result<(Node, usize)> { + let Some(event) = events.get(pos) else { + return Err(Error::data("unexpected end of YAML event stream", None)); + }; + 
self.check_depth(depth, event_span(event))?; + if let Some(name) = event_anchor_name(event) { + let end = skip_node_in(events, pos)?; + scan_anchors.insert(name.to_string(), events[pos..end].to_vec()); + } + + match event { + Event::Scalar { + value, + style, + meta, + span, + } => self + .scalar_key_node_from_event(value.clone(), *style, meta, *span) + .map(|node| (node, pos + 1)), + Event::Alias { anchor } => { + let name = &anchor.name; + if active_aliases.iter().any(|active| active == name) { + return Err(Error::reference( + format!("recursive alias `{name}` is not supported"), + anchor.span, + )); + } + let target = scan_anchors.get(name).cloned().ok_or_else(|| { + Error::reference(format!("unknown anchor `{name}`"), anchor.span) + })?; + *replayed_events = replayed_events.saturating_add(target.len()); + if *replayed_events > self.replay_budget { + return Err(Error::limit( + "alias event replay limit exceeded", + anchor.span, + )); + } + active_aliases.push(name.clone()); + let (mut node, end) = self.node_at_for_key( + &target, + 0, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + depth, + )?; + active_aliases.pop(); + if end != target.len() { + return Err(Error::data("unterminated alias key event subtree", None)); + } + node.span = anchor.span; + Ok((node, pos + 1)) + } + Event::SequenceStart { meta, span, .. 
} => { + let mut items = Vec::new(); + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::SequenceEnd { span: end_span }) => { + let node = + Node::new(NodeValue::Sequence(items), span_union(*span, *end_span)); + return Ok((apply_event_tag(meta, node), next + 1)); + } + Some(_) => { + let (item, after_item) = self.node_at_for_key( + events, + next, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + depth + 1, + )?; + items.push(item); + next = after_item; + } + None => { + return Err(Error::data("unterminated sequence event stream", None)); + } + } + } + } + Event::MappingStart { meta, span, .. } => { + let mut entries = Vec::new(); + let mut seen = DuplicateKeyTracker::new(); + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::MappingEnd { span: end_span }) => { + let node = Node::new( + NodeValue::Mapping(entries), + span_union(*span, *end_span), + ); + return Ok((apply_event_tag(meta, node), next + 1)); + } + Some(_) => { + let (key, after_key) = self.node_at_for_key( + events, + next, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + depth + 1, + )?; + if !allow_merge_key && node_is_merge_key(&key) { + return Err(Error::data( + "event-backed merge-key expansion is not implemented", + Some(key.span), + )); + } + if !(allow_merge_key + && self.schema.is_legacy_compatible() + && node_is_merge_key(&key)) + { + check_duplicate_with_tracker_at_depth_limit( + &mut seen, &key, 1, None, + )?; + } + let (value, after_value) = self.node_at_for_key( + events, + after_key, + scan_anchors, + active_aliases, + replayed_events, + allow_merge_key, + depth + 1, + )?; + entries.push((key, value)); + next = after_value; + } + None => return Err(Error::data("unterminated mapping event stream", None)), + } + } + } + event => Err(unexpected_event("node", event)), + } + } +} + +pub(super) fn skip_node_in(events: &[Event], pos: usize) -> Result { + match events + .get(pos) + .ok_or_else(|| 
Error::data("unexpected end of YAML event stream", None))? + { + Event::Scalar { .. } | Event::Alias { .. } => Ok(pos + 1), + Event::SequenceStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::SequenceEnd { .. }) => return Ok(next + 1), + Some(_) => next = skip_node_in(events, next)?, + None => return Err(Error::data("unterminated sequence event stream", None)), + } + } + } + Event::MappingStart { .. } => { + let mut next = pos + 1; + loop { + match events.get(next) { + Some(Event::MappingEnd { .. }) => return Ok(next + 1), + Some(_) => { + next = skip_node_in(events, next)?; + next = skip_node_in(events, next)?; + } + None => return Err(Error::data("unterminated mapping event stream", None)), + } + } + } + event => Err(unexpected_event("node", event)), + } +} diff --git a/src/event_de/tests.rs b/src/event_de/tests.rs new file mode 100644 index 0000000..b7f260f --- /dev/null +++ b/src/event_de/tests.rs @@ -0,0 +1,984 @@ +use super::*; +use serde::{Deserialize, de::IgnoredAny}; +use std::collections::{BTreeMap, BTreeSet}; +use std::io::{self, Cursor, Read}; + +struct FailingAfterPrefixReader { + prefix: Cursor>, +} + +impl FailingAfterPrefixReader { + fn new(prefix: &[u8]) -> Self { + Self { + prefix: Cursor::new(prefix.to_vec()), + } + } +} + +impl Read for FailingAfterPrefixReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let read = self.prefix.read(buf)?; + if read == 0 { + Err(io::Error::other("stream interrupted")) + } else { + Ok(read) + } + } +} + +#[derive(Debug, Deserialize, PartialEq)] +struct EventConfig<'a> { + name: &'a str, + ports: Vec, + enabled: bool, + labels: BTreeMap, + optional: Option, +} + +#[derive(Debug, Deserialize, PartialEq)] +struct OwnedEventConfig { + name: String, + ports: Vec, + enabled: bool, + labels: BTreeMap, + optional: Option, +} + +#[derive(Debug, Deserialize, PartialEq)] +struct ExplicitCoreScalars { + string_null: String, + optional_string_null: Option, + string_bool: 
String, + yes: bool, + off: bool, + maybe: Option, + unit: (), +} + +#[derive(Debug, Deserialize, PartialEq)] +struct ExplicitCoreNumbers { + integer: i64, + unsigned: u64, + float: f64, +} + +#[derive(Debug, Deserialize, PartialEq)] +struct TargetMap { + target: BTreeMap, +} + +#[derive(Debug, Deserialize, PartialEq)] +struct TargetValueMap { + target: BTreeMap, +} + +#[derive(Debug, Deserialize, PartialEq)] +struct KnownOnly { + name: String, +} + +fn assert_value_tagged_key( + mapping: &crate::Value, + expected_tag: crate::Tag, + expected_key: &str, + expected_value: &str, +) { + let mapping = mapping.as_mapping().expect("mapping value"); + assert!( + mapping.iter().any(|(key, value)| { + matches!(key, crate::Value::Tagged(tagged) + if tagged.tag == expected_tag + && tagged.value.as_str() == Some(expected_key) + && value.as_str() == Some(expected_value)) + }), + "expected tagged key {expected_tag:?} {expected_key:?}: {expected_value:?}" + ); +} + +#[test] +fn event_deserializer_reads_typed_structs() { + let input = "\ +name: api +ports: [80, 443] +enabled: true +labels: + tier: backend + release: stable +optional: null +"; + + let parsed: EventConfig<'_> = + from_str_with_options(input, LoadOptions::new()).expect("event-backed typed config"); + assert_eq!(parsed.name, "api"); + assert!(std::ptr::eq(parsed.name.as_ptr(), input[6..9].as_ptr())); + assert_eq!(parsed.ports, vec![80, 443]); + assert!(parsed.enabled); + assert_eq!(parsed.labels["tier"], "backend"); + assert_eq!(parsed.labels["release"], "stable"); + assert_eq!(parsed.optional, None); +} + +#[test] +fn event_deserializer_rejects_duplicate_scalar_keys() { + let input = "labels:\n tier: backend\n tier: worker\n"; + let error = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect_err("event-backed duplicate keys reject"); + assert!(error.to_string().contains("duplicate mapping key")); +} + +#[test] +fn event_deserializer_rejects_duplicate_sequence_alias_mapping_keys() { + let input = 
"seq: &seq [a, b]\nroot: {? *seq : first, ? [a, b] : second}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("event-backed alias-expanded sequence keys reject"); + + assert!(error.to_string().contains("duplicate mapping key")); +} + +#[test] +fn event_deserializer_rejects_duplicate_mapping_alias_keys_order_insensitively() { + let input = "base: &base {a: 1, b: 2}\nroot: {? *base : first, ? {b: 2, a: 1} : second}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("event-backed alias-expanded mapping keys reject"); + + assert!(error.to_string().contains("duplicate mapping key")); +} + +#[test] +fn event_deserializer_accepts_distinct_complex_alias_mapping_keys() { + let input = "seq: &seq [a, b]\nroot: {? *seq : first, ? [a, c] : second}\n"; + + from_str_with_options::(input, LoadOptions::new()) + .expect("distinct complex alias keys pass duplicate preflight"); +} + +#[test] +fn event_deserializer_rejects_recursive_alias_mapping_keys() { + let input = "root: {? &self [*self] : value}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("recursive alias key rejects"); + + assert!(error.to_string().contains("recursive alias")); +} + +#[test] +fn event_deserializer_rejects_complex_alias_mapping_keys_over_budget() { + let input = "seq: &seq [a, b]\nroot: {? 
*seq : first}\n"; + let error = + from_str_with_options::(input, LoadOptions::new().max_alias_expansion_nodes(1)) + .expect_err("complex alias key replay budget rejects"); + + assert!( + error + .to_string() + .contains("alias event replay limit exceeded") + ); +} + +#[test] +fn event_deserializer_expands_merge_keys() { + let input = "\ +base: &base + retries: 3 + command: deploy +target: + <<: *base + command: smoke +"; + let parsed = from_str_with_options::(input, LoadOptions::new()).expect("merge keys"); + + assert_eq!(parsed.target["retries"], "3"); + assert_eq!(parsed.target["command"], "smoke"); +} + +#[test] +fn event_deserializer_expands_merge_lists_with_earlier_sources_winning() { + let input = "\ +base1: &base1 {a: one, shared: first} +base2: &base2 {b: two, shared: second} +target: {<<: [*base1, *base2], local: ok} +"; + let parsed = from_str_with_options::(input, LoadOptions::new()).expect("merge list"); + + assert_eq!(parsed.target["a"], "one"); + assert_eq!(parsed.target["b"], "two"); + assert_eq!(parsed.target["shared"], "first"); + assert_eq!(parsed.target["local"], "ok"); +} + +#[test] +fn event_deserializer_expands_explicit_merge_tag_keys() { + let input = "\ +%TAG !m! tag:yaml.org,2002: +--- +base: &base {a: one, shared: base} +tagged: {!!merge <<: *base, shared: tagged} +canonical: {! 
<<: *base, shared: canonical} +handle: {!m!merge <<: *base, shared: handle} +"; + let parsed = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect("explicit merge tag keys"); + + for (key, expected_shared) in [ + ("tagged", "tagged"), + ("canonical", "canonical"), + ("handle", "handle"), + ] { + assert_eq!(parsed[key]["a"], "one"); + assert_eq!(parsed[key]["shared"], expected_shared); + } +} + +#[test] +fn event_deserializer_keeps_explicit_string_merge_key_literal() { + let input = "base: &base {!!str <<: literal, a: one}\ntarget: {<<: *base}\n"; + let parsed = from_str_with_options::(input, LoadOptions::new()) + .expect("explicit string merge key stays literal"); + + assert_eq!(parsed.target["a"], "one"); + assert_eq!(parsed.target["<<"], "literal"); +} + +#[test] +fn event_deserializer_reports_invalid_merge_payloads() { + let input = "target: {<<: scalar}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("invalid merge payload rejects"); + + assert!( + error + .to_string() + .contains("expected a mapping or list of mappings for merging"), + "{error}" + ); +} + +#[test] +fn event_deserializer_skips_valid_merge_maps_for_ignored_values() { + let input = "base: &base {a: one}\nname: app\nignored: {<<: *base, b: two}\n"; + let parsed = from_str_with_options::(input, LoadOptions::new()) + .expect("unknown merge-bearing field is skipped"); + + assert_eq!(parsed.name, "app"); + from_str_with_options::(input, LoadOptions::new()) + .expect("ignored-any skips merge-bearing maps"); +} + +#[test] +fn event_deserializer_rejects_invalid_merge_payloads_in_ignored_values() { + let input = "name: app\nignored: {<<: scalar}\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("strict invalid merge payload rejects while skipping"); + + assert!( + error + .to_string() + .contains("expected a mapping or list of mappings for merging"), + "{error}" + ); +} + +#[test] +fn 
event_deserializer_yaml11_skips_literal_merge_payload_in_ignored_value() { + let input = "%YAML 1.1\n---\nname: app\nignored: {<<: scalar, keep: value}\n"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_version_directive()) + .expect("directive-driven YAML 1.1 literal merge payload is skipped"); + + assert_eq!(parsed.name, "app"); +} + +#[test] +fn event_deserializer_rejects_repeated_merge_keys_by_default() { + let input = "\ +first: &first {shared: first} +second: &second {shared: second} +target: + <<: *first + !!merge <<: *second +"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("default repeated merge keys reject"); + + assert!(error.to_string().contains("duplicate mapping key `<<`")); +} + +#[test] +fn event_deserializer_yaml11_recovers_repeated_merge_keys() { + let input = "\ +first: &first {shared: first, retries: 3} +second: &second {shared: second, timeout: 10} +target: + <<: *first + ! <<: *second + keep: value +"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) + .expect("YAML 1.1 repeated merge keys recover"); + + assert_eq!(parsed.target["shared"], "second"); + assert_eq!(parsed.target["retries"], "3"); + assert_eq!(parsed.target["timeout"], "10"); + assert_eq!(parsed.target["keep"], "value"); +} + +#[test] +fn event_deserializer_yaml11_keeps_scalar_merge_payload_literal() { + let input = "\ +target: + <<: scalar + keep: value +"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) + .expect("YAML 1.1 scalar merge payload stays literal"); + + assert_eq!(parsed.target["<<"], "scalar"); + assert_eq!(parsed.target["keep"], "value"); +} + +#[test] +fn event_deserializer_yaml11_keeps_mixed_invalid_merge_list_literal() { + let input = "\ +base: &base {a: 1} +target: + <<: [*base, scalar] + keep: value +"; + let parsed = from_str_with_options::(input, LoadOptions::yaml_1_1()) + .expect("YAML 1.1 mixed invalid merge list stays literal"); + let merge = 
&parsed.target["<<"]; + let merge = merge.as_sequence().expect("literal merge list"); + + assert_eq!(merge[0]["a"].as_u64(), Some(1)); + assert_eq!(merge[1].as_str(), Some("scalar")); + assert_eq!(parsed.target["keep"].as_str(), Some("value")); +} + +#[test] +fn event_deserializer_reads_explicit_core_scalar_tags() { + let input = "\ +string_null: !!str null +optional_string_null: !!str null +string_bool: !!str true +yes: !!bool YES +off: !!bool off +maybe: !!null null +unit: !!null ~ +"; + let parsed = from_str_with_options::(input, LoadOptions::new()).unwrap(); + + assert_eq!( + parsed, + ExplicitCoreScalars { + string_null: "null".to_string(), + optional_string_null: Some("null".to_string()), + string_bool: "true".to_string(), + yes: true, + off: false, + maybe: None, + unit: (), + } + ); +} + +#[test] +fn event_deserializer_reads_explicit_core_numeric_tags() { + let input = "integer: !!int \"42\"\nunsigned: !!int 0x2A\nfloat: !!float \"1.5\"\n"; + let parsed = from_str_with_options::(input, LoadOptions::new()).unwrap(); + + assert_eq!( + parsed, + ExplicitCoreNumbers { + integer: 42, + unsigned: 42, + float: 1.5, + } + ); +} + +#[test] +fn event_deserializer_explicit_tags_follow_directive_schema() { + let parsed = from_str_with_options::( + "%YAML 1.1\n--- !!bool YES\n", + LoadOptions::yaml_version_directive(), + ) + .expect("directive-driven explicit bool"); + + assert!(parsed); +} + +#[test] +fn event_deserializer_rejects_invalid_explicit_core_scalar_tags() { + let bool_error = from_str_with_options::("!!bool maybe\n", LoadOptions::new()) + .expect_err("invalid explicit bool"); + assert!( + bool_error + .to_string() + .contains("failed to parse explicit !!bool scalar"), + "{bool_error}" + ); + + let str_error = from_str_with_options::("!!str 7\n", LoadOptions::new()) + .expect_err("explicit string does not coerce to integer"); + assert!(str_error.to_string().contains("expected integer")); +} + +#[test] +fn 
event_deserializer_retains_tagged_scalars_for_value_and_unwraps_typed_strings() { + let value = from_str_with_options::("!Thing tagged\n", LoadOptions::new()) + .expect("custom tagged scalar value"); + let tagged = value.as_tagged().expect("custom tag retained"); + + assert_eq!(tagged.tag, crate::Tag::new("Thing")); + assert_eq!(tagged.value.as_str(), Some("tagged")); + + let typed = from_str_with_options::("!Thing tagged\n", LoadOptions::new()) + .expect("typed string unwraps custom tag"); + assert_eq!(typed, "tagged"); + + let explicit = from_str_with_options::("!!str null\n", LoadOptions::new()) + .expect("explicit core string tag value"); + let tagged = explicit.as_tagged().expect("explicit core tag retained"); + assert_eq!(tagged.tag, crate::Tag::new("!!str")); + assert_eq!(tagged.value.as_str(), Some("null")); +} + +#[test] +fn event_deserializer_retains_tagged_collections_for_value_and_unwraps_typed_targets() { + #[derive(Debug, Deserialize, PartialEq)] + struct TaggedCollections { + seq: Vec, + map: BTreeMap, + } + + let input = "seq: !Seq [a, b]\nmap: !Map {k: v}\n"; + let value = from_str_with_options::(input, LoadOptions::new()).expect("value"); + + let sequence = value["seq"].as_tagged().expect("sequence tag retained"); + assert_eq!(sequence.tag, crate::Tag::new("Seq")); + assert_eq!( + sequence + .value + .as_sequence() + .expect("sequence payload") + .len(), + 2 + ); + assert_eq!(sequence.value[0].as_str(), Some("a")); + assert_eq!(sequence.value[1].as_str(), Some("b")); + + let mapping = value["map"].as_tagged().expect("mapping tag retained"); + assert_eq!(mapping.tag, crate::Tag::new("Map")); + assert_eq!(mapping.value["k"].as_str(), Some("v")); + + let typed = from_str_with_options::(input, LoadOptions::new()) + .expect("typed collections unwrap tags"); + assert_eq!( + typed, + TaggedCollections { + seq: vec!["a".to_string(), "b".to_string()], + map: BTreeMap::from([("k".to_string(), "v".to_string())]), + } + ); + + let top_value = 
from_str_with_options::("!Seq [a, b]\n", LoadOptions::new()) + .expect("top-level tagged sequence value"); + let tagged = top_value.as_tagged().expect("top-level tag retained"); + assert_eq!(tagged.tag, crate::Tag::new("Seq")); + assert_eq!(tagged.value[1].as_str(), Some("b")); + + let top_typed = from_str_with_options::>("!Seq [a, b]\n", LoadOptions::new()) + .expect("top-level typed sequence unwraps tag"); + assert_eq!(top_typed, ["a", "b"]); +} + +#[test] +fn event_deserializer_projects_yaml11_collection_tags_for_typed_targets() { + let set = + from_str_with_options::>("!!set\n? alpha\n? beta\n", LoadOptions::new()) + .expect("typed !!set"); + assert_eq!( + set, + BTreeSet::from(["alpha".to_string(), "beta".to_string()]) + ); + + let omap_pairs = from_str_with_options::>( + "!!omap\n- first: 1\n- second: 2\n", + LoadOptions::new(), + ) + .expect("typed !!omap pair sequence"); + assert_eq!( + omap_pairs, + vec![("first".to_string(), 1), ("second".to_string(), 2)] + ); + + let omap_map = from_str_with_options::>( + "!!omap\n- second: 2\n- first: 1\n", + LoadOptions::new(), + ) + .expect("typed !!omap map"); + assert_eq!( + omap_map, + BTreeMap::from([("first".to_string(), 1), ("second".to_string(), 2)]) + ); + + let pairs = from_str_with_options::>( + "!!pairs\n- repeat: 1\n- repeat: 2\n", + LoadOptions::new(), + ) + .expect("typed !!pairs preserves duplicate keys"); + assert_eq!( + pairs, + vec![("repeat".to_string(), 1), ("repeat".to_string(), 2)] + ); +} + +#[test] +fn event_deserializer_rejects_lossy_yaml11_collection_tag_shapes() { + let duplicate = from_str_with_options::>( + "!!omap\n- z: 1\n- a: 2\n- z: 3\n", + LoadOptions::new(), + ) + .expect_err("typed !!omap map rejects duplicate keys"); + assert!(duplicate.to_string().contains("duplicate mapping key `z`")); + + let set_error = + from_str_with_options::>("!!set {alpha: true}\n", LoadOptions::new()) + .expect_err("typed !!set rejects non-null values"); + assert!( + set_error + .to_string() + 
.contains("expected explicit !!set entry value to be null"), + "{set_error}" + ); + + let omap_error = + from_str_with_options::>("!!omap\n- {a: 1, b: 2}\n", LoadOptions::new()) + .expect_err("typed !!omap rejects multi-pair entries"); + assert!( + omap_error + .to_string() + .contains("expected explicit !!omap entry to contain exactly one pair"), + "{omap_error}" + ); + + let pairs_error = + from_str_with_options::>("!!pairs\n- scalar\n", LoadOptions::new()) + .expect_err("typed !!pairs rejects scalar entries"); + assert!( + pairs_error + .to_string() + .contains("expected single-pair mapping entry for explicit !!pairs"), + "{pairs_error}" + ); +} + +#[test] +fn event_deserializer_retains_tagged_merge_maps_for_value_and_unwraps_typed_targets() { + let input = "base: &base {a: one}\ntarget: !Thing {<<: *base, b: two}\n"; + let value = from_str_with_options::(input, LoadOptions::new()) + .expect("tagged merge map value"); + let tagged = value["target"].as_tagged().expect("target tag retained"); + + assert_eq!(tagged.tag, crate::Tag::new("Thing")); + assert_eq!(tagged.value["a"].as_str(), Some("one")); + assert_eq!(tagged.value["b"].as_str(), Some("two")); + + let typed = from_str_with_options::(input, LoadOptions::new()) + .expect("typed tagged merge map unwraps tag"); + assert_eq!(typed.target["a"], "one"); + assert_eq!(typed.target["b"], "two"); +} + +#[test] +fn event_deserializer_retains_tagged_literal_merge_keys_without_expansion() { + let input = "\ +custom: {!Thing <<: literal, image: app:custom} +string: {!!str <<: literal, image: app:string} +"; + let value = + from_str_with_options::(input, LoadOptions::new()).expect("tagged keys"); + + assert_value_tagged_key(&value["custom"], crate::Tag::new("Thing"), "<<", "literal"); + assert_value_tagged_key(&value["string"], crate::Tag::new("!!str"), "<<", "literal"); + assert_eq!(value["custom"]["image"].as_str(), Some("app:custom")); + assert_eq!(value["string"]["image"].as_str(), Some("app:string")); + + let typed 
= from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect("typed maps unwrap tagged literal keys"); + assert_eq!(typed["custom"]["<<"], "literal"); + assert_eq!(typed["string"]["<<"], "literal"); +} + +#[test] +fn event_deserializer_replays_acyclic_scalar_aliases() { + let input = "base: &base api\nservice: *base\n"; + let parsed = from_str_with_options::>(input, LoadOptions::new()) + .expect("event-backed scalar alias replay"); + + assert_eq!(parsed["base"], "api"); + assert_eq!(parsed["service"], "api"); +} + +#[test] +fn event_deserializer_replays_acyclic_sequence_aliases() { + let input = "base: &base [api, worker]\nservice: *base\n"; + let parsed = from_str_with_options::>>(input, LoadOptions::new()) + .expect("event-backed sequence alias replay"); + + assert_eq!(parsed["base"], ["api", "worker"]); + assert_eq!(parsed["service"], ["api", "worker"]); +} + +#[test] +fn event_deserializer_validates_alias_expanded_mapping_values() { + let input = "base: &base {a: one, b: two}\ntarget: *base\n"; + let parsed = + from_str_with_options::(input, LoadOptions::new()).expect("mapping alias"); + + assert_eq!(parsed.target["a"], "one"); + assert_eq!(parsed.target["b"], "two"); +} + +#[test] +fn event_deserializer_replays_scalar_alias_mapping_keys() { + let input = "root: {anchor: &svc service, ? *svc : api}\n"; + let parsed = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect("event-backed scalar alias mapping key replay"); + + assert_eq!(parsed["root"]["anchor"], "service"); + assert_eq!(parsed["root"]["service"], "api"); +} + +#[test] +fn event_deserializer_rejects_duplicate_alias_mapping_keys() { + let input = "root: {? &name name : api, ? 
*name : worker}\n"; + let error = from_str_with_options::>>( + input, + LoadOptions::new(), + ) + .expect_err("event-backed alias-expanded duplicate keys reject"); + assert!(error.to_string().contains("duplicate mapping key")); +} + +#[test] +fn event_deserializer_rejects_alias_replay_over_budget() { + let input = "base: &base api\nservice: *base\n"; + let error = from_str_with_options::>( + input, + LoadOptions::new().max_alias_expansion_nodes(0), + ) + .expect_err("event-backed alias replay budget rejects"); + + assert!( + error + .to_string() + .contains("alias event replay limit exceeded") + ); +} + +#[test] +fn event_deserializer_rejects_duplicate_keys_in_ignored_mappings() { + let input = "base: &base {a: one, a: two}\ntarget: *base\n"; + let error = from_str_with_options::(input, LoadOptions::new()) + .expect_err("ignored anchor source duplicate keys reject"); + + assert!(error.to_string().contains("duplicate mapping key")); +} + +#[test] +fn event_deserializer_reads_multiple_documents() { + let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [8080]\nenabled: false\nlabels:\n tier: job\noptional: note\n"; + let parsed: Vec = from_documents_str_with_options(input, LoadOptions::new()) + .expect("event-backed document stream"); + + assert_eq!(parsed.len(), 2); + assert_eq!(parsed[0].name, "api"); + assert_eq!(parsed[0].ports, vec![80]); + assert_eq!(parsed[1].name, "worker"); + assert_eq!(parsed[1].ports, vec![8080]); + assert_eq!(parsed[1].labels["tier"], "job"); + assert_eq!(parsed[1].optional.as_deref(), Some("note")); +} + +#[test] +fn event_document_iterator_yields_borrowed_typed_documents() { + let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [8080]\nenabled: false\nlabels: {}\noptional: null\n"; + let mut iter = document_iter_str_with_options::>(input, LoadOptions::new()) + .expect("event-backed document iterator"); + + let first = 
iter.next().expect("first document").expect("first parses"); + assert_eq!(first.name, "api"); + assert!(std::ptr::eq(first.name.as_ptr(), input[10..13].as_ptr())); + + let second = iter + .next() + .expect("second document") + .expect("second parses"); + assert_eq!(second.name, "worker"); + let worker_offset = input.find("worker").expect("worker text in input"); + assert!(std::ptr::eq( + second.name.as_ptr(), + input[worker_offset..worker_offset + "worker".len()].as_ptr() + )); + assert!(iter.next().is_none()); +} + +#[test] +fn event_document_iterator_continues_after_typed_document_error() { + let input = "\ +--- +name: api +ports: [80] +enabled: true +labels: {} +optional: null +--- +name: bad +ports: [70000] +enabled: true +labels: {} +optional: null +--- +name: worker +ports: [8080] +enabled: false +labels: {} +optional: null +"; + let mut iter = document_iter_str_with_options::(input, LoadOptions::new()) + .expect("event-backed document iterator"); + + let first = iter.next().expect("first document").expect("first parses"); + assert_eq!(first.name, "api"); + + let error = iter + .next() + .expect("second document") + .expect_err("second document has typed range error"); + assert_eq!(error.document_index(), Some(1)); + assert!(error.to_string().contains("70000"), "{error}"); + + let third = iter.next().expect("third document").expect("third parses"); + assert_eq!(third.name, "worker"); + assert!(iter.next().is_none()); +} + +#[test] +fn event_document_iterator_defers_later_parse_error_and_then_stops() { + let input = "---\nname: one\n---\n:\tbad\n---\nname: never\n"; + let mut iter = document_iter_str_with_options::(input, LoadOptions::new()) + .expect("event-backed document iterator"); + + let first = iter.next().expect("first document").expect("first parses"); + assert_eq!(first.name, "one"); + + let error = iter + .next() + .expect("second document item") + .expect_err("later parser error"); + assert_eq!(error.document_index(), Some(1)); + 
assert_eq!(error.line(), Some(4)); + assert_eq!(error.column(), Some(2)); + assert!(iter.next().is_none()); +} + +#[test] +fn event_document_iterator_empty_stream_yields_no_documents() { + let mut iter = document_iter_str_with_options::("", LoadOptions::new()) + .expect("empty event-backed document iterator"); + + assert!(iter.next().is_none()); + let collected = from_documents_str_with_options::("", LoadOptions::new()) + .expect("empty document collection"); + assert!(collected.is_empty()); +} + +#[test] +fn event_document_iterator_slice_checks_utf8_and_input_limits() { + let invalid = + match document_iter_slice_with_options::(b"name: \xFF\n", LoadOptions::new()) + { + Ok(_) => panic!("invalid UTF-8 should fail"), + Err(error) => error, + }; + assert!(invalid.to_string().contains("input is not valid UTF-8")); + + let limited = match document_iter_slice_with_options::( + b"name: app\n", + LoadOptions::new().max_input_bytes(4), + ) { + Ok(_) => panic!("input limit should fail"), + Err(error) => error, + }; + assert!( + limited + .to_string() + .contains("YAML input exceeds configured limit of 4 bytes") + ); +} + +#[test] +fn event_document_reader_iterator_uses_owned_input_and_preserves_merge_alias_semantics() { + let input = "\ +--- +base: &base {a: one} +target: {<<: *base, b: two} +--- +base: &base {a: three} +target: *base +"; + let docs = document_iter_reader_with_options::( + Cursor::new(input.as_bytes()), + LoadOptions::new(), + ) + .expect("reader-backed event iterator") + .collect::>>() + .expect("reader-backed documents"); + + assert_eq!(docs.len(), 2); + assert_eq!(docs[0].target["a"], "one"); + assert_eq!(docs[0].target["b"], "two"); + assert_eq!(docs[1].target["a"], "three"); +} + +#[test] +fn event_document_reader_iterator_reports_read_errors_before_iteration() { + let error = match document_iter_reader_with_options::( + FailingAfterPrefixReader::new(b"name: api\n"), + LoadOptions::new(), + ) { + Ok(_) => panic!("reader failure should reject iterator 
construction"), + Err(error) => error, + }; + + assert!(error.to_string().contains("failed to read YAML input")); + assert_eq!(error.location(), None); +} + +#[test] +fn event_deserializer_document_errors_carry_document_index() { + let input = "---\nname: api\nports: [80]\nenabled: true\nlabels: {}\noptional: null\n---\nname: worker\nports: [70000]\nenabled: true\nlabels: {}\noptional: null\n"; + let error = from_documents_str_with_options::(input, LoadOptions::new()) + .expect_err("event-backed stream reports second document error"); + assert_eq!(error.document_index(), Some(1)); +} + +#[test] +fn event_deserializer_skips_ignored_any_without_materializing_values() { + let input = "root:\n - name: api\n ports: [80, 443]\n - nested:\n ok: true\n"; + IgnoredAny::deserialize(EventNodeDeserializer { + source: &mut EventSource::new( + input, + crate::parse::EventStream::from_str(input) + .expect("event stream") + .collect::>>() + .expect("events"), + Schema::Yaml12, + LoadOptions::new().alias_expansion_budget(input.len()), + LoadOptions::new().selected_max_nesting_depth(), + ), + }) + .expect_err("raw stream markers must still be explicit"); + + from_str_with_options::(input, LoadOptions::new()).expect("ignored any"); +} + +fn alias_depth_chain(levels: usize) -> String { + // A literally shallow document (max nesting depth 2) whose final anchor + // expands, via the alias chain, to a structure `levels` deep. + let mut input = String::from("- &n0 0\n"); + for k in 1..levels { + input.push_str(&format!("- &n{k} [*n{prev}]\n", prev = k - 1)); + } + input +} + +#[test] +fn event_deserializer_bounds_alias_expansion_depth() { + // The event-backed path expands aliases lazily while walking, so the + // parser's literal-depth check does not bound the expanded depth. Without + // an explicit ceiling this recurses until the stack overflows; it must + // instead reject, matching the tree-backed `AnchorTable::resolve` guard. 
+ let input = alias_depth_chain(400); + let error = from_str_with_options::>(&input, LoadOptions::new()) + .expect_err("deep alias chain must hit the nesting-depth ceiling"); + assert!( + error.to_string().contains("nesting depth"), + "unexpected error: {error}" + ); +} + +#[test] +fn event_deserializer_allows_alias_chain_within_depth_limit() { + let input = alias_depth_chain(8); + let parsed = from_str_with_options::>(&input, LoadOptions::new()) + .expect("alias chain within the depth limit deserializes"); + assert_eq!(parsed.len(), 8); +} + +#[test] +fn event_deserializer_reads_map_form_enum_variants() { + // Externally-tagged enum variants carrying a payload — the forms the + // earlier scalar-only path rejected. Covers unit, newtype, tuple, and + // struct variants in one sequence. + #[derive(Debug, Deserialize, PartialEq)] + enum EventEnum { + Unit, + Newtype(u32), + Tuple(u8, u8), + Struct { width: u32, height: u32 }, + } + + let input = "\ +- Unit +- Newtype: 7 +- Tuple: [1, 2] +- Struct: + width: 3 + height: 4 +"; + let parsed: Vec = + from_str_with_options(input, LoadOptions::new()).expect("event-backed enum variants"); + assert_eq!( + parsed, + vec![ + EventEnum::Unit, + EventEnum::Newtype(7), + EventEnum::Tuple(1, 2), + EventEnum::Struct { + width: 3, + height: 4, + }, + ] + ); +} + +#[test] +fn event_deserializer_reads_map_form_enum_variant_through_alias() { + #[derive(Debug, Deserialize, PartialEq)] + enum Mode { + Tuned { level: u8 }, + } + + // The anchored definition and the alias must both resolve to the same + // map-form variant. + let parsed = + from_str_with_options::>("- &m {Tuned: {level: 9}}\n- *m\n", LoadOptions::new()) + .expect("aliased map-form enum variant"); + assert_eq!( + parsed, + vec![Mode::Tuned { level: 9 }, Mode::Tuned { level: 9 }] + ); +}