From 5abda8048a7354ac50781e860b15a2770c0466f5 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 25 Mar 2026 12:01:21 -0400 Subject: [PATCH 1/4] Upgrade Core to `2eb445bacd0f0137c01b65c0289aa6ded67b2283` Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- vendor/core/CMakeLists.txt | 9 + vendor/core/config.cmake.in | 3 + vendor/core/src/core/jsonschema/jsonschema.cc | 38 +++-- vendor/core/src/core/uri/canonicalize.cc | 21 +++ vendor/core/src/core/uri/escaping.h | 156 +++++++++++------- vendor/core/src/core/uri/filesystem.cc | 4 +- vendor/core/src/core/uri/parse.cc | 10 +- vendor/core/src/core/uri/recompose.cc | 29 +++- vendor/core/src/lang/error/CMakeLists.txt | 6 + .../error/include/sourcemeta/core/error.h | 17 ++ .../include/sourcemeta/core/error_file.h | 38 +++++ 12 files changed, 249 insertions(+), 84 deletions(-) create mode 100644 vendor/core/src/lang/error/CMakeLists.txt create mode 100644 vendor/core/src/lang/error/include/sourcemeta/core/error.h create mode 100644 vendor/core/src/lang/error/include/sourcemeta/core/error_file.h diff --git a/DEPENDENCIES b/DEPENDENCIES index c8cb61fff..e16cc0dbe 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,3 +1,3 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 28d02328c467616ecec2bffd37c07930974fedf2 +core https://github.com/sourcemeta/core 2eb445bacd0f0137c01b65c0289aa6ded67b2283 jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 06481b143722c8c06671bd40dcde99b422ffd531 diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index 4c16e6d86..fdbd3feee 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -8,6 +8,7 @@ option(SOURCEMETA_CORE_LANG_IO "Build the Sourcemeta Core language I/O library" option(SOURCEMETA_CORE_LANG_PROCESS "Build the Sourcemeta Core language Process library" ON) option(SOURCEMETA_CORE_LANG_PARALLEL "Build the Sourcemeta Core language parallel library" ON) option(SOURCEMETA_CORE_LANG_NUMERIC "Build the Sourcemeta Core language numeric library" ON) +option(SOURCEMETA_CORE_LANG_ERROR "Build the Sourcemeta Core language error library" ON) option(SOURCEMETA_CORE_LANG_OPTIONS "Build the Sourcemeta Core Options library" ON) option(SOURCEMETA_CORE_UNICODE "Build the Sourcemeta Core Unicode library" ON) option(SOURCEMETA_CORE_PUNYCODE "Build the Sourcemeta Core Punycode library" ON) @@ -81,6 +82,10 @@ if(SOURCEMETA_CORE_LANG_NUMERIC) add_subdirectory(src/lang/numeric) endif() +if(SOURCEMETA_CORE_LANG_ERROR) + add_subdirectory(src/lang/error) +endif() + if(SOURCEMETA_CORE_LANG_OPTIONS) add_subdirectory(src/lang/options) endif() @@ -200,6 +205,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/numeric) endif() + if(SOURCEMETA_CORE_LANG_ERROR) + add_subdirectory(test/error) + endif() + if(SOURCEMETA_CORE_LANG_OPTIONS) add_subdirectory(test/options) endif() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 236073c79..e311a1b50 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -25,6 +25,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS html) list(APPEND SOURCEMETA_CORE_COMPONENTS alterschema) list(APPEND SOURCEMETA_CORE_COMPONENTS editorschema) + list(APPEND SOURCEMETA_CORE_COMPONENTS error) list(APPEND SOURCEMETA_CORE_COMPONENTS options) endif() @@ -127,6 +128,8 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_editorschema.cmake") + elseif(component STREQUAL "error") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_error.cmake") elseif(component STREQUAL "options") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_options.cmake") else() diff --git a/vendor/core/src/core/jsonschema/jsonschema.cc b/vendor/core/src/core/jsonschema/jsonschema.cc index 408f911e3..f7071696d 100644 --- a/vendor/core/src/core/jsonschema/jsonschema.cc +++ b/vendor/core/src/core/jsonschema/jsonschema.cc @@ -7,8 +7,10 @@ #include // std::numeric_limits #include // std::accumulate #include // std::ostringstream +#include // std::string_view #include // std::remove_reference_t #include // std::unordered_map +#include // std::unordered_set #include // std::move auto sourcemeta::core::is_schema(const sourcemeta::core::JSON &schema) -> bool { @@ -244,10 +246,12 @@ auto sourcemeta::core::metaschema( return maybe_metaschema.value(); } -auto sourcemeta::core::base_dialect( - const sourcemeta::core::JSON &schema, - const sourcemeta::core::SchemaResolver &resolver, - std::string_view default_dialect) -> std::optional { +static auto +base_dialect_with_visited(const sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect, + std::unordered_set &visited) + -> std::optional { assert(sourcemeta::core::is_schema(schema)); const std::string_view effective_dialect{ sourcemeta::core::dialect(schema, default_dialect)}; @@ -259,19 +263,24 @@ auto sourcemeta::core::base_dialect( } // Check for known base dialects - const auto result{to_base_dialect(effective_dialect)}; + const auto result{sourcemeta::core::to_base_dialect(effective_dialect)}; if (result.has_value()) { return result; } + // Detect cycles in the metaschema chain + if (!visited.emplace(effective_dialect).second) { + throw sourcemeta::core::SchemaUnknownBaseDialectError(); + } + // Otherwise, traverse the metaschema hierarchy up const std::optional metaschema{ resolver(effective_dialect)}; if (!metaschema.has_value()) { - URI effective_dialect_uri; + sourcemeta::core::URI effective_dialect_uri; try { - effective_dialect_uri = URI{effective_dialect}; - } catch (const URIParseError &) { + effective_dialect_uri = sourcemeta::core::URI{effective_dialect}; + } catch (const sourcemeta::core::URIParseError &) { throw sourcemeta::core::SchemaKeywordError( "$schema", std::string{effective_dialect}, "The dialect is not a valid URI"); @@ -292,12 +301,21 @@ auto sourcemeta::core::base_dialect( // If the metaschema declares the same dialect (self-descriptive), and it's // not an official dialect, we cannot determine the base dialect const std::string_view metaschema_dialect{ - dialect(metaschema.value(), effective_dialect)}; + sourcemeta::core::dialect(metaschema.value(), effective_dialect)}; if (metaschema_dialect == effective_dialect) { throw sourcemeta::core::SchemaUnknownBaseDialectError(); } - return base_dialect(metaschema.value(), resolver, effective_dialect); + return base_dialect_with_visited(metaschema.value(), resolver, + effective_dialect, visited); +} + +auto sourcemeta::core::base_dialect( + const sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect) -> std::optional { + std::unordered_set visited; + return base_dialect_with_visited(schema, resolver, default_dialect, visited); } namespace { diff --git a/vendor/core/src/core/uri/canonicalize.cc b/vendor/core/src/core/uri/canonicalize.cc index 8ec0d39cd..cf1e9615a 100644 --- a/vendor/core/src/core/uri/canonicalize.cc +++ b/vendor/core/src/core/uri/canonicalize.cc @@ -1,5 +1,6 @@ #include +#include "escaping.h" #include "normalize.h" #include // std::tolower @@ -48,6 +49,26 @@ auto URI::canonicalize() -> URI & { this->fragment_ = std::nullopt; } + if (this->path_.has_value()) { + uri_normalize_percent_encoding_inplace(this->path_.value()); + } + + if (this->query_.has_value()) { + uri_normalize_percent_encoding_inplace(this->query_.value()); + } + + if (this->fragment_.has_value()) { + uri_normalize_percent_encoding_inplace(this->fragment_.value()); + } + + if (this->userinfo_.has_value()) { + uri_normalize_percent_encoding_inplace(this->userinfo_.value()); + } + + if (this->host_.has_value()) { + uri_normalize_percent_encoding_inplace(this->host_.value()); + } + // Remove default ports (80 for http, 443 for https) if (this->port_.has_value() && this->scheme_.has_value()) { const auto port_value = this->port_.value(); diff --git a/vendor/core/src/core/uri/escaping.h b/vendor/core/src/core/uri/escaping.h index f9d7b5540..cc67a518d 100644 --- a/vendor/core/src/core/uri/escaping.h +++ b/vendor/core/src/core/uri/escaping.h @@ -20,15 +20,12 @@ enum class URIEscapeMode : std::uint8_t { // "sub-delims" ABNF categories // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A SkipSubDelims, - // Escape every characted that is not in either the URI "fragment" category - // - // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - // pct-encoded = "%" HEXDIG HEXDIG - // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / - // "=" - // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - // fragment = *( pchar / "/" / "?" ) - // + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // path = *( pchar / "/" ) + // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A + Path, + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // fragment = *( pchar / "/" / "?" ) // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A Fragment, // Like SkipSubDelims but also preserves ":" for Windows filesystem paths @@ -68,21 +65,23 @@ inline auto uri_escape(std::istream &input, std::ostream &output, continue; } - if (mode == URIEscapeMode::SkipSubDelims || + if (mode == URIEscapeMode::SkipSubDelims || mode == URIEscapeMode::Path || mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem) { - // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" - // / "=" - // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A if (uri_is_sub_delim(character)) { output << character; continue; } } + if (mode == URIEscapeMode::Path) { + if (character == URI_COLON || character == URI_AT || + character == URI_SLASH) { + output << character; + continue; + } + } + if (mode == URIEscapeMode::Fragment) { - // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A - // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - // fragment = *( pchar / "/" / "?" ) if (character == URI_COLON || character == URI_AT || character == URI_SLASH || character == URI_QUESTION) { output << character; @@ -91,20 +90,19 @@ inline auto uri_escape(std::istream &input, std::ostream &output, } if (mode == URIEscapeMode::Filesystem) { - // Preserve ":" for Windows drive letters (e.g., C:) if (character == URI_COLON) { output << character; continue; } } - // Percent encode this character - output << URI_PERCENT << std::hex << std::uppercase - << +(static_cast(character)); + const auto byte{static_cast(character)}; + const auto high{(byte >> 4) & 0x0F}; + const auto low{byte & 0x0F}; + output << URI_PERCENT; + output << static_cast(high < 10 ? '0' + high : 'A' + high - 10); + output << static_cast(low < 10 ? '0' + low : 'A' + low - 10); } - - // Reset stream format flags - output << std::dec << std::nouppercase; } inline auto uri_unescape(std::istream &input, std::ostream &output) -> void { @@ -133,55 +131,87 @@ inline auto uri_unescape(std::istream &input, std::ostream &output) -> void { } } -// Full unescaping for URI normalization (in-place modification) -// Decodes all percent-encoded sequences -// Modifies the input string in-place for zero-copy performance -inline auto uri_unescape_selective_inplace(std::string &str) -> void { - std::string::size_type write_pos = 0; - - for (std::string::size_type read_pos = 0; read_pos < str.size();) { - if (str[read_pos] == URI_PERCENT && read_pos + 2 < str.size() && - std::isxdigit(static_cast(str[read_pos + 1])) && - std::isxdigit(static_cast(str[read_pos + 2]))) { - // Parse the hex value - const auto first_digit = str[read_pos + 1]; - const auto second_digit = str[read_pos + 2]; - - const auto hex_to_int = [](char c) -> unsigned char { - if (c >= '0' && c <= '9') { - return static_cast(c - '0'); - } - if (c >= 'A' && c <= 'F') { - return static_cast(c - 'A' + 10); - } - if (c >= 'a' && c <= 'f') { - return static_cast(c - 'a' + 10); - } - return 0; - }; +inline auto uri_hex_to_int(char character) -> unsigned char { + if (character >= '0' && character <= '9') { + return static_cast(character - '0'); + } + + if (character >= 'A' && character <= 'F') { + return static_cast(character - 'A' + 10); + } + + if (character >= 'a' && character <= 'f') { + return static_cast(character - 'a' + 10); + } + + return 0; +} + +inline auto uri_is_percent_encoded(const std::string &input, + std::string::size_type position) -> bool { + return position < input.size() && input[position] == URI_PERCENT && + position + 2 < input.size() && + std::isxdigit(static_cast(input[position + 1])) && + std::isxdigit(static_cast(input[position + 2])); +} + +inline auto uri_unescape_all_inplace(std::string &input) -> void { + std::string::size_type write_position = 0; + + for (std::string::size_type read_position = 0; + read_position < input.size();) { + if (uri_is_percent_encoded(input, read_position)) { + const auto value = static_cast( + (uri_hex_to_int(input[read_position + 1]) << 4) | + uri_hex_to_int(input[read_position + 2])); + input[write_position++] = static_cast(value); + read_position += 3; + } else { + input[write_position++] = input[read_position++]; + } + } + + input.resize(write_position); +} + +inline auto uri_unescape_unreserved_inplace(std::string &input) -> void { + std::string::size_type write_position = 0; + for (std::string::size_type read_position = 0; + read_position < input.size();) { + if (uri_is_percent_encoded(input, read_position)) { const auto value = static_cast( - (hex_to_int(first_digit) << 4) | hex_to_int(second_digit)); + (uri_hex_to_int(input[read_position + 1]) << 4) | + uri_hex_to_int(input[read_position + 2])); + if (uri_is_unreserved(static_cast(value))) { + input[write_position++] = static_cast(value); + } else { + input[write_position++] = input[read_position]; + input[write_position++] = input[read_position + 1]; + input[write_position++] = input[read_position + 2]; + } - // Decode all percent-encoded sequences - // Internal storage is always fully decoded - str[write_pos++] = static_cast(value); - read_pos += 3; + read_position += 3; } else { - str[write_pos++] = str[read_pos++]; + input[write_position++] = input[read_position++]; } } - str.resize(write_pos); + input.resize(write_position); } -// Full unescaping for URI normalization (copy version for compatibility) -// Decodes all percent-encoded sequences -inline auto uri_unescape_selective(const std::string_view input) - -> std::string { - std::string result{input}; - uri_unescape_selective_inplace(result); - return result; +inline auto uri_normalize_percent_encoding_inplace(std::string &input) -> void { + for (std::string::size_type position = 0; position < input.size();) { + if (uri_is_percent_encoded(input, position)) { + input[position + 1] = static_cast( + std::toupper(static_cast(input[position + 1]))); + input[position + 2] = static_cast( + std::toupper(static_cast(input[position + 2]))); + position += 3; + } else { + ++position; + } + } } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/uri/filesystem.cc b/vendor/core/src/core/uri/filesystem.cc index 9b30b4618..d9d8da269 100644 --- a/vendor/core/src/core/uri/filesystem.cc +++ b/vendor/core/src/core/uri/filesystem.cc @@ -1,5 +1,7 @@ #include +#include "escaping.h" + #include // std::ranges::replace #include // std::filesystem #include // std::advance, std::next @@ -25,7 +27,7 @@ auto URI::to_path() const -> std::filesystem::path { std::ranges::replace(path, '/', '\\'); } - // Path is already fully decoded, just return it + uri_unescape_all_inplace(path); return path; } diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index bb1f8bec5..09acb7d44 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -456,12 +456,12 @@ auto parse_authority(const std::string_view input, } else { auto userinfo_raw = parse_userinfo(input, position); if (userinfo_raw.has_value()) { - uri_unescape_selective_inplace(userinfo_raw.value()); + uri_unescape_unreserved_inplace(userinfo_raw.value()); userinfo = std::move(userinfo_raw.value()); } auto host_raw = parse_host(input, position); - uri_unescape_selective_inplace(host_raw); + uri_unescape_unreserved_inplace(host_raw); host = std::move(host_raw); } @@ -556,7 +556,7 @@ auto do_parse(const std::string_view input, } } - uri_unescape_selective_inplace(parsed_path.value()); + uri_unescape_unreserved_inplace(parsed_path.value()); path = std::move(parsed_path.value()); } else if (has_authority || has_scheme) { if (input.ends_with(URI_SLASH) || input == "/") { @@ -587,13 +587,13 @@ auto do_parse(const std::string_view input, } else { auto parsed_query = parse_query(input, position); if (parsed_query.has_value()) { - uri_unescape_selective_inplace(parsed_query.value()); + uri_unescape_unreserved_inplace(parsed_query.value()); query = std::move(parsed_query.value()); } auto parsed_fragment = parse_fragment(input, position); if (parsed_fragment.has_value()) { - uri_unescape_selective_inplace(parsed_fragment.value()); + uri_unescape_unreserved_inplace(parsed_fragment.value()); fragment = std::move(parsed_fragment.value()); } } diff --git a/vendor/core/src/core/uri/recompose.cc b/vendor/core/src/core/uri/recompose.cc index 916de8cef..ec59a20fb 100644 --- a/vendor/core/src/core/uri/recompose.cc +++ b/vendor/core/src/core/uri/recompose.cc @@ -17,13 +17,26 @@ auto escape_component_to_string(std::string &output, std::string_view input, const URIEscapeMode mode) -> void { output.reserve(output.size() + input.size() * 3); - for (const char character : input) { + for (std::string_view::size_type index = 0; index < input.size(); ++index) { + const char character = input[index]; + + // Preserve existing percent-encoded sequences + if (character == URI_PERCENT && index + 2 < input.size() && + std::isxdigit(static_cast(input[index + 1])) && + std::isxdigit(static_cast(input[index + 2]))) { + output += input[index]; + output += input[index + 1]; + output += input[index + 2]; + index += 2; + continue; + } + if (uri_is_unreserved(character)) { output += character; continue; } - if (mode == URIEscapeMode::SkipSubDelims || + if (mode == URIEscapeMode::SkipSubDelims || mode == URIEscapeMode::Path || mode == URIEscapeMode::Fragment || mode == URIEscapeMode::Filesystem) { if (uri_is_sub_delim(character)) { output += character; @@ -31,6 +44,14 @@ auto escape_component_to_string(std::string &output, std::string_view input, } } + if (mode == URIEscapeMode::Path) { + if (character == URI_COLON || character == URI_AT || + character == URI_SLASH) { + output += character; + continue; + } + } + if (mode == URIEscapeMode::Fragment) { if (character == URI_COLON || character == URI_AT || character == URI_SLASH || character == URI_QUESTION) { @@ -132,9 +153,9 @@ auto URI::recompose_without_fragment() const -> std::optional { if (result_scheme.has_value() && !has_authority && path_value.starts_with("/") && !path_value.starts_with("//")) { escape_component_to_string(result, path_value.substr(1), - URIEscapeMode::Fragment); + URIEscapeMode::Path); } else { - escape_component_to_string(result, path_value, URIEscapeMode::Fragment); + escape_component_to_string(result, path_value, URIEscapeMode::Path); } } diff --git a/vendor/core/src/lang/error/CMakeLists.txt b/vendor/core/src/lang/error/CMakeLists.txt new file mode 100644 index 000000000..f712f1a38 --- /dev/null +++ b/vendor/core/src/lang/error/CMakeLists.txt @@ -0,0 +1,6 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME error + PRIVATE_HEADERS file.h) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME error) +endif() diff --git a/vendor/core/src/lang/error/include/sourcemeta/core/error.h b/vendor/core/src/lang/error/include/sourcemeta/core/error.h new file mode 100644 index 000000000..268c27553 --- /dev/null +++ b/vendor/core/src/lang/error/include/sourcemeta/core/error.h @@ -0,0 +1,17 @@ +#ifndef SOURCEMETA_CORE_ERROR_H_ +#define SOURCEMETA_CORE_ERROR_H_ + +// NOLINTBEGIN(misc-include-cleaner) +#include +// NOLINTEND(misc-include-cleaner) + +/// @defgroup error Error +/// @brief A growing collection of error utilities +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +#endif diff --git a/vendor/core/src/lang/error/include/sourcemeta/core/error_file.h b/vendor/core/src/lang/error/include/sourcemeta/core/error_file.h new file mode 100644 index 000000000..b597fe677 --- /dev/null +++ b/vendor/core/src/lang/error/include/sourcemeta/core/error_file.h @@ -0,0 +1,38 @@ +#ifndef SOURCEMETA_CORE_ERROR_FILE_H_ +#define SOURCEMETA_CORE_ERROR_FILE_H_ + +#include // assert +#include // std::filesystem::path, std::filesystem::exists +#include // std::move, std::forward + +namespace sourcemeta::core { + +/// @ingroup error +/// A wrapper that decorates an arbitrary exception type with a file path. +/// +/// ```cpp +/// #include +/// #include +/// +/// throw sourcemeta::core::FileError( +/// "/tmp/foo.json", "something went wrong"); +/// ``` +template class FileError : public T { +public: + template + FileError(std::filesystem::path path, Args &&...args) + : T{std::forward(args)...}, path_{std::move(path)} { + assert(std::filesystem::exists(this->path_)); + } + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; +}; + +} // namespace sourcemeta::core + +#endif From 6479b3eda960cac81724a831a9f43cb942d2412e Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 25 Mar 2026 12:34:18 -0400 Subject: [PATCH 2/4] Add breaking test Signed-off-by: Juan Cruz Viotti --- test/evaluator/evaluator_2020_12_test.cc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/evaluator/evaluator_2020_12_test.cc b/test/evaluator/evaluator_2020_12_test.cc index b511a23b2..86a65c98d 100644 --- a/test/evaluator/evaluator_2020_12_test.cc +++ b/test/evaluator/evaluator_2020_12_test.cc @@ -1520,6 +1520,27 @@ TEST(Evaluator_2020_12, dynamicRef_3) { "in scope that declared the dynamic anchor \"meta\""); } +TEST(Evaluator_2020_12, ref_with_percent_encoded_colon_in_fragment) { + const sourcemeta::core::JSON schema{sourcemeta::core::parse_json(R"JSON({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "#/$defs/https%3A~1~1example.com~1schema/type", + "$defs": { + "https://example.com/schema/type": { "type": "string" } + } + })JSON")}; + + const sourcemeta::core::JSON instance{"foo"}; + EVALUATE_WITH_TRACE_FAST_SUCCESS(schema, instance, 1, ""); + + EVALUATE_TRACE_PRE(0, AssertionTypeStrict, "/$ref/type", + "#/$defs/https:~1~1example.com~1schema~1type/type", ""); + EVALUATE_TRACE_POST_SUCCESS(0, AssertionTypeStrict, "/$ref/type", + "#/$defs/https:~1~1example.com~1schema~1type/type", + ""); + EVALUATE_TRACE_POST_DESCRIBE(instance, 0, + "The value was expected to be of type string"); +} + TEST(Evaluator_2020_12, definitions_1) { const sourcemeta::core::JSON schema{sourcemeta::core::parse_json(R"JSON({ "$schema": "https://json-schema.org/draft/2020-12/schema", From f1b43e359ccc8aaaaac253cf83ee95d123f1650f Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 25 Mar 2026 13:13:29 -0400 Subject: [PATCH 3/4] WIP Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- vendor/core/src/core/uri/canonicalize.cc | 23 +++++++++++++++++- vendor/core/src/core/uri/escaping.h | 30 ++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/DEPENDENCIES b/DEPENDENCIES index e16cc0dbe..2ebc53512 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,3 +1,3 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 2eb445bacd0f0137c01b65c0289aa6ded67b2283 +core https://github.com/sourcemeta/core 63f8b0fc592fc78694ae59e0b6cbdf009a65b135 jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 06481b143722c8c06671bd40dcde99b422ffd531 diff --git a/vendor/core/src/core/uri/canonicalize.cc b/vendor/core/src/core/uri/canonicalize.cc index cf1e9615a..528c69ed5 100644 --- a/vendor/core/src/core/uri/canonicalize.cc +++ b/vendor/core/src/core/uri/canonicalize.cc @@ -4,7 +4,6 @@ #include "normalize.h" #include // std::tolower -#include // std::uint32_t #include // std::optional #include // std::string @@ -49,24 +48,46 @@ auto URI::canonicalize() -> URI & { this->fragment_ = std::nullopt; } + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A + const auto is_pchar = [](char character) -> bool { + return uri_is_unreserved(character) || uri_is_sub_delim(character) || + character == URI_COLON || character == URI_AT; + }; + if (this->path_.has_value()) { uri_normalize_percent_encoding_inplace(this->path_.value()); + uri_unescape_if_inplace(this->path_.value(), is_pchar); } if (this->query_.has_value()) { uri_normalize_percent_encoding_inplace(this->query_.value()); + uri_unescape_if_inplace(this->query_.value(), [&](char character) { + return is_pchar(character) || character == URI_SLASH || + character == URI_QUESTION; + }); } if (this->fragment_.has_value()) { uri_normalize_percent_encoding_inplace(this->fragment_.value()); + uri_unescape_if_inplace(this->fragment_.value(), [&](char character) { + return is_pchar(character) || character == URI_SLASH || + character == URI_QUESTION; + }); } if (this->userinfo_.has_value()) { uri_normalize_percent_encoding_inplace(this->userinfo_.value()); + uri_unescape_if_inplace(this->userinfo_.value(), [&](char character) { + return uri_is_sub_delim(character) || character == URI_COLON; + }); } if (this->host_.has_value()) { uri_normalize_percent_encoding_inplace(this->host_.value()); + uri_unescape_if_inplace(this->host_.value(), [](char character) { + return uri_is_sub_delim(character); + }); } // Remove default ports (80 for http, 443 for https) diff --git a/vendor/core/src/core/uri/escaping.h b/vendor/core/src/core/uri/escaping.h index cc67a518d..153bb8bb0 100644 --- a/vendor/core/src/core/uri/escaping.h +++ b/vendor/core/src/core/uri/escaping.h @@ -214,6 +214,36 @@ inline auto uri_normalize_percent_encoding_inplace(std::string &input) -> void { } } +template +inline auto uri_unescape_if_inplace(std::string &input, Predicate should_decode) + -> void { + std::string::size_type write_position = 0; + + for (std::string::size_type read_position = 0; + read_position < input.size();) { + if (uri_is_percent_encoded(input, read_position)) { + const auto value = static_cast( + (uri_hex_to_int(input[read_position + 1]) << 4) | + uri_hex_to_int(input[read_position + 2])); + const auto decoded = static_cast(value); + + if (should_decode(decoded)) { + input[write_position++] = decoded; + } else { + input[write_position++] = input[read_position]; + input[write_position++] = input[read_position + 1]; + input[write_position++] = input[read_position + 2]; + } + + read_position += 3; + } else { + input[write_position++] = input[read_position++]; + } + } + + input.resize(write_position); +} + } // namespace sourcemeta::core #endif From 2ab27a0369b054764f7057d9d3f5f0a1c196e665 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 25 Mar 2026 13:24:16 -0400 Subject: [PATCH 4/4] Fix Signed-off-by: Juan Cruz Viotti --- test/evaluator/evaluator_2020_12_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/evaluator/evaluator_2020_12_test.cc b/test/evaluator/evaluator_2020_12_test.cc index 86a65c98d..cbf7e2ed3 100644 --- a/test/evaluator/evaluator_2020_12_test.cc +++ b/test/evaluator/evaluator_2020_12_test.cc @@ -1523,7 +1523,7 @@ TEST(Evaluator_2020_12, dynamicRef_3) { TEST(Evaluator_2020_12, ref_with_percent_encoded_colon_in_fragment) { const sourcemeta::core::JSON schema{sourcemeta::core::parse_json(R"JSON({ "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "#/$defs/https%3A~1~1example.com~1schema/type", + "$ref": "#/$defs/https%3A~1~1example.com~1schema~1type", "$defs": { "https://example.com/schema/type": { "type": "string" } } @@ -1534,9 +1534,9 @@ TEST(Evaluator_2020_12, ref_with_percent_encoded_colon_in_fragment) { EVALUATE_TRACE_PRE(0, AssertionTypeStrict, "/$ref/type", "#/$defs/https:~1~1example.com~1schema~1type/type", ""); - EVALUATE_TRACE_POST_SUCCESS(0, AssertionTypeStrict, "/$ref/type", - "#/$defs/https:~1~1example.com~1schema~1type/type", - ""); + EVALUATE_TRACE_POST_SUCCESS( + 0, AssertionTypeStrict, "/$ref/type", + "#/$defs/https:~1~1example.com~1schema~1type/type", ""); EVALUATE_TRACE_POST_DESCRIBE(instance, 0, "The value was expected to be of type string"); }