diff --git a/DEPENDENCIES b/DEPENDENCIES index a8795d7c..6bb005fa 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,3 +1,3 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 56eab6ef118e9731df539d3a507db1c23241f911 +core https://github.com/sourcemeta/core e2864c369f7117a55aada78617fa2658938f0965 jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 06481b143722c8c06671bd40dcde99b422ffd531 diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h index 4e44ce62..1f2fbd84 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h @@ -317,6 +317,45 @@ auto base_dialect(const JSON &schema, const SchemaResolver &resolver, std::string_view default_dialect = "") -> std::optional; +/// @ingroup jsonschema +/// +/// Parse the `$vocabulary` keyword from a given schema, if set. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const sourcemeta::core::JSON document = +/// sourcemeta::core::parse_json(R"JSON({ +/// "$schema": "https://json-schema.org/draft/2020-12/schema", +/// "$vocabulary": { +/// "https://json-schema.org/draft/2020-12/vocab/core": true, +/// "https://json-schema.org/draft/2020-12/vocab/applicator": true +/// } +/// })JSON"); +/// +/// const auto result{ +/// sourcemeta::core::parse_vocabularies( +/// document, sourcemeta::core::schema_resolver)}; +/// +/// assert(result.has_value()); +/// assert(result->size() == 2); +/// ``` +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto parse_vocabularies(const JSON &schema, const SchemaResolver &resolver, + std::string_view default_dialect = "") + -> std::optional; + +/// @ingroup jsonschema +/// +/// A shortcut to sourcemeta::core::parse_vocabularies when the base dialect +/// is already known. +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto parse_vocabularies(const JSON &schema, + const SchemaBaseDialect base_dialect) + -> std::optional; + /// @ingroup jsonschema /// /// List the vocabularies that a specific schema makes use of. If you set a diff --git a/vendor/core/src/core/jsonschema/jsonschema.cc b/vendor/core/src/core/jsonschema/jsonschema.cc index f7071696..56dabda6 100644 --- a/vendor/core/src/core/jsonschema/jsonschema.cc +++ b/vendor/core/src/core/jsonschema/jsonschema.cc @@ -440,6 +440,55 @@ auto is_pre_vocabulary_base_dialect( } } // namespace +auto sourcemeta::core::parse_vocabularies( + const sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaBaseDialect base_dialect) + -> std::optional { + if (base_dialect != + sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12 && + base_dialect != + sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12_Hyper && + base_dialect != + sourcemeta::core::SchemaBaseDialect::JSON_Schema_2019_09 && + base_dialect != + sourcemeta::core::SchemaBaseDialect::JSON_Schema_2019_09_Hyper) { + return std::nullopt; + } + + if (!schema.is_object()) { + return std::nullopt; + } + + const auto *vocabulary_entry{schema.try_at("$vocabulary")}; + if (!vocabulary_entry) { + return std::nullopt; + } + + assert(vocabulary_entry->is_object()); + sourcemeta::core::Vocabularies result; + for (const auto &entry : vocabulary_entry->as_object()) { + assert(entry.second.is_boolean()); + result.insert(entry.first, entry.second.to_boolean()); + } + + return result; +} + +auto sourcemeta::core::parse_vocabularies( + const sourcemeta::core::JSON &schema, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect) + -> std::optional { + const auto schema_base_dialect{ + sourcemeta::core::base_dialect(schema, resolver, default_dialect)}; + if (schema_base_dialect.has_value()) { + return sourcemeta::core::parse_vocabularies(schema, + schema_base_dialect.value()); + } else { + return std::nullopt; + } +} + auto sourcemeta::core::vocabularies( const sourcemeta::core::JSON &schema, const sourcemeta::core::SchemaResolver &resolver, @@ -545,16 +594,10 @@ auto sourcemeta::core::vocabularies(const SchemaResolver &resolver, * dialect */ - Vocabularies result; const auto core{core_vocabulary_known(base_dialect)}; - if (schema_dialect.defines("$vocabulary")) { - const sourcemeta::core::JSON &vocabularies{ - schema_dialect.at("$vocabulary")}; - assert(vocabularies.is_object()); - for (const auto &entry : vocabularies.as_object()) { - result.insert(entry.first, entry.second.to_boolean()); - } - } else { + auto result{parse_vocabularies(schema_dialect, base_dialect) + .value_or(Vocabularies{})}; + if (result.empty()) { result.insert(core, true); } diff --git a/vendor/core/src/extension/alterschema/CMakeLists.txt b/vendor/core/src/extension/alterschema/CMakeLists.txt index 3d3c88df..43353c9f 100644 --- a/vendor/core/src/extension/alterschema/CMakeLists.txt +++ b/vendor/core/src/extension/alterschema/CMakeLists.txt @@ -82,6 +82,8 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME alterschema linter/duplicate_examples.h linter/enum_to_const.h linter/equal_numeric_bounds_to_const.h + linter/forbid_empty_enum.h + linter/invalid_external_ref.h linter/items_array_default.h linter/items_schema_default.h linter/multiple_of_default.h diff --git a/vendor/core/src/extension/alterschema/alterschema.cc b/vendor/core/src/extension/alterschema/alterschema.cc index be4bf3ba..792764d2 100644 --- a/vendor/core/src/extension/alterschema/alterschema.cc +++ b/vendor/core/src/extension/alterschema/alterschema.cc @@ -5,6 +5,7 @@ #include // std::sort, std::unique #include // std::floor #include // std::back_inserter +#include // std::unique_ptr, std::make_unique #include // std::unordered_map #include // std::unordered_set #include // std::move @@ -110,6 +111,8 @@ inline auto APPLIES_TO_POINTERS(std::vector &&keywords) #include "linter/duplicate_examples.h" #include "linter/enum_to_const.h" #include "linter/equal_numeric_bounds_to_const.h" +#include "linter/forbid_empty_enum.h" +#include "linter/invalid_external_ref.h" #include "linter/items_array_default.h" #include "linter/items_schema_default.h" #include "linter/multiple_of_default.h" @@ -226,6 +229,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); @@ -237,6 +241,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); } bundle.add(); diff --git a/vendor/core/src/extension/alterschema/linter/forbid_empty_enum.h b/vendor/core/src/extension/alterschema/linter/forbid_empty_enum.h new file mode 100644 index 00000000..279d20f5 --- /dev/null +++ b/vendor/core/src/extension/alterschema/linter/forbid_empty_enum.h @@ -0,0 +1,36 @@ +class ForbidEmptyEnum final : public SchemaTransformRule { +public: + using mutates = std::true_type; + using reframe_after_transform = std::true_type; + ForbidEmptyEnum() + : SchemaTransformRule{"forbid_empty_enum", + "An empty `enum` validates nothing and is " + "unsatisfiable"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::core::Vocabularies &vocabularies, + const sourcemeta::core::SchemaFrame &frame, + const sourcemeta::core::SchemaFrame::Location &location, + const sourcemeta::core::SchemaWalker &, + const sourcemeta::core::SchemaResolver &) const + -> sourcemeta::core::SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4}) && + schema.is_object() && !schema.defines("not") && + schema.defines("enum") && schema.at("enum").is_array() && + schema.at("enum").empty()); + ONLY_CONTINUE_IF(!frame.has_references_through(location.pointer)); + return APPLIES_TO_KEYWORDS("enum"); + } + + auto transform(JSON &schema, const Result &) const -> void override { + schema.at("enum").into(JSON::make_object()); + schema.rename("enum", "not"); + } +}; diff --git a/vendor/core/src/extension/alterschema/linter/invalid_external_ref.h b/vendor/core/src/extension/alterschema/linter/invalid_external_ref.h new file mode 100644 index 00000000..d0dbcf50 --- /dev/null +++ b/vendor/core/src/extension/alterschema/linter/invalid_external_ref.h @@ -0,0 +1,112 @@ +class InvalidExternalRef final : public SchemaTransformRule { +public: + using mutates = std::false_type; + using reframe_after_transform = std::false_type; + InvalidExternalRef() + : SchemaTransformRule{ + "invalid_external_ref", + "External references must point to schemas that can be " + "resolved"} {}; + + [[nodiscard]] auto + condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies, + const SchemaFrame &frame, const SchemaFrame::Location &location, + const SchemaWalker &walker, const SchemaResolver &resolver) const + -> SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(!frame.standalone()); + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Core, + Vocabularies::Known::JSON_Schema_2019_09_Core, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_3})); + ONLY_CONTINUE_IF(schema.is_object() && schema.defines(KEYWORD) && + schema.at(KEYWORD).is_string()); + + auto keyword_pointer{location.pointer}; + keyword_pointer.push_back(std::cref(KEYWORD)); + const auto reference_entry{ + frame.reference(SchemaReferenceType::Static, keyword_pointer)}; + ONLY_CONTINUE_IF(reference_entry.has_value()); + + // If the destination exists in the frame, it is an internal reference + ONLY_CONTINUE_IF( + !frame.traverse(reference_entry->get().destination).has_value()); + + const auto &reference_base{reference_entry->get().base}; + + // Empty base with unresolvable destination is a local reference problem + ONLY_CONTINUE_IF(!reference_base.empty()); + + // Known official metaschemas are always resolvable + ONLY_CONTINUE_IF(!is_known_schema(reference_base)); + + // If the base exists in the frame, the reference is internal (e.g. an + // embedded $id). A bad fragment on an internal base is handled by the + // unknown_local_ref rule instead + ONLY_CONTINUE_IF(!frame.traverse(reference_base).has_value()); + + const auto &has_fragment{reference_entry->get().fragment.has_value()}; + const JSON::String base_key{reference_base}; + + // Check the resolver cache to avoid redundant lookups + const auto cached{this->resolver_cache_.find(base_key)}; + if (cached != this->resolver_cache_.end()) { + if (!cached->second.has_value()) { + return APPLIES_TO_KEYWORDS(KEYWORD); + } + + if (has_fragment) { + return this->is_fragment_invalid(reference_entry->get(), cached->second, + base_key, walker, resolver, location) + ? APPLIES_TO_KEYWORDS(KEYWORD) + : false; + } + + return false; + } + + auto remote{resolver(reference_base)}; + const auto &[entry, + _]{this->resolver_cache_.emplace(base_key, std::move(remote))}; + if (!entry->second.has_value()) { + return APPLIES_TO_KEYWORDS(KEYWORD); + } + + if (has_fragment) { + return this->is_fragment_invalid(reference_entry->get(), entry->second, + base_key, walker, resolver, location) + ? APPLIES_TO_KEYWORDS(KEYWORD) + : false; + } + + return false; + } + +private: + static inline const std::string KEYWORD{"$ref"}; + mutable std::unordered_map> resolver_cache_; + mutable std::unordered_map> + frame_cache_; + + [[nodiscard]] auto + is_fragment_invalid(const SchemaFrame::ReferencesEntry &reference_entry, + const std::optional &remote, + const JSON::String &base_key, const SchemaWalker &walker, + const SchemaResolver &resolver, + const SchemaFrame::Location &location) const -> bool { + auto frame_iterator{this->frame_cache_.find(base_key)}; + if (frame_iterator == this->frame_cache_.end()) { + auto remote_frame{ + std::make_unique(SchemaFrame::Mode::Locations)}; + remote_frame->analyse(remote.value(), walker, resolver, location.dialect, + base_key); + frame_iterator = + this->frame_cache_.emplace(base_key, std::move(remote_frame)).first; + } + + return !frame_iterator->second->traverse(reference_entry.destination) + .has_value(); + } +};