From c3e0deadbb48f0f307006753bb6a29b4c9956071 Mon Sep 17 00:00:00 2001 From: Ben Spoor <37540691+ben-edna@users.noreply.github.com> Date: Wed, 7 Jan 2026 17:33:50 +0000 Subject: [PATCH 1/6] Add fuzzing --- .gitignore | 1 + pyproject.toml | 1 + tests/test_fuzzing.py | 353 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 355 insertions(+) create mode 100644 tests/test_fuzzing.py diff --git a/.gitignore b/.gitignore index efb8b658..9038e54b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ example/Tests/ venv* *.cdx.json release_notes.txt +.hypothesis diff --git a/pyproject.toml b/pyproject.toml index 57c5933c..02678b04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ development = [ "tomli; python_version < '3.11'", # Tomllib is default in 3.11, required for letting codespell read the pyproject.toml 'pre-commit==4.5.1', 'ruff==0.14.10', + 'hypothesis==6.150.0', ] docs = [ 'sphinx==8.2.3', diff --git a/tests/test_fuzzing.py b/tests/test_fuzzing.py new file mode 100644 index 00000000..a6a8bcf9 --- /dev/null +++ b/tests/test_fuzzing.py @@ -0,0 +1,353 @@ +"""Fuzz test the manifest.""" + +from __future__ import annotations + +import os +import re +import tempfile +from contextlib import suppress +from typing import Any, Dict, Mapping, Tuple + +import yaml +from hypothesis import given, settings +from hypothesis import strategies as st + +# StrictYAML is a runtime dependency: +# pip install strictyaml hypothesis +from strictyaml import Any as SyAny +from strictyaml import ( + Bool, + EmptyList, + EmptyNone, + Enum, + Float, + Int, + Map, + MapPattern, + Regex, + Seq, + Str, + as_document, + load, +) + +from dfetch.__main__ import DfetchFatalException, run +from dfetch.manifest.manifest import Manifest +from dfetch.manifest.schema import MANIFEST_SCHEMA as schema +from dfetch.util.util import in_directory + +settings.register_profile( + "ci", + max_examples=30, + deadline=None, + print_blob=True, +) +settings.register_profile( + "dev", + max_examples=100, + deadline=None, +) +settings.register_profile( + "manual", + max_examples=300, + deadline=None, +) +if os.getenv("CI"): + settings.load_profile("ci") +else: + settings.load_profile("dev") + + +def _classname(obj: Any) -> str: + return obj.__class__.__name__ + + +def _get_map_items(m: Map) -> Mapping[Any, Any]: + """ + StrictYAML's Map stores the key->validator mapping internally. + It has varied attribute names across versions; try common ones. + """ + for attr in ("_validator", "_map", "map", "mapping"): + val = getattr(m, attr, None) + if isinstance(val, Mapping): + return val + raise TypeError("Unsupported StrictYAML Map internals; cannot find mapping dict.") + + +def _unwrap_optional_key(k: Any) -> Tuple[str, bool]: + """ + Returns (key_name, is_optional). + Optional('b', default=...) is used *as a key* inside Map({...}). + """ + if _classname(k) == "Optional": + for attr in ("_key", "key"): + name = getattr(k, attr, None) + if isinstance(name, str): + return name, True + return str(k), True + if isinstance(k, str): + return k, False + return str(k), False + + +def _enum_values(e: Enum) -> Any: + vals = getattr(e, "_restricted_to", None) + if vals: + return list(vals) + raise TypeError("Unsupported StrictYAML Enum internals; cannot read choices.") + + +def _regex_pattern(r: Regex) -> re.Pattern: + for attr in ("_regex", "regex", "pattern"): + pat = getattr(r, attr, None) + if isinstance(pat, (str, re.Pattern)): + return re.compile(pat) if isinstance(pat, str) else pat + raise TypeError("Unsupported StrictYAML Regex internals; cannot read pattern.") + + +def _mappattern_parts(mp: MapPattern) -> Tuple[Any, Any, int | None, int | None]: + key_v = None + val_v = None + min_k = getattr(mp, "minimum_keys", None) + max_k = getattr(mp, "maximum_keys", None) + for attr in ("_key_validator", "key_validator"): + key_v = getattr(mp, attr, None) or key_v + for attr in ("_value_validator", "value_validator"): + val_v = getattr(mp, attr, None) or val_v + if key_v is None or val_v is None: + raise TypeError("Unsupported StrictYAML MapPattern internals.") + return key_v, val_v, min_k, max_k + + +def strictyaml_to_strategy( + validator: Any, *, default_text_alphabet=st.characters(), default_max_list=5 +): + """ + Convert a StrictYAML validator into a Hypothesis strategy that yields + *Python data structures* which conform to the schema. + """ + name = _classname(validator) + + if isinstance(validator, Str): + return st.text(alphabet=default_text_alphabet) + + if isinstance(validator, Int): + return st.integers() + + if isinstance(validator, Float): + return st.floats(allow_nan=False, allow_infinity=False) + + if isinstance(validator, Bool): + return st.booleans() + + if isinstance(validator, Enum): + values = _enum_values(validator) + return st.sampled_from(values) + + if isinstance(validator, Regex): + pattern = _regex_pattern(validator) + return st.from_regex(pattern, fullmatch=True) + + if isinstance(validator, Seq): + item_v = None + for attr in ("_validator", "validator", "_item_validator", "item_validator"): + item_v = getattr(validator, attr, None) or item_v + if item_v is None: + raise TypeError( + "Unsupported StrictYAML Seq internals; cannot find item validator." + ) + return st.lists( + strictyaml_to_strategy( + item_v, + default_text_alphabet=default_text_alphabet, + default_max_list=default_max_list, + ), + min_size=1, + max_size=default_max_list, + ) + + if isinstance(validator, EmptyList): + return st.just([]) + + if isinstance(validator, Map): + items = _get_map_items(validator) + required: Dict[str, Any] = {} + optional: Dict[str, Any] = {} + + for raw_key, val_validator in items.items(): + key_name, is_opt = _unwrap_optional_key(raw_key) + if is_opt: + optional[key_name] = strictyaml_to_strategy( + val_validator, + default_text_alphabet=default_text_alphabet, + default_max_list=default_max_list, + ) + else: + required[key_name] = strictyaml_to_strategy( + val_validator, + default_text_alphabet=default_text_alphabet, + default_max_list=default_max_list, + ) + + base = st.fixed_dictionaries(required) + + def with_optional(base_dict: Dict[str, Any]): + if not optional: + return st.just(base_dict) + opt_kv_strats = [st.tuples(st.just(k), s) for k, s in optional.items()] + + chosen = st.lists(st.one_of(*opt_kv_strats), unique_by=lambda kv: kv[0]) + return chosen.map(lambda kvs: {**base_dict, **dict(kvs)}) + + return base.flatmap(with_optional) + + if isinstance(validator, MapPattern): + key_v, val_v, min_k, max_k = _mappattern_parts(validator) + key_strat = strictyaml_to_strategy( + key_v, + default_text_alphabet=default_text_alphabet, + default_max_list=default_max_list, + ) + val_strat = strictyaml_to_strategy( + val_v, + default_text_alphabet=default_text_alphabet, + default_max_list=default_max_list, + ) + + return st.dictionaries( + keys=key_strat, + values=val_strat, + min_size=min_k or 0, + max_size=max_k or default_max_list, + ) + + if _classname(validator) in ("OrValidator", "Or"): + children = None + + for attr in ("validators", "_validators", "choices", "_choices"): + vs = getattr(validator, attr, None) + if isinstance(vs, (list, tuple)) and len(vs) > 0: + children = list(vs) + break + + if children is None: + left = None + right = None + for la in ("_a", "a", "_left", "left", "_lhs", "lhs", "_validator_a"): + if getattr(validator, la, None) is not None: + left = getattr(validator, la) + break + for ra in ("_b", "b", "_right", "right", "_rhs", "rhs", "_validator_b"): + if getattr(validator, ra, None) is not None: + right = getattr(validator, ra) + break + if left is not None and right is not None: + children = [left, right] + + if not children: + raise TypeError( + "Unsupported StrictYAML OrValidator internals; no children found." + ) + + branch_strats = [ + strictyaml_to_strategy( + c, + default_text_alphabet=default_text_alphabet, + default_max_list=default_max_list, + ) + for c in children + ] + return st.one_of(branch_strats) + + if isinstance(validator, SyAny): + leaf = st.one_of( + st.booleans(), + st.integers(), + st.floats(allow_nan=False, allow_infinity=False), + st.text(), + ) + return st.recursive( + leaf, + lambda inner: st.one_of( + st.lists(inner, max_size=3), + st.dictionaries(st.text(), inner, max_size=3), + ), + max_leaves=10, + ) + + if isinstance(validator, EmptyNone): + return st.none() + + # If we reach here, add more mappings (e.g., Decimal, Datetime, Email, etc.) as needed. + raise NotImplementedError( + f"No strategy mapping implemented for StrictYAML validator: {name}" + ) + + +def validate_with_strictyaml(data: Any, yaml_schema: Any) -> None: + """ + Ensure that 'data' is serializable with the given StrictYAML schema. + If it doesn't conform, as_document will raise. + """ + as_document(data, yaml_schema) # will raise YAMLSerializationError on mismatch + + +data_strategy = strictyaml_to_strategy(schema) + + +@given(data_strategy) +def test_data_conforms_to_schema(data): + """Validate by attempting to serialize via StrictYAML.""" + # If data violates the schema, this raises and Hypothesis will shrink to a minimal counterexample. + validate_with_strictyaml(data, schema) + + +@given(data_strategy) +def test_manifest_can_be_created(data): + """Validate by attempting to construct a Manifest.""" + try: + Manifest(data) + except KeyError: + pass + + +@given(data_strategy) +def test_check(data): + """Validate check comand.""" + with suppress(DfetchFatalException): + with tempfile.TemporaryDirectory() as tmpdir: + with in_directory(tmpdir): + with open("dfetch.yaml", "w", encoding="UTF-8") as manifest_file: + yaml.dump(data, manifest_file) + run(["check"]) + + +@given(data_strategy) +def test_update(data): + """Validate update comand.""" + with suppress(DfetchFatalException): + with tempfile.TemporaryDirectory() as tmpdir: + with in_directory(tmpdir): + with open("dfetch.yaml", "w", encoding="UTF-8") as manifest_file: + yaml.dump(data, manifest_file) + run(["update"]) + + +if __name__ == "__main__": + + settings.load_profile("manual") + + example = data_strategy.example() + print("One generated example:\n", example) + + # Show the YAML StrictYAML would emit for the example: + print("\nYAML output:\n", as_document(example, schema).as_yaml()) + + # And ensure parse+validate round-trip works: + parsed = load(as_document(example, schema).as_yaml(), schema) + print("\nRound-trip parsed .data:\n", parsed.data) + + test_data_conforms_to_schema() + test_manifest_can_be_created() + test_check() + test_update() From d6b0cd981410e521bd2e9103f8ded6000afcf82a Mon Sep 17 00:00:00 2001 From: Ben Spoor <37540691+ben-edna@users.noreply.github.com> Date: Wed, 7 Jan 2026 17:34:15 +0000 Subject: [PATCH 2/6] Only accept safe strings (no strange chars) --- CHANGELOG.rst | 2 ++ dfetch/manifest/schema.py | 31 +++++++++++++++++-------------- doc/manifest.rst | 4 ++++ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4799d6ae..3c98fcfc 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Release 0.12.0 (unreleased) * Internal refactoring: introduce superproject & subproject (#896) * Switch from pykwalify to StrictYAML (#922) * Show line number when manifest validation fails (#36) +* Add Fuzzing (#819) +* Don't allow NULL or control characters in manifest (#114) Release 0.11.0 (released 2026-01-03) ==================================== diff --git a/dfetch/manifest/schema.py b/dfetch/manifest/schema.py index bb5cc9a6..a010e81e 100644 --- a/dfetch/manifest/schema.py +++ b/dfetch/manifest/schema.py @@ -1,31 +1,34 @@ """StrictYAML schema for the manifest.""" -from strictyaml import Bool, Enum, Float, Int, Map, Optional, Seq, Str +from strictyaml import Bool, Enum, Float, Int, Map, Optional, Regex, Seq NUMBER = Int() | Float() +# A safe string: no NUL, no control chars +SAFE_STR = Regex(r"^[^\x00]*$") + REMOTE_SCHEMA = Map( { - "name": Str(), - "url-base": Str(), + "name": SAFE_STR, + "url-base": SAFE_STR, Optional("default"): Bool(), } ) PROJECT_SCHEMA = Map( { - "name": Str(), - Optional("dst"): Str(), - Optional("branch"): Str(), - Optional("tag"): Str(), - Optional("revision"): Str(), - Optional("url"): Str(), - Optional("repo-path"): Str(), - Optional("remote"): Str(), - Optional("patch"): Str(), + "name": SAFE_STR, + Optional("dst"): SAFE_STR, + Optional("branch"): SAFE_STR, + Optional("tag"): SAFE_STR, + Optional("revision"): SAFE_STR, + Optional("url"): SAFE_STR, + Optional("repo-path"): SAFE_STR, + Optional("remote"): SAFE_STR, + Optional("patch"): SAFE_STR, Optional("vcs"): Enum(["git", "svn"]), - Optional("src"): Str(), - Optional("ignore"): Seq(Str()), + Optional("src"): SAFE_STR, + Optional("ignore"): Seq(SAFE_STR), } ) diff --git a/doc/manifest.rst b/doc/manifest.rst index cde2426e..66e7669a 100644 --- a/doc/manifest.rst +++ b/doc/manifest.rst @@ -17,6 +17,10 @@ Schema Below an overview of all possible fields on the manifest. The bold items are mandatory. +.. note:: + + A ``string`` should be a regular string without NULL or control characters. + .. jsonschema:: :auto_reference: From 49cda94cf15615de5c62eb36fa88f213dcc2bd9a Mon Sep 17 00:00:00 2001 From: Ben Spoor <37540691+ben-edna@users.noreply.github.com> Date: Wed, 7 Jan 2026 17:40:25 +0000 Subject: [PATCH 3/6] Use hardcoded schema --- tests/test_fuzzing.py | 339 +++++++++++------------------------------- 1 file changed, 90 insertions(+), 249 deletions(-) diff --git a/tests/test_fuzzing.py b/tests/test_fuzzing.py index a6a8bcf9..ac900dae 100644 --- a/tests/test_fuzzing.py +++ b/tests/test_fuzzing.py @@ -3,33 +3,14 @@ from __future__ import annotations import os -import re import tempfile from contextlib import suppress -from typing import Any, Dict, Mapping, Tuple +from typing import Any import yaml from hypothesis import given, settings from hypothesis import strategies as st - -# StrictYAML is a runtime dependency: -# pip install strictyaml hypothesis -from strictyaml import Any as SyAny -from strictyaml import ( - Bool, - EmptyList, - EmptyNone, - Enum, - Float, - Int, - Map, - MapPattern, - Regex, - Seq, - Str, - as_document, - load, -) +from strictyaml import as_document, load from dfetch.__main__ import DfetchFatalException, run from dfetch.manifest.manifest import Manifest @@ -57,231 +38,94 @@ else: settings.load_profile("dev") +# Avoid control chars and NUL to prevent OS/path/subprocess issues in tests +SAFE_TEXT = st.text( + alphabet=st.characters( + min_codepoint=32, blacklist_categories=("Cs",) + ), # no controls/surrogates + min_size=0, + max_size=64, +) -def _classname(obj: Any) -> str: - return obj.__class__.__name__ +# NUMBER = Int() | Float() with finite floats +SAFE_NUMBER = st.one_of( + st.integers(), + st.floats(allow_nan=False, allow_infinity=False), +) -def _get_map_items(m: Map) -> Mapping[Any, Any]: - """ - StrictYAML's Map stores the key->validator mapping internally. - It has varied attribute names across versions; try common ones. - """ - for attr in ("_validator", "_map", "map", "mapping"): - val = getattr(m, attr, None) - if isinstance(val, Mapping): - return val - raise TypeError("Unsupported StrictYAML Map internals; cannot find mapping dict.") +def opt_str(): + """Small helper for optional text fields.""" + return st.none() | SAFE_TEXT + + +remote_entry = st.builds( + lambda name, url_base, default: { + k: v + for k, v in { + "name": name, + "url-base": url_base, + "default": default, + }.items() + if v is not None + }, + name=SAFE_TEXT.filter(lambda s: len(s) > 0), + url_base=SAFE_TEXT.filter(lambda s: len(s) > 0), + default=st.none() | st.booleans(), +) +vcs_enum = st.sampled_from(["git", "svn"]) + +ignore_list = st.lists(SAFE_TEXT, min_size=1, max_size=5) + +project_entry = st.builds( + lambda name, dst, branch, tag, revision, url, repo_path, remote, patch, vcs, src, ignore: { + k: v + for k, v in { + "name": name, + "dst": dst, + "branch": branch, + "tag": tag, + "revision": revision, + "url": url, + "repo-path": repo_path, + "remote": remote, + "patch": patch, + "vcs": vcs, + "src": src, + "ignore": ignore, + }.items() + if v is not None + }, + name=SAFE_TEXT.filter(lambda s: len(s) > 0), + dst=opt_str(), + branch=opt_str(), + tag=opt_str(), + revision=opt_str(), + url=opt_str(), + repo_path=opt_str(), + remote=opt_str(), + patch=opt_str(), + vcs=st.none() | vcs_enum, + src=opt_str(), + ignore=st.one_of(ignore_list, st.just([])), +) -def _unwrap_optional_key(k: Any) -> Tuple[str, bool]: - """ - Returns (key_name, is_optional). - Optional('b', default=...) is used *as a key* inside Map({...}). - """ - if _classname(k) == "Optional": - for attr in ("_key", "key"): - name = getattr(k, attr, None) - if isinstance(name, str): - return name, True - return str(k), True - if isinstance(k, str): - return k, False - return str(k), False - - -def _enum_values(e: Enum) -> Any: - vals = getattr(e, "_restricted_to", None) - if vals: - return list(vals) - raise TypeError("Unsupported StrictYAML Enum internals; cannot read choices.") - - -def _regex_pattern(r: Regex) -> re.Pattern: - for attr in ("_regex", "regex", "pattern"): - pat = getattr(r, attr, None) - if isinstance(pat, (str, re.Pattern)): - return re.compile(pat) if isinstance(pat, str) else pat - raise TypeError("Unsupported StrictYAML Regex internals; cannot read pattern.") - - -def _mappattern_parts(mp: MapPattern) -> Tuple[Any, Any, int | None, int | None]: - key_v = None - val_v = None - min_k = getattr(mp, "minimum_keys", None) - max_k = getattr(mp, "maximum_keys", None) - for attr in ("_key_validator", "key_validator"): - key_v = getattr(mp, attr, None) or key_v - for attr in ("_value_validator", "value_validator"): - val_v = getattr(mp, attr, None) or val_v - if key_v is None or val_v is None: - raise TypeError("Unsupported StrictYAML MapPattern internals.") - return key_v, val_v, min_k, max_k - - -def strictyaml_to_strategy( - validator: Any, *, default_text_alphabet=st.characters(), default_max_list=5 -): - """ - Convert a StrictYAML validator into a Hypothesis strategy that yields - *Python data structures* which conform to the schema. - """ - name = _classname(validator) - - if isinstance(validator, Str): - return st.text(alphabet=default_text_alphabet) - - if isinstance(validator, Int): - return st.integers() - - if isinstance(validator, Float): - return st.floats(allow_nan=False, allow_infinity=False) - - if isinstance(validator, Bool): - return st.booleans() - - if isinstance(validator, Enum): - values = _enum_values(validator) - return st.sampled_from(values) - - if isinstance(validator, Regex): - pattern = _regex_pattern(validator) - return st.from_regex(pattern, fullmatch=True) - - if isinstance(validator, Seq): - item_v = None - for attr in ("_validator", "validator", "_item_validator", "item_validator"): - item_v = getattr(validator, attr, None) or item_v - if item_v is None: - raise TypeError( - "Unsupported StrictYAML Seq internals; cannot find item validator." - ) - return st.lists( - strictyaml_to_strategy( - item_v, - default_text_alphabet=default_text_alphabet, - default_max_list=default_max_list, - ), - min_size=1, - max_size=default_max_list, - ) - - if isinstance(validator, EmptyList): - return st.just([]) - - if isinstance(validator, Map): - items = _get_map_items(validator) - required: Dict[str, Any] = {} - optional: Dict[str, Any] = {} - - for raw_key, val_validator in items.items(): - key_name, is_opt = _unwrap_optional_key(raw_key) - if is_opt: - optional[key_name] = strictyaml_to_strategy( - val_validator, - default_text_alphabet=default_text_alphabet, - default_max_list=default_max_list, - ) - else: - required[key_name] = strictyaml_to_strategy( - val_validator, - default_text_alphabet=default_text_alphabet, - default_max_list=default_max_list, - ) - - base = st.fixed_dictionaries(required) - - def with_optional(base_dict: Dict[str, Any]): - if not optional: - return st.just(base_dict) - opt_kv_strats = [st.tuples(st.just(k), s) for k, s in optional.items()] - - chosen = st.lists(st.one_of(*opt_kv_strats), unique_by=lambda kv: kv[0]) - return chosen.map(lambda kvs: {**base_dict, **dict(kvs)}) - - return base.flatmap(with_optional) - - if isinstance(validator, MapPattern): - key_v, val_v, min_k, max_k = _mappattern_parts(validator) - key_strat = strictyaml_to_strategy( - key_v, - default_text_alphabet=default_text_alphabet, - default_max_list=default_max_list, - ) - val_strat = strictyaml_to_strategy( - val_v, - default_text_alphabet=default_text_alphabet, - default_max_list=default_max_list, - ) - - return st.dictionaries( - keys=key_strat, - values=val_strat, - min_size=min_k or 0, - max_size=max_k or default_max_list, - ) - - if _classname(validator) in ("OrValidator", "Or"): - children = None - - for attr in ("validators", "_validators", "choices", "_choices"): - vs = getattr(validator, attr, None) - if isinstance(vs, (list, tuple)) and len(vs) > 0: - children = list(vs) - break - - if children is None: - left = None - right = None - for la in ("_a", "a", "_left", "left", "_lhs", "lhs", "_validator_a"): - if getattr(validator, la, None) is not None: - left = getattr(validator, la) - break - for ra in ("_b", "b", "_right", "right", "_rhs", "rhs", "_validator_b"): - if getattr(validator, ra, None) is not None: - right = getattr(validator, ra) - break - if left is not None and right is not None: - children = [left, right] - - if not children: - raise TypeError( - "Unsupported StrictYAML OrValidator internals; no children found." - ) - - branch_strats = [ - strictyaml_to_strategy( - c, - default_text_alphabet=default_text_alphabet, - default_max_list=default_max_list, - ) - for c in children - ] - return st.one_of(branch_strats) - - if isinstance(validator, SyAny): - leaf = st.one_of( - st.booleans(), - st.integers(), - st.floats(allow_nan=False, allow_infinity=False), - st.text(), - ) - return st.recursive( - leaf, - lambda inner: st.one_of( - st.lists(inner, max_size=3), - st.dictionaries(st.text(), inner, max_size=3), - ), - max_leaves=10, - ) - - if isinstance(validator, EmptyNone): - return st.none() - - # If we reach here, add more mappings (e.g., Decimal, Datetime, Email, etc.) as needed. - raise NotImplementedError( - f"No strategy mapping implemented for StrictYAML validator: {name}" - ) +remotes_seq = st.none() | st.lists(remote_entry, min_size=1, max_size=4) +projects_seq = st.lists(project_entry, min_size=1, max_size=6) + +manifest_strategy = st.builds( + lambda version, remotes, projects: { + "manifest": { + "version": version, + **({"remotes": remotes} if remotes is not None else {}), + "projects": projects, + } + }, + version=SAFE_NUMBER, + remotes=remotes_seq, + projects=projects_seq, +) def validate_with_strictyaml(data: Any, yaml_schema: Any) -> None: @@ -292,17 +136,14 @@ def validate_with_strictyaml(data: Any, yaml_schema: Any) -> None: as_document(data, yaml_schema) # will raise YAMLSerializationError on mismatch -data_strategy = strictyaml_to_strategy(schema) - - -@given(data_strategy) +@given(manifest_strategy) def test_data_conforms_to_schema(data): """Validate by attempting to serialize via StrictYAML.""" # If data violates the schema, this raises and Hypothesis will shrink to a minimal counterexample. validate_with_strictyaml(data, schema) -@given(data_strategy) +@given(manifest_strategy) def test_manifest_can_be_created(data): """Validate by attempting to construct a Manifest.""" try: @@ -311,7 +152,7 @@ def test_manifest_can_be_created(data): pass -@given(data_strategy) +@given(manifest_strategy) def test_check(data): """Validate check comand.""" with suppress(DfetchFatalException): @@ -322,7 +163,7 @@ def test_check(data): run(["check"]) -@given(data_strategy) +@given(manifest_strategy) def test_update(data): """Validate update comand.""" with suppress(DfetchFatalException): @@ -337,7 +178,7 @@ def test_update(data): settings.load_profile("manual") - example = data_strategy.example() + example = manifest_strategy.example() print("One generated example:\n", example) # Show the YAML StrictYAML would emit for the example: From c128ed52460c77726119621ef2ff25a6022921ba Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 7 Jan 2026 18:47:28 +0100 Subject: [PATCH 4/6] Update test_fuzzing.py fix typo --- tests/test_fuzzing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_fuzzing.py b/tests/test_fuzzing.py index ac900dae..7b136d00 100644 --- a/tests/test_fuzzing.py +++ b/tests/test_fuzzing.py @@ -154,7 +154,7 @@ def test_manifest_can_be_created(data): @given(manifest_strategy) def test_check(data): - """Validate check comand.""" + """Validate check command.""" with suppress(DfetchFatalException): with tempfile.TemporaryDirectory() as tmpdir: with in_directory(tmpdir): @@ -165,7 +165,7 @@ def test_check(data): @given(manifest_strategy) def test_update(data): - """Validate update comand.""" + """Validate update command.""" with suppress(DfetchFatalException): with tempfile.TemporaryDirectory() as tmpdir: with in_directory(tmpdir): From 3915afe6056a9edbe2931066766e3fd6bc3733c1 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 7 Jan 2026 19:22:33 +0100 Subject: [PATCH 5/6] Update test_fuzzing.py ignore list should never be empty --- tests/test_fuzzing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_fuzzing.py b/tests/test_fuzzing.py index 7b136d00..7a495c05 100644 --- a/tests/test_fuzzing.py +++ b/tests/test_fuzzing.py @@ -108,7 +108,7 @@ def opt_str(): patch=opt_str(), vcs=st.none() | vcs_enum, src=opt_str(), - ignore=st.one_of(ignore_list, st.just([])), + ignore=ignore_list, ) remotes_seq = st.none() | st.lists(remote_entry, min_size=1, max_size=4) From 6ccc317f053cd6e36fe946073be561eee1d4479b Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 7 Jan 2026 19:16:16 +0000 Subject: [PATCH 6/6] Review comments --- dfetch/manifest/schema.py | 2 +- tests/test_fuzzing.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dfetch/manifest/schema.py b/dfetch/manifest/schema.py index a010e81e..cb1040d7 100644 --- a/dfetch/manifest/schema.py +++ b/dfetch/manifest/schema.py @@ -5,7 +5,7 @@ NUMBER = Int() | Float() # A safe string: no NUL, no control chars -SAFE_STR = Regex(r"^[^\x00]*$") +SAFE_STR = Regex(r"^[^\x00-\x1F\x7F-\x9F]*$") REMOTE_SCHEMA = Map( { diff --git a/tests/test_fuzzing.py b/tests/test_fuzzing.py index 7a495c05..1e34ad19 100644 --- a/tests/test_fuzzing.py +++ b/tests/test_fuzzing.py @@ -41,8 +41,10 @@ # Avoid control chars and NUL to prevent OS/path/subprocess issues in tests SAFE_TEXT = st.text( alphabet=st.characters( - min_codepoint=32, blacklist_categories=("Cs",) - ), # no controls/surrogates + min_codepoint=0x20, + blacklist_characters=[chr(c) for c in range(0x7F, 0xA0)], + blacklist_categories=("Cs",), + ), min_size=0, max_size=64, ) @@ -76,7 +78,7 @@ def opt_str(): vcs_enum = st.sampled_from(["git", "svn"]) -ignore_list = st.lists(SAFE_TEXT, min_size=1, max_size=5) +ignore_list = st.none() | st.lists(SAFE_TEXT, min_size=1, max_size=5) project_entry = st.builds( lambda name, dst, branch, tag, revision, url, repo_path, remote, patch, vcs, src, ignore: {