From eda15ef48380fee6286195a2661fc1a82e2604df Mon Sep 17 00:00:00 2001 From: Fernando Lins Date: Mon, 13 Apr 2026 12:37:19 -0300 Subject: [PATCH 1/3] Ignore non-ASCII chars in ascii_percent_encoding --- percent_encoding/src/ascii_set.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percent_encoding/src/ascii_set.rs b/percent_encoding/src/ascii_set.rs index 26c66dde7..5292b7f05 100644 --- a/percent_encoding/src/ascii_set.rs +++ b/percent_encoding/src/ascii_set.rs @@ -50,7 +50,7 @@ impl AsciiSet { } pub(crate) fn should_percent_encode(&self, byte: u8) -> bool { - !byte.is_ascii() || self.contains(byte) + byte.is_ascii() && self.contains(byte) } pub const fn add(&self, byte: u8) -> Self { From 4a04d9c099eb49ebc2d1846d1b13b792c8c37e7b Mon Sep 17 00:00:00 2001 From: Fernando Lins Date: Wed, 22 Apr 2026 11:06:41 -0300 Subject: [PATCH 2/3] feat(percent-encoding): add optional 'iri' feature for IRI-style encoding --- percent_encoding/Cargo.toml | 3 +++ percent_encoding/src/ascii_set.rs | 34 ++++++++++++++++++++++++++++++- percent_encoding/src/lib.rs | 13 +++++++++++- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/percent_encoding/Cargo.toml b/percent_encoding/Cargo.toml index 562ba5f86..88501a4e0 100644 --- a/percent_encoding/Cargo.toml +++ b/percent_encoding/Cargo.toml @@ -13,6 +13,9 @@ rust-version = "1.51" default = ["std"] std = ["alloc"] alloc = [] +# Encode only ASCII code units in `AsciiSet`; leave UTF-8 non-ASCII bytes literal (IRI-style). +# Used by Anki for Unicode paths in file:/media references; see `AsciiSet::should_percent_encode`. 
+iri = [] [package.metadata.docs.rs] rustdoc-args = ["--generate-link-to-definition"] diff --git a/percent_encoding/src/ascii_set.rs b/percent_encoding/src/ascii_set.rs index 5292b7f05..ad4ef22ae 100644 --- a/percent_encoding/src/ascii_set.rs +++ b/percent_encoding/src/ascii_set.rs @@ -50,7 +50,14 @@ impl AsciiSet { } pub(crate) fn should_percent_encode(&self, byte: u8) -> bool { - byte.is_ascii() && self.contains(byte) + #[cfg(feature = "iri")] + { + byte.is_ascii() && self.contains(byte) + } + #[cfg(not(feature = "iri"))] + { + !byte.is_ascii() || self.contains(byte) + } } pub const fn add(&self, byte: u8) -> Self { @@ -211,3 +218,28 @@ mod tests { assert!(COMPLEMENT.contains(b'C')); } } + +#[cfg(all(test, feature = "iri"))] +mod iri_tests { + use super::*; + + #[test] + fn should_percent_encode_leaves_non_ascii_utf8_unencoded() { + let set = AsciiSet::EMPTY.add(b'/').add(b'%'); + for &byte in "日本語.mp3".as_bytes() { + assert!( + !set.should_percent_encode(byte), + "byte {:#x} should not be percent-encoded", + byte + ); + } + } + + #[test] + fn should_percent_encode_still_encodes_ascii_in_set() { + let set = AsciiSet::EMPTY.add(b' ').add(b'?'); + assert!(set.should_percent_encode(b' ')); + assert!(set.should_percent_encode(b'?')); + assert!(!set.should_percent_encode(b'a')); + } +} diff --git a/percent_encoding/src/lib.rs b/percent_encoding/src/lib.rs index ee36e0c5a..5750b4636 100644 --- a/percent_encoding/src/lib.rs +++ b/percent_encoding/src/lib.rs @@ -98,7 +98,8 @@ pub fn percent_encode_byte(byte: u8) -> &'static str { /// Percent-encode the given bytes with the given set. /// -/// Non-ASCII bytes and bytes in `ascii_set` are encoded. +/// Bytes in `ascii_set` are encoded. Non-ASCII bytes are also encoded unless the crate +/// feature `iri` is enabled (IRI-style: UTF-8 non-ASCII octets pass through). 
/// /// The return type: /// @@ -418,6 +419,16 @@ mod tests { ); } + #[cfg(feature = "iri")] + #[test] + fn utf8_percent_encode_unicode_filename_unchanged_for_path_set() { + const PATHISH: &AsciiSet = &CONTROLS.add(b'#').add(b'?').add(b'{').add(b'}'); + assert_eq!( + super::utf8_percent_encode("日本語.mp3", PATHISH).collect::<String>(), + "日本語.mp3" + ); + } + #[test] fn percent_decode() { assert_eq!( From e79448afb151350fdb8b88cddf1bac0c984c841f Mon Sep 17 00:00:00 2001 From: Fernando Lins Date: Wed, 22 Apr 2026 11:09:09 -0300 Subject: [PATCH 3/3] ci: run percent-encoding tests with feature --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7c5e5a1f0..04c41b04c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,6 +67,8 @@ jobs: # Run tests - name: Run tests run: cargo test + - name: Run percent-encoding IRI (iri) tests + run: cargo test -p percent-encoding --features iri # Run tests enabling the serde feature - name: Run tests with the serde feature run: cargo test --features "url/serde,url/expose_internals"