diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index adc32581..19ebffd8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,11 +34,19 @@ jobs: - name: Enable type layout randomization run: echo RUSTFLAGS=${RUSTFLAGS}\ -Zrandomize-layout >> $GITHUB_ENV if: matrix.rust == 'nightly' - - run: cargo test + - run: cargo test --features borsh if: matrix.rust != '1.68.0' - run: cargo check --no-default-features - run: cargo check --features serde + - run: cargo check --features borsh + if: matrix.rust != '1.68.0' # Some borsh dependency do not compile in this rustc version + - run: cargo check --features serde,borsh + if: matrix.rust != '1.68.0' # Some borsh dependency do not compile in this rustc version - run: cargo check --no-default-features --features serde + - run: cargo check --no-default-features --features borsh + if: matrix.rust != '1.68.0' # Some borsh dependency do not compile in this rustc version + - run: cargo check --no-default-features --features serde,borsh + if: matrix.rust != '1.68.0' # Some borsh dependency do not compile in this rustc version - uses: actions/upload-artifact@v6 if: matrix.rust == 'nightly' && always() with: @@ -56,7 +64,7 @@ jobs: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable - run: npm install semver - - run: cargo test + - run: cargo test --features serde,borsh env: RUSTFLAGS: --cfg test_node_semver ${{env.RUSTFLAGS}} diff --git a/Cargo.toml b/Cargo.toml index 43cf2eb2..56261e01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,10 +15,12 @@ rust-version = "1.68" [features] default = ["std"] serde = ["dep:serde"] +borsh = ["dep:borsh"] std = [] [dependencies] serde = { package = "serde_core", version = "1.0.220", optional = true, default-features = false } +borsh = { version = "^1", optional = true, default-features = false, features = ["derive"]} [target.'cfg(any())'.dependencies] serde = { version = "1.0.220", optional = true, default-features = false } @@ -41,4 +43,4 @@ rustdoc-args = [ ] [package.metadata.playground] -features = ["serde"] +features = ["serde", "borsh"] diff --git a/src/identifier.rs b/src/identifier.rs index 18612c27..168b20a9 100644 --- a/src/identifier.rs +++ b/src/identifier.rs @@ -271,6 +271,118 @@ impl PartialEq for Identifier { } } +#[cfg(feature = "borsh")] +impl borsh::ser::BorshSerialize for Identifier { + fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { + let internals = self.as_str().as_bytes(); + assert!(internals.len() <= u32::MAX as usize); + + // Write the size of the identifier + let size = (internals.len() as u32).to_le_bytes(); + writer.write_all(&size)?; + + // Write the content of the identifier if non-empty + if !internals.is_empty() { + writer.write_all(self.as_str().as_bytes())?; + } + Ok(()) + } +} + +#[cfg(feature = "borsh")] +impl borsh::de::BorshDeserialize for Identifier { + fn deserialize_reader(reader: &mut R) -> borsh::io::Result { + // First we read the size of the identifier + let mut len = [0u8; 4]; + reader.read_exact(&mut len)?; + let len = u32::from_le_bytes(len) as usize; + + // Allocate an Identifier of the correct size + match len { + 0 => Ok(Self::empty()), + 1..9 => { + // Short string representation no allocation required + let mut buffer = [0u8; 8]; + let buffer_slice = &mut buffer[0..len]; + reader.read_exact(buffer_slice)?; + + // Ensure no '\0' is present + for char in buffer_slice.iter() { + if !char.is_ascii() || *char == 0u8 { + unreachable!( + "Corrupted data: should only contain ASCII character and no NIL bytes" + ); + } + } + + // SAFETY: Bytes are valid UTF-8 as they are non-null ASCII characters + let buffer_slice = unsafe { str::from_utf8_unchecked(buffer_slice) }; + Ok(unsafe { Self::new_unchecked(buffer_slice) }) + } + 9..=0xff_ffff_ffff_ffff => { + // SAFETY: len is in a range that does not contain 0. + let size = bytes_for_varint(unsafe { NonZeroUsize::new_unchecked(len) }) + len; + let align = 2; + + // On 32-bit and 16-bit architecture, check for size overflowing + // isize::MAX. Making an allocation request bigger than this to + // the allocator is considered UB. All allocations (including + // static ones) are limited to isize::MAX so we're guaranteed + // len <= isize::MAX, and we know bytes_for_varint(len) <= 5 + // because 128**5 > isize::MAX, which means the only problem + // that can arise is when isize::MAX - 5 <= len <= isize::MAX. + // This is pretty much guaranteed to be malicious input so we + // don't need to care about returning a good error message. + if mem::size_of::() < 8 { + let max_alloc = usize::MAX / 2 - align; + assert!(size <= max_alloc); + } + + // SAFETY: align is not zero, align is a power of two, and + // rounding size up to align does not overflow isize::MAX. + let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; + // SAFETY: layout's size is nonzero. + let ptr = unsafe { alloc(layout) }; + if ptr.is_null() { + handle_alloc_error(layout); + } + let mut write = ptr; + let mut varint_remaining = len; + while varint_remaining > 0 { + // SAFETY: size is bytes_for_varint(len) bytes + len bytes. + // This is writing the first bytes_for_varint(len) bytes. + unsafe { ptr::write(write, varint_remaining as u8 | 0x80) }; + varint_remaining >>= 7; + // SAFETY: still in bounds of the same allocation. + write = unsafe { write.add(1) }; + } + + // SAFETY: data is non-null and size is bytes_for_varint(len) bytes + len bytes. + // This is writing to the last len bytes + let buffer = unsafe { core::slice::from_raw_parts_mut(write, len) }; + reader.read_exact(buffer)?; + + // Check that all bytes are either + for char in buffer { + if !char.is_ascii() || *char == 0u8 { + unreachable!( + "Corrupted data: should only contain ASCII character and no NIL bytes" + ); + } + } + + Ok(Identifier { + head: ptr_to_repr(ptr), + tail: [0; TAIL_BYTES], + }) + } + _ => { + unreachable!("you should really refrain from storing >64 petabytes of text in semver version"); + } + } + } +} + unsafe impl Send for Identifier {} unsafe impl Sync for Identifier {} diff --git a/src/lib.rs b/src/lib.rs index 4ef6cde0..2f852683 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,6 +100,8 @@ mod serde; use crate::identifier::Identifier; use alloc::vec::Vec; +#[cfg(feature = "borsh")] +use borsh::{BorshDeserialize, BorshSerialize}; use core::cmp::Ordering; use core::str::FromStr; @@ -155,6 +157,7 @@ pub use crate::parse::Error; /// /// Example: `1.0.0-alpha` < `1.0.0-alpha.1` < `1.0.0-alpha.beta` < `1.0.0-beta` < `1.0.0-beta.2` < `1.0.0-beta.11` < `1.0.0-rc.1` < `1.0.0` #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))] pub struct Version { pub major: u64, pub minor: u64, @@ -181,6 +184,7 @@ pub struct Version { /// not permitted within a partial version, i.e. anywhere between the major /// version number and its minor, patch, pre-release, or build metadata. #[derive(Clone, Eq, PartialEq, Hash, Debug)] +#[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))] pub struct VersionReq { pub comparators: Vec, } @@ -188,6 +192,7 @@ pub struct VersionReq { /// A pair of comparison operator and partial version, such as `>=1.2`. Forms /// one piece of a VersionReq. #[derive(Clone, Eq, PartialEq, Hash, Debug)] +#[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))] pub struct Comparator { pub op: Op, pub major: u64, @@ -244,6 +249,7 @@ pub struct Comparator { /// -  **`I.J.*`** — equivalent to `=I.J` /// -  **`I.*`** or **`I.*.*`** — equivalent to `=I` #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +#[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))] #[non_exhaustive] pub enum Op { Exact, @@ -306,6 +312,7 @@ pub enum Op { /// /// Example: `alpha` < `alpha.85` < `alpha.90` < `alpha.200` < `alpha.0a` < `alpha.1a0` < `alpha.a` < `beta` #[derive(Default, Clone, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))] pub struct Prerelease { identifier: Identifier, } @@ -364,6 +371,7 @@ pub struct Prerelease { /// /// Example: `demo` < `demo.85` < `demo.90` < `demo.090` < `demo.200` < `demo.1a0` < `demo.a` < `memo` #[derive(Default, Clone, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "borsh", derive(BorshSerialize, BorshDeserialize))] pub struct BuildMetadata { identifier: Identifier, } diff --git a/tests/test_borsh_identifier.rs b/tests/test_borsh_identifier.rs new file mode 100644 index 00000000..81315089 --- /dev/null +++ b/tests/test_borsh_identifier.rs @@ -0,0 +1,50 @@ +#![cfg(feature = "borsh")] + +mod util; + +use borsh::{BorshDeserialize, BorshSerialize}; +use semver::{BuildMetadata, Prerelease}; +use util::{build_metadata, prerelease}; + +fn expected_identifier_encoding(text: &str) -> Vec { + let mut buf = Vec::with_capacity(4 + text.len()); + buf.extend_from_slice(&(text.len() as u32).to_le_bytes()); + buf.extend_from_slice(text.as_bytes()); + buf +} + +fn serialize_to_vec(value: &impl BorshSerialize) -> Vec { + let mut out = Vec::new(); + value.serialize(&mut out).expect("borsh serialize"); + out +} + +#[test] +fn prerelease_identifier_serializes_inline_and_heap() { + for text in ["abcd", "abcdefgh", "stage.alpha.segment9"] { + let value = prerelease(text); + let bytes = serialize_to_vec(&value); + assert_eq!(bytes, expected_identifier_encoding(text)); + + let round_trip = Prerelease::try_from_slice(&bytes).expect("deserialize prerelease"); + assert_eq!(round_trip, value); + } +} + +#[test] +fn build_metadata_identifier_serializes_empty_and_long() { + let empty_bytes = serialize_to_vec(&BuildMetadata::EMPTY); + assert_eq!(empty_bytes, expected_identifier_encoding("")); + let decoded_empty = + BuildMetadata::try_from_slice(&empty_bytes).expect("deserialize empty build metadata"); + assert!(decoded_empty.is_empty()); + + let text = "build.20240101.commit.abcdef"; + let metadata = build_metadata(text); + let bytes = serialize_to_vec(&metadata); + assert_eq!(bytes, expected_identifier_encoding(text)); + + let round_trip = + BuildMetadata::try_from_slice(&bytes).expect("deserialize populated build metadata"); + assert_eq!(round_trip, metadata); +}