From 41df1a1df911792bf36c76127e492522ce3be3ed Mon Sep 17 00:00:00 2001 From: Pierre Tondereau Date: Wed, 22 Apr 2026 19:19:08 +0200 Subject: [PATCH] fix: heap corruption when returning Binary of 0/1 packed bytes Returning a Binary from a #[php_function] crashed PHP with "zend_mm_heap corrupted" when the packed byte length was 0 or 1. This was a regression introduced in 0.15.8 by PR #701, which short-circuits ext_php_rs_zend_string_init to return PHP's interned permanent statics (zend_empty_string, zend_one_char_string[c]) for length 0 and 1. Zval::set_zend_string was updated in that PR to detect these statics via GC_IMMUTABLE and flag the zval as InternedStringEx, but Zval::set_binary was missed and always flagged the zval as refcounted. On drop PHP tried to free a process-global static, leading to heap corruption. Zval::set_binary now wraps the raw pointer from Pack::pack_into in ZBox and delegates to set_zend_string, so every path that assigns a zend_string to a zval shares one flag-selection site, matching PHP's own ZVAL_STR macro in Zend/zend_types.h. Also tightened the NULL guard in wrapper.c so the fast path is safe when only one of the two weak symbols resolves at link time, and fixed an adjacent correctness issue where Zval::binary::() and Zval::binary_slice::() silently truncated trailing bytes when the byte length was not a multiple of size_of::(). Both functions now return None in that case rather than dropping data. Fixes #729 --- src/types/zval.rs | 133 +++++++++++++++++++++++- src/wrapper.c | 1 + tests/src/integration/binary/binary.php | 15 +++ tests/src/integration/binary/mod.rs | 28 ++++- 4 files changed, 172 insertions(+), 5 deletions(-) diff --git a/src/types/zval.rs b/src/types/zval.rs index 47a2eed466..186925adae 100644 --- a/src/types/zval.rs +++ b/src/types/zval.rs @@ -197,6 +197,11 @@ impl Zval { /// a vector of a given type. Similar to the [`unpack`] function in PHP, /// except you can only unpack one type. /// + /// Returns `None` if the zval is not a string, or if the byte length of + /// the string is not a whole multiple of `size_of::()`. The previous + /// behavior (silent truncation of trailing bytes) hid off-by-one input + /// errors and was a correctness hazard. + /// /// # Safety /// /// There is no way to tell if the data stored in the string is actually of @@ -207,8 +212,13 @@ impl Zval { /// /// [`pack`]: https://www.php.net/manual/en/function.pack.php /// [`unpack`]: https://www.php.net/manual/en/function.unpack.php + #[must_use] pub fn binary(&self) -> Option> { - self.zend_str().map(T::unpack_into) + let s = self.zend_str()?; + if !s.len.is_multiple_of(std::mem::size_of::()) { + return None; + } + Some(T::unpack_into(s)) } /// Returns the value of the zval if it is a string and can be unpacked into @@ -219,6 +229,9 @@ impl Zval { /// returned instead of a vector, meaning the contents of the string is /// not copied. /// + /// Returns `None` if the zval is not a string, or if the byte length of + /// the string is not a whole multiple of `size_of::()`. + /// /// # Safety /// /// There is no way to tell if the data stored in the string is actually of @@ -229,8 +242,13 @@ impl Zval { /// /// [`pack`]: https://www.php.net/manual/en/function.pack.php /// [`unpack`]: https://www.php.net/manual/en/function.unpack.php + #[must_use] pub fn binary_slice(&self) -> Option<&[T]> { - self.zend_str().map(T::unpack_into) + let s = self.zend_str()?; + if !s.len.is_multiple_of(std::mem::size_of::()) { + return None; + } + Some(T::unpack_into(s)) } /// Returns the value of the zval if it is a resource. @@ -869,6 +887,15 @@ impl Zval { /// The Zval takes ownership of the string. When the Zval is dropped, /// the string will be released. /// + /// This is the canonical path for assigning any raw `zend_string` to a + /// zval from Rust: it reads the string's `GC_IMMUTABLE` flag and picks + /// [`ZvalTypeFlags::InternedStringEx`] (non-refcounted) for interned + /// permanent statics like `zend_empty_string` / `zend_one_char_string[c]`, + /// otherwise [`ZvalTypeFlags::StringEx`] (refcounted). This mirrors + /// PHP's own `ZVAL_STR` macro in `Zend/zend_types.h`. Other `set_*` + /// helpers that produce strings (including [`Zval::set_binary`]) route + /// through this function to stay in lockstep with upstream flag logic. + /// /// # Parameters /// /// * `val` - String content. @@ -890,9 +917,15 @@ impl Zval { /// /// * `val` - The value to set the zval as. pub fn set_binary(&mut self, val: Vec) { - self.change_type(ZvalTypeFlags::StringEx); let ptr = T::pack_into(val); - self.value.str_ = ptr; + // SAFETY: Pack::pack_into returns a non-null zend_string from + // ext_php_rs_zend_string_init: either a freshly allocated refcounted + // string or an interned permanent static (zend_empty_string or + // zend_one_char_string[c]) for packed length 0 or 1. Both are valid + // inputs for ZBox::from_raw; zend_string_release short-circuits on + // interned strings, matching PHP's own zend_string_release. + let zs = unsafe { ZBox::from_raw(ptr) }; + self.set_zend_string(zs); } /// Sets the value of the zval as an interned string. Returns nothing in a @@ -1945,4 +1978,96 @@ mod tests { assert_eq!(php_float_to_string(0.001), "0.001"); assert_eq!(php_float_to_string(0.0001), "0.0001"); // 1e-4 is decimal } + + #[test] + fn test_set_binary_interned_flag() { + Embed::run(|| { + // Regression for #729: packed length 0 or 1 returns an interned + // permanent static; flagging the zval as refcounted corrupts the + // heap on drop. Mirrors Zval::set_zend_string's GC_IMMUTABLE check. + + let assert_interned = |zv: &Zval, label: &str| { + let flags = ZvalTypeFlags::from_bits_truncate(unsafe { zv.u1.type_info }); + assert!( + flags.contains(ZvalTypeFlags::String), + "{label}: expected String type, got {flags:?}", + ); + assert!( + !flags.contains(ZvalTypeFlags::RefCounted), + "{label}: interned string zval must not carry RefCounted, got {flags:?}", + ); + }; + + let mut zv = Zval::new(); + zv.set_binary::(vec![]); + assert_eq!(zv.get_type(), DataType::String); + assert_interned(&zv, "empty u8"); + + let mut zv = Zval::new(); + zv.set_binary::(vec![42u8]); + assert_eq!(zv.get_type(), DataType::String); + assert_interned(&zv, "single u8"); + assert_eq!(zv.binary::(), Some(vec![42u8])); + + let mut zv = Zval::new(); + zv.set_binary::(Vec::::new()); + assert_eq!(zv.get_type(), DataType::String); + assert_interned(&zv, "empty u32"); + + // A packed payload of 2+ bytes must still take the refcounted path. + let mut zv = Zval::new(); + zv.set_binary::(vec![42u32]); + let flags = ZvalTypeFlags::from_bits_truncate(unsafe { zv.u1.type_info }); + assert!( + flags.contains(ZvalTypeFlags::RefCounted), + "multi-byte packed string must be refcounted, got {flags:?}", + ); + }); + } + + #[test] + fn test_binary_roundtrip_and_size_mismatch() { + Embed::run(|| { + // Roundtrip at the native widths used by Pack. + let u32s = vec![1u32, 0xdead_beef, 42u32, 0u32]; + let mut zv = Zval::new(); + zv.set_binary::(u32s.clone()); + assert_eq!(zv.binary::(), Some(u32s)); + + let u16s = vec![0x1234u16, 0xabcdu16, 0xffffu16]; + let mut zv = Zval::new(); + zv.set_binary::(u16s.clone()); + assert_eq!(zv.binary::(), Some(u16s)); + + // Empty string roundtrips cleanly at every width. + for width in [1usize, 2, 4, 8] { + let mut zv = Zval::new(); + match width { + 1 => zv.set_binary::(vec![]), + 2 => zv.set_binary::(vec![]), + 4 => zv.set_binary::(vec![]), + 8 => zv.set_binary::(vec![]), + _ => unreachable!(), + } + assert_eq!(zv.binary::(), Some(vec![])); + } + + // A byte count that isn't a multiple of size_of::() must return + // None rather than silently dropping trailing bytes. 5 bytes of u8 + // should unpack as u8 (len=5) but refuse to unpack as u32 (5 % 4 != 0). + let mut zv = Zval::new(); + zv.set_binary::(vec![1u8, 2, 3, 4, 5]); + assert_eq!(zv.binary::(), Some(vec![1u8, 2, 3, 4, 5])); + assert!(zv.binary::().is_none()); + assert!(zv.binary::().is_none()); + assert!(zv.binary::().is_none()); + + // Single-byte payload must fail cleanly for any width > 1. + let mut zv = Zval::new(); + zv.set_binary::(vec![42u8]); + assert_eq!(zv.binary::(), Some(vec![42u8])); + assert!(zv.binary::().is_none()); + assert!(zv.binary::().is_none()); + }); + } } diff --git a/src/wrapper.c b/src/wrapper.c index 0135d9c850..5f295ff9a2 100644 --- a/src/wrapper.c +++ b/src/wrapper.c @@ -5,6 +5,7 @@ zend_string *ext_php_rs_zend_string_init(const char *str, size_t len, bool persistent) { if (!persistent && len <= 1 + && zend_empty_string != NULL && zend_one_char_string != NULL && zend_one_char_string[0] != NULL) { return len == 0 ? zend_empty_string : ZSTR_CHAR((zend_uchar) *str); } diff --git a/tests/src/integration/binary/binary.php b/tests/src/integration/binary/binary.php index a8ae8717fd..98254b7a60 100644 --- a/tests/src/integration/binary/binary.php +++ b/tests/src/integration/binary/binary.php @@ -9,3 +9,18 @@ assert(in_array(3, $result)); assert(in_array(4, $result)); assert(in_array(5, $result)); + +// Regression for #729: returning a Binary whose packed length is 0 or 1 +// must not corrupt the Zend heap on zval destruction. +$empty_u8 = test_binary_empty_u8(); +assert(is_string($empty_u8)); +assert(strlen($empty_u8) === 0); + +$single_u8 = test_binary_single_u8(); +assert(is_string($single_u8)); +assert(strlen($single_u8) === 1); +assert(ord($single_u8) === 42); + +$empty_u32 = test_binary_empty_u32(); +assert(is_string($empty_u32)); +assert(strlen($empty_u32) === 0); diff --git a/tests/src/integration/binary/mod.rs b/tests/src/integration/binary/mod.rs index d441af58f3..412fe470d5 100644 --- a/tests/src/integration/binary/mod.rs +++ b/tests/src/integration/binary/mod.rs @@ -5,8 +5,34 @@ pub fn test_binary(a: Binary) -> Binary { a } +// Regression coverage for #729: packed length 0 or 1 returns an interned +// permanent static from ext_php_rs_zend_string_init; prior to the fix, +// Zval::set_binary flagged these as refcounted and caused heap corruption +// on drop. +#[php_function] +#[php(name = "test_binary_empty_u8")] +pub fn test_binary_empty_u8() -> Binary { + Binary::from(Vec::::new()) +} + +#[php_function] +#[php(name = "test_binary_single_u8")] +pub fn test_binary_single_u8() -> Binary { + Binary::from(vec![42u8]) +} + +#[php_function] +#[php(name = "test_binary_empty_u32")] +pub fn test_binary_empty_u32() -> Binary { + Binary::from(Vec::::new()) +} + pub fn build_module(builder: ModuleBuilder) -> ModuleBuilder { - builder.function(wrap_function!(test_binary)) + builder + .function(wrap_function!(test_binary)) + .function(wrap_function!(test_binary_empty_u8)) + .function(wrap_function!(test_binary_single_u8)) + .function(wrap_function!(test_binary_empty_u32)) } #[cfg(test)]