From bc12fb850be21928a2662946d5995d048d09b7b5 Mon Sep 17 00:00:00 2001 From: David Bushong Date: Fri, 12 Sep 2025 13:35:10 -0700 Subject: [PATCH 1/2] fix: allow UTF-16 surrogates to be passed through --- src/dnfile/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnfile/stream.py b/src/dnfile/stream.py index 30f0a9a..19ad7af 100644 --- a/src/dnfile/stream.py +++ b/src/dnfile/stream.py @@ -234,7 +234,7 @@ def __init__(self, data: Union[bytes, HeapItemBinary], rva: Optional[int] = None str_buf = buf try: - self.value = str_buf.decode(encoding) + self.value = str_buf.decode(encoding, errors="surrogatepass") except UnicodeDecodeError as e: logger.warning(f"UserString decode error (rva:0x{self.rva:08x}): {e}") self.value = None From 1d99746e6a9707d2503a84fef52fab318dff8ae7 Mon Sep 17 00:00:00 2001 From: David Bushong Date: Mon, 15 Sep 2025 10:08:58 -0700 Subject: [PATCH 2/2] fix: make error_handler configurable --- src/dnfile/stream.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/dnfile/stream.py b/src/dnfile/stream.py index 19ad7af..462b90f 100644 --- a/src/dnfile/stream.py +++ b/src/dnfile/stream.py @@ -192,7 +192,13 @@ class UserString(HeapItemBinary, HeapItemString): flag: Optional[int] = None - def __init__(self, data: Union[bytes, HeapItemBinary], rva: Optional[int] = None, encoding="utf-16"): + def __init__( + self, + data: Union[bytes, HeapItemBinary], + rva: Optional[int] = None, + encoding: str = "utf-16", + error_handler: str = "strict", + ): self.encoding = encoding if isinstance(data, bytes): HeapItemBinary.__init__(self, data, rva=rva) @@ -234,7 +240,7 @@ def __init__(self, data: Union[bytes, HeapItemBinary], rva: Optional[int] = None str_buf = buf try: - self.value = str_buf.decode(encoding, errors="surrogatepass") + self.value = str_buf.decode(encoding, errors=error_handler) except UnicodeDecodeError as e: logger.warning(f"UserString decode error (rva:0x{self.rva:08x}): {e}") self.value = None @@ -257,12 +263,12 @@ def get_bytes(self, index) -> Optional[bytes]: return item.value_bytes() - def get(self, index, encoding="utf-16") -> Optional[UserString]: + def get(self, index: int, encoding: str = "utf-16", error_handler: str = "strict") -> Optional[UserString]: bin_item = super().get(index) if bin_item is None: return None - us_item = UserString(bin_item, encoding=encoding) + us_item = UserString(bin_item, encoding=encoding, error_handler=error_handler) return us_item