ByteVeda · pratyush618 · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/crates/paperjam-epub/Cargo.toml b/crates/paperjam-epub/Cargo.toml
@@ -7,7 +7,7 @@ license.workspace = true
 description = "EPUB e-book processing for the paperjam ecosystem"
 
 [dependencies]
-paperjam-model = { path = "../paperjam-model" }
+paperjam-model = { path = "../paperjam-model", features = ["zip_safety"] }
 paperjam-html = { path = "../paperjam-html" }
 zip = { version = "2", default-features = false, features = ["deflate"] }
 quick-xml = "0.37"

diff --git a/crates/paperjam-epub/src/error.rs b/crates/paperjam-epub/src/error.rs
@@ -1,25 +1,30 @@
 #[derive(Debug, thiserror::Error)]
 pub enum EpubError {
+    /// The outer archive header is invalid or could not be opened.
     #[error("invalid ZIP archive: {0}")]
     Zip(#[from] zip::result::ZipError),
 
+    /// Non-archive I/O failure (e.g. cloning raw bytes).
     #[error("I/O error: {0}")]
     Io(#[from] std::io::Error),
 
+    /// XML parse failure; the payload is a human-readable description.
     #[error("XML parse error: {0}")]
     Xml(String),
 
+    /// Chapter content failed to parse as HTML.
     #[error("HTML parse error: {0}")]
     Html(#[from] paperjam_html::HtmlError),
 
-    #[error("missing entry in EPUB: {0}")]
-    MissingEntry(String),
-
+    /// The EPUB layout (container.xml, OPF, spine) was malformed.
     #[error("invalid EPUB structure: {0}")]
     InvalidStructure(String),
 
-    #[error("EPUB entry `{name}` is too large ({size} bytes, limit {limit})")]
-    EntryTooLarge { name: String, size: u64, limit: u64 },
+    /// A bounded archive read failed: it hit a configured safety limit (per-entry
+    /// size, total-byte budget, entry count, or compression ratio) or could not
+    /// locate a named entry. Display and `source()` forward to the inner error.
+    #[error(transparent)]
+    Archive(#[from] paperjam_model::zip_safety::ZipSafetyError),
 }
 
 impl From<quick_xml::Error> for EpubError {

diff --git a/crates/paperjam-epub/src/lib.rs b/crates/paperjam-epub/src/lib.rs
@@ -13,7 +13,6 @@ mod image;
 mod markdown;
 mod metadata;
 pub mod parser;
-mod safe_read;
 mod structure;
 mod table;
 mod text;

diff --git a/crates/paperjam-epub/src/parser.rs b/crates/paperjam-epub/src/parser.rs
@@ -5,35 +5,36 @@ use quick_xml::Reader;
 
 use crate::document::{ChapterData, EpubDocument, OpfMetadata, TocEntry};
 use crate::error::{EpubError, Result};
-use crate::safe_read::{read_entry_bytes, read_entry_string};
 use crate::toc;
+use paperjam_model::zip_safety::{ArchiveLimits, SafeArchive};
 
 /// Parse an EPUB document from raw bytes.
 pub fn parse_epub(bytes: &[u8]) -> Result<EpubDocument> {
     let cursor = std::io::Cursor::new(bytes);
     let mut archive = zip::ZipArchive::new(cursor)?;
+    let mut safe = SafeArchive::new(&mut archive, ArchiveLimits::DEFAULT);
 
     // 1. Find the OPF path from container.xml.
-    let container_xml = read_entry_string(&mut archive, "META-INF/container.xml")?;
+    let container_xml = safe.read_entry_string("META-INF/container.xml")?;
     let opf_path = parse_container_xml(&container_xml)?;
     let opf_base_dir = opf_path
         .rsplit_once('/')
         .map(|(d, _)| d.to_string())
         .unwrap_or_default();
 
     // 2. Parse OPF: metadata, manifest, spine.
-    let opf_xml = read_entry_string(&mut archive, &opf_path)?;
+    let opf_xml = safe.read_entry_string(&opf_path)?;
     let (opf_metadata, manifest, spine) = parse_opf(&opf_xml)?;
 
     // 3. Parse TOC.
-    let toc_entries = parse_toc_from_manifest(&mut archive, &manifest, &opf_base_dir);
+    let toc_entries = parse_toc_from_manifest(&mut safe, &manifest, &opf_base_dir);
 
     // 4. Read chapters in spine order.
     let mut chapters = Vec::new();
     for (idx, spine_idref) in spine.iter().enumerate() {
         if let Some(href) = manifest.get(spine_idref) {
             let full_path = resolve_path(&opf_base_dir, href);
-            match read_entry_bytes(&mut archive, &full_path) {
+            match safe.read_entry_bytes(&full_path) {
                 Ok(html_bytes) => {
                     let html_doc = paperjam_html::HtmlDocument::from_bytes(&html_bytes)?;
                     let title = find_toc_title(&toc_entries, href);
@@ -52,7 +53,7 @@ pub fn parse_epub(bytes: &[u8]) -> Result<EpubDocument> {
     }
 
     // 5. Collect archive images.
-    let archive_images = collect_images(&mut archive, &manifest, &opf_base_dir);
+    let archive_images = collect_images(&mut safe, &manifest, &opf_base_dir);
 
     Ok(EpubDocument {
         chapters,
@@ -252,15 +253,15 @@ fn find_toc_title(entries: &[TocEntry], href: &str) -> Option<String> {
 
 /// Parse TOC from the manifest, trying NCX first then nav.xhtml.
 fn parse_toc_from_manifest(
-    archive: &mut zip::ZipArchive<std::io::Cursor<&[u8]>>,
+    safe: &mut SafeArchive<'_, std::io::Cursor<&[u8]>>,
     manifest: &HashMap<String, String>,
     opf_base_dir: &str,
 ) -> Vec<TocEntry> {
     // Look for NCX file (usually id="ncx" or ends with .ncx).
     for (id, href) in manifest {
         if id == "ncx" || href.ends_with(".ncx") {
             let full_path = resolve_path(opf_base_dir, href);
-            if let Ok(xml) = read_entry_string(archive, &full_path) {
+            if let Ok(xml) = safe.read_entry_string(&full_path) {
                 let entries = toc::parse_ncx(&xml);
                 if !entries.is_empty() {
                     return entries;
@@ -273,7 +274,7 @@ fn parse_toc_from_manifest(
     for href in manifest.values() {
         if href.contains("nav") && (href.ends_with(".xhtml") || href.ends_with(".html")) {
             let full_path = resolve_path(opf_base_dir, href);
-            if let Ok(html_bytes) = read_entry_bytes(archive, &full_path) {
+            if let Ok(html_bytes) = safe.read_entry_bytes(&full_path) {
                 let entries = toc::parse_nav_xhtml(&html_bytes);
                 if !entries.is_empty() {
                     return entries;
@@ -287,7 +288,7 @@ fn parse_toc_from_manifest(
 
 /// Collect image files from the archive based on manifest media types.
 fn collect_images(
-    archive: &mut zip::ZipArchive<std::io::Cursor<&[u8]>>,
+    safe: &mut SafeArchive<'_, std::io::Cursor<&[u8]>>,
     manifest: &HashMap<String, String>,
     opf_base_dir: &str,
 ) -> Vec<(String, Vec<u8>)> {
@@ -298,7 +299,7 @@ fn collect_images(
         let lower = href.to_ascii_lowercase();
         if image_extensions.iter().any(|ext| lower.ends_with(ext)) {
             let full_path = resolve_path(opf_base_dir, href);
-            if let Ok(data) = read_entry_bytes(archive, &full_path) {
+            if let Ok(data) = safe.read_entry_bytes(&full_path) {
                 images.push((href.clone(), data));
             }
         }

diff --git a/crates/paperjam-epub/src/safe_read.rs b/crates/paperjam-epub/src/safe_read.rs
diff --git a/crates/paperjam-model/Cargo.toml b/crates/paperjam-model/Cargo.toml
@@ -7,3 +7,15 @@ license.workspace = true
 description = "Pure data types and traits for the paperjam document processing ecosystem"
 
 [dependencies]
+thiserror = { workspace = true, optional = true }
+zip = { version = "2", default-features = false, features = ["deflate"], optional = true }
+
+[features]
+# Shared hardened ZIP reader used by the OOXML/EPUB format crates.
+# Off by default so the PDF engine and other non-zip consumers stay
+# dependency-free.
+zip_safety = ["dep:zip", "dep:thiserror"]
+
+[dev-dependencies]
+thiserror = { workspace = true }
+zip = { version = "2", default-features = false, features = ["deflate"] }
diff --git a/crates/paperjam-model/src/lib.rs b/crates/paperjam-model/src/lib.rs
@@ -11,6 +11,9 @@
 pub mod document;
 pub mod format;
 
+#[cfg(feature = "zip_safety")]
+pub mod zip_safety;
+
 pub mod annotations;
 pub mod bookmarks;
 pub mod conversion;