diff --git a/Cargo.lock b/Cargo.lock
index 2179a79e4..ca91fc15a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9684,6 +9684,7 @@ checksum = "dcab981e19633ebcf0b001ddd37dd802996098bc1864f90b7c5d970ce76c1d59"
 dependencies = [
  "bzip2",
  "crc32fast",
+ "flate2",
  "indexmap 2.14.0",
  "memchr",
  "typed-path",
diff --git a/Cargo.toml b/Cargo.toml
index 1ecb78692..1c4a3458f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -65,7 +65,7 @@ toml = "1.0.0"
 tracing = "0.1.37"
 url = { version = "2.1.1", features = ["serde"] }
 walkdir = "2"
-zip = { version = "8.0.0", default-features = false, features = ["bzip2"] }
+zip = { version = "8.0.0", default-features = false, features = ["bzip2", "deflate-flate2"] }
 
 [workspace.lints.clippy]
 dbg_macro = "warn"
diff --git a/crates/bin/docs_rs_builder/src/docbuilder/rustwide_builder.rs b/crates/bin/docs_rs_builder/src/docbuilder/rustwide_builder.rs
index 3ebe436e0..911bc90dc 100644
--- a/crates/bin/docs_rs_builder/src/docbuilder/rustwide_builder.rs
+++ b/crates/bin/docs_rs_builder/src/docbuilder/rustwide_builder.rs
@@ -1810,7 +1810,7 @@ mod tests {
             &ReleaseData::default(),
             true,
             false,
-            iter::once(CompressionAlgorithm::Bzip2),
+            iter::once(CompressionAlgorithm::Deflate),
             None,
             true,
             42,
diff --git a/crates/bin/docs_rs_web/templates/core/about/download.html b/crates/bin/docs_rs_web/templates/core/about/download.html
index 96e41335a..0e076a480 100644
--- a/crates/bin/docs_rs_web/templates/core/about/download.html
+++ b/crates/bin/docs_rs_web/templates/core/about/download.html
@@ -38,7 +38,7 @@
 URLs
 
 processing / caveats
 
 To unpack the ZIP file you need any zip utility that supports
-PKZIP version 4.6 and BZIP2 compression.
+PKZIP version 4.6 and BZIP2 & Deflate compression.
 
 The archives will contain all the documentation HTML files for all
@@ -49,7 +49,7 @@
 processing / caveats
 
 Docs.rs is running rustdoc with --static-root-path "/-/rustdoc.static/", which
 leads to all references to static assets breaking if they are not
-available under that path. Older builds used --static-root-path "/", which
+available under that path. Older builds used --static-root-path "/", which
 means you will have to handle both.
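For orientation, here is a minimal, self-contained sketch of what the newly enabled `deflate-flate2` zip feature is used for further down in this diff: writing archive entries with the `Deflated` method via the same `SimpleFileOptions` chain that `create_zip_archive` switches from Bzip2/level 3 to Deflated/level 6. The entry name, payload, and `main` wrapper are made up for illustration; only the `zip` crate calls that actually appear in this diff are assumed.

use std::io::{Cursor, Write as _};

fn main() -> zip::result::ZipResult<()> {
    // In-memory stand-in for the tempfile-backed archive the storage code writes.
    let mut archive = zip::ZipWriter::new(Cursor::new(Vec::new()));

    // The same options chain this PR switches over to Deflate.
    let options = zip::write::SimpleFileOptions::default()
        .compression_method(zip::CompressionMethod::Deflated)
        .compression_level(Some(6));

    archive.start_file("index.html", options)?;
    archive.write_all(b"<html>hello</html>")?;

    // finish() writes the central directory; every entry is now marked as
    // Deflated, which is what the archive-index code below matches on.
    let _zip_bytes = archive.finish()?.into_inner();
    Ok(())
}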
diff --git a/crates/lib/docs_rs_storage/Cargo.toml b/crates/lib/docs_rs_storage/Cargo.toml
index 8a22eb248..91dbd0459 100644
--- a/crates/lib/docs_rs_storage/Cargo.toml
+++ b/crates/lib/docs_rs_storage/Cargo.toml
@@ -14,7 +14,7 @@ testing = [
 
 [dependencies]
 anyhow = { workspace = true }
-async-compression = { version = "0.4.32", features = ["bzip2", "gzip", "tokio", "zstd"] }
+async-compression = { version = "0.4.32", features = ["bzip2", "deflate", "gzip", "tokio", "zstd"] }
 async-stream = { workspace = true }
 # The default `rustls` feature pulls in the legacy hyper 0.14 + rustls 0.21
 # stack via `aws-smithy-runtime/tls-rustls`, which includes the vulnerable
diff --git a/crates/lib/docs_rs_storage/benches/compression.rs b/crates/lib/docs_rs_storage/benches/compression.rs
index 051e378ad..9a66e21da 100644
--- a/crates/lib/docs_rs_storage/benches/compression.rs
+++ b/crates/lib/docs_rs_storage/benches/compression.rs
@@ -7,19 +7,29 @@ pub fn regex_capture_matches(c: &mut Criterion) {
     // this isn't a great benchmark because it only tests on one file
     // ideally we would build a whole crate and compress each file, taking the average
     let html = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap();
+    let html = html.repeat(100); // 100 KiB * 100 => ~10 MiB
     let html_slice = html.as_bytes();
+    let max_size = html.len() + 1;
+
+    // Pre-compress data for decompression benchmarks
+    let compressed_zstd = compress(html_slice, CompressionAlgorithm::Zstd).unwrap();
+    let compressed_bzip2 = compress(html_slice, CompressionAlgorithm::Bzip2).unwrap();
+    let compressed_gzip = compress(html_slice, CompressionAlgorithm::Gzip).unwrap();
+    let compressed_deflate = compress(html_slice, CompressionAlgorithm::Deflate).unwrap();
+
     c.benchmark_group("regex html")
         .throughput(Throughput::Bytes(html_slice.len() as u64))
+        .sample_size(10)
         .bench_function("compress zstd", |b| {
             b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Zstd));
         })
         .bench_function("decompress zstd", |b| {
             b.iter(|| {
                 decompress(
-                    black_box(html_slice),
+                    black_box(compressed_zstd.as_slice()),
                     CompressionAlgorithm::Zstd,
-                    5 * 1024 * 1024,
+                    max_size,
                 )
             });
         })
@@ -29,9 +39,9 @@ pub fn regex_capture_matches(c: &mut Criterion) {
         .bench_function("decompress bzip2", |b| {
             b.iter(|| {
                 decompress(
-                    black_box(html_slice),
+                    black_box(compressed_bzip2.as_slice()),
                     CompressionAlgorithm::Bzip2,
-                    5 * 1024 * 1024,
+                    max_size,
                 )
             });
         })
@@ -41,9 +51,21 @@ pub fn regex_capture_matches(c: &mut Criterion) {
         .bench_function("decompress gzip", |b| {
             b.iter(|| {
                 decompress(
-                    black_box(html_slice),
+                    black_box(compressed_gzip.as_slice()),
                     CompressionAlgorithm::Gzip,
-                    5 * 1024 * 1024,
+                    max_size,
+                )
+            });
+        })
+        .bench_function("compress deflate", |b| {
+            b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Deflate));
+        })
+        .bench_function("decompress deflate", |b| {
+            b.iter(|| {
+                decompress(
+                    black_box(compressed_deflate.as_slice()),
+                    CompressionAlgorithm::Deflate,
+                    max_size,
                 )
             });
         });
diff --git a/crates/lib/docs_rs_storage/src/archive_index.rs b/crates/lib/docs_rs_storage/src/archive_index.rs
index 218fb3fe1..5324c9298 100644
--- a/crates/lib/docs_rs_storage/src/archive_index.rs
+++ b/crates/lib/docs_rs_storage/src/archive_index.rs
@@ -724,7 +724,6 @@
             .execute(&mut *tx)
             .await?;
 
-        let compression_bzip = CompressionAlgorithm::Bzip2 as i32;
         let (tx_entries, mut rx_entries) = mpsc::channel::<(String, u64, u64, i32)>(1000);
 
         let zip_task = spawn_blocking(move || {
@@ -738,7 +737,8 @@
                     .ok_or_else(|| anyhow!("missing data_start in zip directory"))?;
                 let end = start + entry.compressed_size() - 1;
                 let compression_raw = match entry.compression() {
-                    zip::CompressionMethod::Bzip2 => compression_bzip,
+                    zip::CompressionMethod::Bzip2 => CompressionAlgorithm::Bzip2 as i32,
+                    zip::CompressionMethod::Deflated => CompressionAlgorithm::Deflate as i32,
                     c => bail!("unsupported compression algorithm {} in zip-file", c),
                 };
@@ -983,6 +983,13 @@ mod tests {
     }
 
     async fn create_test_archive(file_count: u32) -> Result {
+        create_test_archive_with_compression(file_count, zip::CompressionMethod::Deflated).await
+    }
+
+    async fn create_test_archive_with_compression(
+        file_count: u32,
+        compression: zip::CompressionMethod,
+    ) -> Result {
         let writer = spawn_blocking(move || {
             use std::io::Write as _;
@@ -996,7 +1003,7 @@
             archive.start_file(
                 format!("testfile{i}"),
                 SimpleFileOptions::default()
-                    .compression_method(zip::CompressionMethod::Bzip2)
+                    .compression_method(compression)
                     .compression_level(Some(1)),
             )?;
             archive.write_all(&objectcontent)?;
@@ -1168,7 +1175,22 @@
         let mut index = Index::open(&tempfile).await?;
         let fi = index.find("testfile0").await?.unwrap();
-        assert_eq!(fi.range, FileRange::new(39, 459));
+        assert_eq!(fi.compression, CompressionAlgorithm::Deflate);
+
+        assert!(index.find("some_other_file").await?.is_none());
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn index_create_save_load_sqlite_legacy_bzip2() -> Result<()> {
+        let tf = create_test_archive_with_compression(1, zip::CompressionMethod::Bzip2).await?;
+
+        let tempfile = tempfile::NamedTempFile::new()?.into_temp_path();
+        create(tf, &tempfile).await?;
+
+        let mut index = Index::open(&tempfile).await?;
+        let fi = index.find("testfile0").await?.unwrap();
+        assert_eq!(fi.compression, CompressionAlgorithm::Bzip2);
 
         assert!(index.find("some_other_file").await?.is_none());
diff --git a/crates/lib/docs_rs_storage/src/compression.rs b/crates/lib/docs_rs_storage/src/compression.rs
index 077ffb126..3076ca571 100644
--- a/crates/lib/docs_rs_storage/src/compression.rs
+++ b/crates/lib/docs_rs_storage/src/compression.rs
@@ -1,7 +1,7 @@
 use anyhow::Error;
 use bzip2::read::{BzDecoder, BzEncoder};
 use docs_rs_types::CompressionAlgorithm;
-use flate2::read::{GzDecoder, GzEncoder};
+use flate2::read::{DeflateDecoder, DeflateEncoder, GzDecoder, GzEncoder};
 use std::io::{self, Read};
 use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite};
 
@@ -22,6 +22,12 @@ pub fn compress(content: impl Read, algorithm: CompressionAlgorithm) -> Result<Vec<u8>, Error> {
+        CompressionAlgorithm::Deflate => {
+            let mut compressor = DeflateEncoder::new(content, flate2::Compression::default());
+            let mut data = vec![];
+            compressor.read_to_end(&mut data)?;
+            Ok(data)
+        }
     }
 }
@@ -54,6 +60,11 @@ where
             io::copy(&mut reader, &mut enc).await?;
             enc.shutdown().await?;
         }
+        CompressionAlgorithm::Deflate => {
+            let mut enc = write::DeflateEncoder::new(writer);
+            io::copy(&mut reader, &mut enc).await?;
+            enc.shutdown().await?;
+        }
     }
 
     Ok(())
@@ -78,6 +89,9 @@ pub fn wrap_reader_for_decompression<'a>(
         CompressionAlgorithm::Gzip => {
             Box::new(io::BufReader::new(bufread::GzipDecoder::new(input)))
         }
+        CompressionAlgorithm::Deflate => {
+            Box::new(io::BufReader::new(bufread::DeflateDecoder::new(input)))
+        }
     }
 }
@@ -97,6 +111,9 @@ pub fn decompress(
         CompressionAlgorithm::Gzip => {
             io::copy(&mut GzDecoder::new(content), &mut buffer)?;
         }
+        CompressionAlgorithm::Deflate => {
+            io::copy(&mut DeflateDecoder::new(content), &mut buffer)?;
+        }
     }
 
     Ok(buffer.into_inner())
@@ -167,6 +184,7 @@ mod tests {
     #[test_case(CompressionAlgorithm::Zstd)]
     #[test_case(CompressionAlgorithm::Bzip2)]
     #[test_case(CompressionAlgorithm::Gzip)]
+    #[test_case(CompressionAlgorithm::Deflate)]
     async fn test_async_compression(alg: CompressionAlgorithm) -> Result<()> {
         const CONTENT: &[u8] = b"Hello, world! Hello, world! Hello, world! Hello, world!";
diff --git a/crates/lib/docs_rs_storage/src/storage/non_blocking.rs b/crates/lib/docs_rs_storage/src/storage/non_blocking.rs
index 2feca14fe..206793304 100644
--- a/crates/lib/docs_rs_storage/src/storage/non_blocking.rs
+++ b/crates/lib/docs_rs_storage/src/storage/non_blocking.rs
@@ -347,8 +347,8 @@ impl AsyncStorage {
             info_span!("create_zip_archive", %archive_path, root_dir=%root_dir.display()).entered();
 
         let options = zip::write::SimpleFileOptions::default()
-            .compression_method(zip::CompressionMethod::Bzip2)
-            .compression_level(Some(3));
+            .compression_method(zip::CompressionMethod::Deflated)
+            .compression_level(Some(6));
 
         // rustdoc archives can become a couple of GiB big, so we better use a tempfile.
         let zip_file = fs::File::create(&zip_path)?;
@@ -415,7 +415,7 @@
             })
         )?;
 
-        Ok((file_paths, CompressionAlgorithm::Bzip2))
+        Ok((file_paths, CompressionAlgorithm::Deflate))
     }
 
     /// Store all files in `root_dir` into the backend under `prefix`.
@@ -860,7 +860,7 @@ mod backend_tests {
             .await?
         );
 
-        assert_eq!(compression_alg, CompressionAlgorithm::Bzip2);
+        assert_eq!(compression_alg, CompressionAlgorithm::Deflate);
         assert_eq!(stored_files.len(), files.len());
         for name in &files {
             assert!(get_file_info(&stored_files, name).is_some());
diff --git a/crates/lib/docs_rs_types/src/compression_algorithm.rs b/crates/lib/docs_rs_types/src/compression_algorithm.rs
index de0782d77..849f21b1e 100644
--- a/crates/lib/docs_rs_types/src/compression_algorithm.rs
+++ b/crates/lib/docs_rs_types/src/compression_algorithm.rs
@@ -21,6 +21,7 @@ pub enum CompressionAlgorithm {
     Zstd = 0,
     Bzip2 = 1,
     Gzip = 2,
+    Deflate = 3,
 }
 
 impl CompressionAlgorithm {
@@ -29,6 +30,7 @@
             CompressionAlgorithm::Zstd => "zst",
             CompressionAlgorithm::Bzip2 => "bz2",
             CompressionAlgorithm::Gzip => "gz",
+            CompressionAlgorithm::Deflate => "deflate",
         }
     }
 }
@@ -52,6 +54,7 @@ pub fn compression_from_file_extension(ext: &str) -> Option<CompressionAlgorithm> {
         "zst" => Some(CompressionAlgorithm::Zstd),
         "bz2" => Some(CompressionAlgorithm::Bzip2),
         "gz" => Some(CompressionAlgorithm::Gzip),
+        "deflate" => Some(CompressionAlgorithm::Deflate),
         _ => None,
     }
 }
@@ -64,6 +67,7 @@ mod tests {
     #[test_case(CompressionAlgorithm::Zstd, "Zstd")]
     #[test_case(CompressionAlgorithm::Bzip2, "Bzip2")]
     #[test_case(CompressionAlgorithm::Gzip, "Gzip")]
+    #[test_case(CompressionAlgorithm::Deflate, "Deflate")]
     fn test_enum_display(alg: CompressionAlgorithm, expected: &str) {
         assert_eq!(alg.to_string(), expected);
     }
 
@@ -71,6 +75,7 @@
     #[test_case(CompressionAlgorithm::Zstd, "zst")]
     #[test_case(CompressionAlgorithm::Bzip2, "bz2")]
     #[test_case(CompressionAlgorithm::Gzip, "gz")]
+    #[test_case(CompressionAlgorithm::Deflate, "deflate")]
     fn test_file_extensions(alg: CompressionAlgorithm, expected: &str) {
         assert_eq!(alg.file_extension(), expected);
         assert_eq!(compression_from_file_extension(expected), Some(alg));
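Taken together, the new `Deflate` variant round-trips through the same synchronous helpers as the existing algorithms. A minimal sketch, assuming `compress` and `decompress` from crates/lib/docs_rs_storage/src/compression.rs are exported under `docs_rs_storage::compression` (the `main` wrapper and sample input are illustrative, not part of the patch):

use docs_rs_storage::compression::{compress, decompress};
use docs_rs_types::CompressionAlgorithm;

fn main() -> anyhow::Result<()> {
    let input: &[u8] = b"Hello, world! Hello, world! Hello, world! Hello, world!";

    // `compress` consumes any `impl Read` and returns the raw deflate stream.
    let packed = compress(input, CompressionAlgorithm::Deflate)?;

    // `decompress` takes the same `max_size` bound the benchmarks pass above,
    // capping how large the decompressed output may grow.
    let unpacked = decompress(packed.as_slice(), CompressionAlgorithm::Deflate, input.len() + 1)?;

    assert_eq!(unpacked, input);
    Ok(())
}

Note that `Deflate` here is the raw deflate stream with no gzip header, matching what ZIP stores per Deflated entry — which is presumably why the archive index can serve a byte range straight out of the archive and hand it to `DeflateDecoder`.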