Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ toml = "1.0.0"
tracing = "0.1.37"
url = { version = "2.1.1", features = ["serde"] }
walkdir = "2"
zip = { version = "8.0.0", default-features = false, features = ["bzip2"] }
zip = { version = "8.0.0", default-features = false, features = ["bzip2", "deflate-flate2"] }

[workspace.lints.clippy]
dbg_macro = "warn"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1810,7 +1810,7 @@ mod tests {
&ReleaseData::default(),
true,
false,
iter::once(CompressionAlgorithm::Bzip2),
iter::once(CompressionAlgorithm::Deflate),
None,
true,
42,
Expand Down
4 changes: 2 additions & 2 deletions crates/bin/docs_rs_web/templates/core/about/download.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ <h2>URLs</h2>
<h2>processing / caveats</h2>
<p>
To unpack the ZIP file you need any zip utility that supports
PKZIP version 4.6 and BZIP2 compression.
PKZIP version 4.6 and BZIP2 and Deflate compression.
</p>
<p>
The archives will contain all the documentation HTML files for all
Expand All @@ -49,7 +49,7 @@ <h2>processing / caveats</h2>
<p>
Docs.rs is running rustdoc with <code>--static-root-path "/-/rustdoc.static/"</code>,
which leads to all references to static assets breaking if they are not
available under that path. Older builds used <code>--static-root-path "/"</code>, which
available under that path. Older builds used <code>--static-root-path "/"</code>, which
means you will have to handle both.
</p>
<p>
Expand Down
2 changes: 1 addition & 1 deletion crates/lib/docs_rs_storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ testing = [

[dependencies]
anyhow = { workspace = true }
async-compression = { version = "0.4.32", features = ["bzip2", "gzip", "tokio", "zstd"] }
async-compression = { version = "0.4.32", features = ["bzip2", "deflate", "gzip", "tokio", "zstd"] }
async-stream = { workspace = true }
# The default `rustls` feature pulls in the legacy hyper 0.14 + rustls 0.21
# stack via `aws-smithy-runtime/tls-rustls`, which includes the vulnerable
Expand Down
34 changes: 28 additions & 6 deletions crates/lib/docs_rs_storage/benches/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,29 @@ pub fn regex_capture_matches(c: &mut Criterion) {
// this isn't a great benchmark because it only tests on one file
// ideally we would build a whole crate and compress each file, taking the average
let html = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap();
let html = html.repeat(100); // 100 KiB * 100 => ~10 MiB
let html_slice = html.as_bytes();

let max_size = html.len() + 1;

// Pre-compress data for decompression benchmarks
let compressed_zstd = compress(html_slice, CompressionAlgorithm::Zstd).unwrap();
let compressed_bzip2 = compress(html_slice, CompressionAlgorithm::Bzip2).unwrap();
let compressed_gzip = compress(html_slice, CompressionAlgorithm::Gzip).unwrap();
let compressed_deflate = compress(html_slice, CompressionAlgorithm::Deflate).unwrap();

c.benchmark_group("regex html")
.throughput(Throughput::Bytes(html_slice.len() as u64))
.sample_size(10)
.bench_function("compress zstd", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Zstd));
})
.bench_function("decompress zstd", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
black_box(compressed_zstd.as_slice()),
CompressionAlgorithm::Zstd,
5 * 1024 * 1024,
max_size,
)
});
})
Expand All @@ -29,9 +39,9 @@ pub fn regex_capture_matches(c: &mut Criterion) {
.bench_function("decompress bzip2", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
black_box(compressed_bzip2.as_slice()),
CompressionAlgorithm::Bzip2,
5 * 1024 * 1024,
max_size,
)
});
})
Expand All @@ -41,9 +51,21 @@ pub fn regex_capture_matches(c: &mut Criterion) {
.bench_function("decompress gzip", |b| {
b.iter(|| {
decompress(
black_box(html_slice),
black_box(compressed_gzip.as_slice()),
CompressionAlgorithm::Gzip,
5 * 1024 * 1024,
max_size,
)
});
})
.bench_function("compress deflate", |b| {
b.iter(|| compress(black_box(html_slice), CompressionAlgorithm::Deflate));
})
.bench_function("decompress deflate", |b| {
b.iter(|| {
decompress(
black_box(compressed_deflate.as_slice()),
CompressionAlgorithm::Deflate,
max_size,
)
});
});
Expand Down
30 changes: 26 additions & 4 deletions crates/lib/docs_rs_storage/src/archive_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,6 @@ where
.execute(&mut *tx)
.await?;

let compression_bzip = CompressionAlgorithm::Bzip2 as i32;
let (tx_entries, mut rx_entries) = mpsc::channel::<(String, u64, u64, i32)>(1000);

let zip_task = spawn_blocking(move || {
Expand All @@ -738,7 +737,8 @@ where
.ok_or_else(|| anyhow!("missing data_start in zip directory"))?;
let end = start + entry.compressed_size() - 1;
let compression_raw = match entry.compression() {
zip::CompressionMethod::Bzip2 => compression_bzip,
zip::CompressionMethod::Bzip2 => CompressionAlgorithm::Bzip2 as i32,
zip::CompressionMethod::Deflated => CompressionAlgorithm::Deflate as i32,
c => bail!("unsupported compression algorithm {} in zip-file", c),
};

Expand Down Expand Up @@ -983,6 +983,13 @@ mod tests {
}

/// Builds a small in-memory test ZIP archive containing `file_count` entries,
/// using the crate's current default compression method (Deflate).
///
/// Thin convenience wrapper around `create_test_archive_with_compression`;
/// see that function for how the entries (`testfile{i}`) are written.
async fn create_test_archive(file_count: u32) -> Result<fs::File> {
create_test_archive_with_compression(file_count, zip::CompressionMethod::Deflated).await
}

async fn create_test_archive_with_compression(
file_count: u32,
compression: zip::CompressionMethod,
) -> Result<fs::File> {
let writer = spawn_blocking(move || {
use std::io::Write as _;

Expand All @@ -996,7 +1003,7 @@ mod tests {
archive.start_file(
format!("testfile{i}"),
SimpleFileOptions::default()
.compression_method(zip::CompressionMethod::Bzip2)
.compression_method(compression)
.compression_level(Some(1)),
)?;
archive.write_all(&objectcontent)?;
Expand Down Expand Up @@ -1168,7 +1175,22 @@ mod tests {
let mut index = Index::open(&tempfile).await?;
let fi = index.find("testfile0").await?.unwrap();

assert_eq!(fi.range, FileRange::new(39, 459));
assert_eq!(fi.compression, CompressionAlgorithm::Deflate);

assert!(index.find("some_other_file",).await?.is_none());
Ok(())
}

#[tokio::test]
async fn index_create_save_load_sqlite_legacy_bzip2() -> Result<()> {
let tf = create_test_archive_with_compression(1, zip::CompressionMethod::Bzip2).await?;

let tempfile = tempfile::NamedTempFile::new()?.into_temp_path();
create(tf, &tempfile).await?;

let mut index = Index::open(&tempfile).await?;
let fi = index.find("testfile0").await?.unwrap();

assert_eq!(fi.compression, CompressionAlgorithm::Bzip2);

assert!(index.find("some_other_file").await?.is_none());
Expand Down
20 changes: 19 additions & 1 deletion crates/lib/docs_rs_storage/src/compression.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use anyhow::Error;
use bzip2::read::{BzDecoder, BzEncoder};
use docs_rs_types::CompressionAlgorithm;
use flate2::read::{GzDecoder, GzEncoder};
use flate2::read::{DeflateDecoder, DeflateEncoder, GzDecoder, GzEncoder};
use std::io::{self, Read};
use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite};

Expand All @@ -22,6 +22,12 @@ pub fn compress(content: impl Read, algorithm: CompressionAlgorithm) -> Result<V
compressor.read_to_end(&mut data)?;
Ok(data)
}
CompressionAlgorithm::Deflate => {
let mut compressor = DeflateEncoder::new(content, flate2::Compression::default());
let mut data = vec![];
compressor.read_to_end(&mut data)?;
Ok(data)
}
}
}

Expand Down Expand Up @@ -54,6 +60,11 @@ where
io::copy(&mut reader, &mut enc).await?;
enc.shutdown().await?;
}
CompressionAlgorithm::Deflate => {
let mut enc = write::DeflateEncoder::new(writer);
io::copy(&mut reader, &mut enc).await?;
enc.shutdown().await?;
}
}

Ok(())
Expand All @@ -78,6 +89,9 @@ pub fn wrap_reader_for_decompression<'a>(
CompressionAlgorithm::Gzip => {
Box::new(io::BufReader::new(bufread::GzipDecoder::new(input)))
}
CompressionAlgorithm::Deflate => {
Box::new(io::BufReader::new(bufread::DeflateDecoder::new(input)))
}
}
}

Expand All @@ -97,6 +111,9 @@ pub fn decompress(
CompressionAlgorithm::Gzip => {
io::copy(&mut GzDecoder::new(content), &mut buffer)?;
}
CompressionAlgorithm::Deflate => {
io::copy(&mut DeflateDecoder::new(content), &mut buffer)?;
}
}

Ok(buffer.into_inner())
Expand Down Expand Up @@ -167,6 +184,7 @@ mod tests {
#[test_case(CompressionAlgorithm::Zstd)]
#[test_case(CompressionAlgorithm::Bzip2)]
#[test_case(CompressionAlgorithm::Gzip)]
#[test_case(CompressionAlgorithm::Deflate)]
async fn test_async_compression(alg: CompressionAlgorithm) -> Result<()> {
const CONTENT: &[u8] = b"Hello, world! Hello, world! Hello, world! Hello, world!";

Expand Down
8 changes: 4 additions & 4 deletions crates/lib/docs_rs_storage/src/storage/non_blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,8 @@ impl AsyncStorage {
info_span!("create_zip_archive", %archive_path, root_dir=%root_dir.display()).entered();

let options = zip::write::SimpleFileOptions::default()
.compression_method(zip::CompressionMethod::Bzip2)
.compression_level(Some(3));
.compression_method(zip::CompressionMethod::Deflated)
.compression_level(Some(6));

// rustdoc archives can become a couple of GiB big, so we better use a tempfile.
let zip_file = fs::File::create(&zip_path)?;
Expand Down Expand Up @@ -415,7 +415,7 @@ impl AsyncStorage {
})
)?;

Ok((file_paths, CompressionAlgorithm::Bzip2))
Ok((file_paths, CompressionAlgorithm::Deflate))
}

/// Store all files in `root_dir` into the backend under `prefix`.
Expand Down Expand Up @@ -860,7 +860,7 @@ mod backend_tests {
.await?
);

assert_eq!(compression_alg, CompressionAlgorithm::Bzip2);
assert_eq!(compression_alg, CompressionAlgorithm::Deflate);
assert_eq!(stored_files.len(), files.len());
for name in &files {
assert!(get_file_info(&stored_files, name).is_some());
Expand Down
5 changes: 5 additions & 0 deletions crates/lib/docs_rs_types/src/compression_algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub enum CompressionAlgorithm {
Zstd = 0,
Bzip2 = 1,
Gzip = 2,
Deflate = 3,
}

impl CompressionAlgorithm {
Expand All @@ -29,6 +30,7 @@ impl CompressionAlgorithm {
CompressionAlgorithm::Zstd => "zst",
CompressionAlgorithm::Bzip2 => "bz2",
CompressionAlgorithm::Gzip => "gz",
CompressionAlgorithm::Deflate => "deflate",
}
}
}
Expand All @@ -52,6 +54,7 @@ pub fn compression_from_file_extension(ext: &str) -> Option<CompressionAlgorithm
"zst" => Some(CompressionAlgorithm::Zstd),
"bz2" => Some(CompressionAlgorithm::Bzip2),
"gz" => Some(CompressionAlgorithm::Gzip),
"deflate" => Some(CompressionAlgorithm::Deflate),
_ => None,
}
}
Expand All @@ -64,13 +67,15 @@ mod tests {
#[test_case(CompressionAlgorithm::Zstd, "Zstd")]
#[test_case(CompressionAlgorithm::Bzip2, "Bzip2")]
#[test_case(CompressionAlgorithm::Gzip, "Gzip")]
#[test_case(CompressionAlgorithm::Deflate, "Deflate")]
fn test_enum_display(alg: CompressionAlgorithm, expected: &str) {
assert_eq!(alg.to_string(), expected);
}

#[test_case(CompressionAlgorithm::Zstd, "zst")]
#[test_case(CompressionAlgorithm::Bzip2, "bz2")]
#[test_case(CompressionAlgorithm::Gzip, "gz")]
#[test_case(CompressionAlgorithm::Deflate, "deflate")]
fn test_file_extensions(alg: CompressionAlgorithm, expected: &str) {
assert_eq!(alg.file_extension(), expected);
assert_eq!(compression_from_file_extension(expected), Some(alg));
Expand Down
Loading