diff --git a/crates/bin/docs_rs_web/src/handlers/rustdoc.rs b/crates/bin/docs_rs_web/src/handlers/rustdoc.rs index 99ee16e74..d5262f055 100644 --- a/crates/bin/docs_rs_web/src/handlers/rustdoc.rs +++ b/crates/bin/docs_rs_web/src/handlers/rustdoc.rs @@ -1132,7 +1132,7 @@ mod test { ) -> Result { try_latest_version_redirect(krate, path, web, config) .await? - .with_context(|| anyhow::anyhow!("no redirect found for {}", path)) + .with_context(|| anyhow!("no redirect found for {}", path)) } #[test_case(true)] diff --git a/crates/bin/docs_rs_web/src/handlers/source.rs b/crates/bin/docs_rs_web/src/handlers/source.rs index be702cf76..84aad8e0a 100644 --- a/crates/bin/docs_rs_web/src/handlers/source.rs +++ b/crates/bin/docs_rs_web/src/handlers/source.rs @@ -18,42 +18,40 @@ use askama::Template; use axum::{Extension, response::IntoResponse}; use axum_extra::{TypedHeader, headers::HeaderMapExt}; use docs_rs_headers::{CanonicalUrl, IfNoneMatch}; -use docs_rs_storage::{AsyncStorage, PathNotFoundError}; +use docs_rs_storage::{AsyncStorage, FolderEntry, PathNotFoundError, source_archive_path}; use docs_rs_types::{BuildId, KrateName, ReqVersion, Version}; -use mime::Mime; -use std::{cmp::Ordering, sync::Arc}; +use futures_util::TryStreamExt as _; +use std::sync::Arc; use tracing::instrument; -/// A source file's name and mime type -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] -struct File { - name: String, - mime: String, -} - -impl File { - fn from_path_and_mime(path: &str, mime: &Mime) -> File { - let (name, mime) = if let Some((dir, _)) = path.split_once('/') { - (dir, "dir") - } else { - (path, mime.as_ref()) - }; - - Self { - name: name.to_owned(), - mime: mime.to_owned(), - } - } -} - /// A list of source files #[derive(Debug, Clone, PartialEq, Default)] struct FileList { - files: Vec, + files: Vec, } impl FileList { - /// Gets FileList from a request path + async fn from_archive_index( + storage: &AsyncStorage, + name: &KrateName, + version: &Version, + 
latest_build_id: Option, + folder: Option<&str>, + ) -> Result { + let mut files = storage + .find_archive_index(&source_archive_path(name, version), latest_build_id) + .await? + .folder_contents(folder) + .try_filter(|entry| std::future::ready(entry.name() != ".cargo-ok")) + .try_collect::>() + .await?; + + files.sort_unstable(); + + Ok(Self { files }) + } + + /// Gets legacy FileList from a request path /// /// All paths stored in database have this format: /// @@ -104,11 +102,6 @@ impl FileList { for file in files { if let Some(file) = file.as_array() { - let mime: Mime = file[0] - .as_str() - .unwrap() - .parse() - .unwrap_or(mime::APPLICATION_OCTET_STREAM); let path = file[1].as_str().unwrap(); // skip .cargo-ok generated by cargo @@ -118,7 +111,7 @@ impl FileList { // look only files for req_path if let Some(path) = path.strip_prefix(folder) { - let file = File::from_path_and_mime(path, &mime); + let file = FolderEntry::from_path(path); // avoid adding duplicates, a directory may occur more than once if !file_list.contains(&file) { @@ -132,16 +125,7 @@ impl FileList { return Ok(None); } - file_list.sort_by(|a, b| { - // directories must be listed first - if a.mime == "dir" && b.mime != "dir" { - Ordering::Less - } else if a.mime != "dir" && b.mime == "dir" { - Ordering::Greater - } else { - a.name.to_lowercase().cmp(&b.name.to_lowercase()) - } - }); + file_list.sort_unstable(); Ok(Some(FileList { files: file_list })) } else { @@ -157,7 +141,7 @@ struct SourcePage { file_list: FileList, metadata: MetaData, show_parent_link: bool, - file: Option, + file: Option, file_content: Option, canonical_url: CanonicalUrl, is_file_too_large: bool, @@ -246,16 +230,31 @@ pub(crate) async fn source_browser_handler( let inner_path = params.inner_path(); - let current_folder = if let Some(last_slash_pos) = inner_path.rfind('/') { - &inner_path[..last_slash_pos + 1] - } else { - "" - }; - let show_parent_link = !current_folder.is_empty(); + let current_folder = inner_path + 
.rfind('/') + .map(|last_slash_pos| &inner_path[..last_slash_pos + 1]); - let file_list = FileList::from_path(&mut conn, params.name(), version, current_folder) + let show_parent_link = current_folder.is_some(); + + let file_list = if row.archive_storage { + FileList::from_archive_index( + &storage, + params.name(), + version, + row.latest_build_id, + current_folder, + ) + .await? + } else { + FileList::from_path( + &mut conn, + params.name(), + version, + current_folder.unwrap_or(""), + ) .await? - .unwrap_or_default(); + .unwrap_or_default() + }; let metadata = MetaData::from_crate( &mut conn, @@ -327,7 +326,7 @@ pub(crate) async fn source_browser_handler( .map(|(_, path)| path) .unwrap_or(&blob.path); ( - Some(File::from_path_and_mime(path, &blob.mime)), + Some(FolderEntry::from_path(path)), Some(String::from_utf8_lossy(&blob.content).to_string()), ) } diff --git a/crates/bin/docs_rs_web/src/utils/mod.rs b/crates/bin/docs_rs_web/src/utils/mod.rs index b7181d683..86ac95084 100644 --- a/crates/bin/docs_rs_web/src/utils/mod.rs +++ b/crates/bin/docs_rs_web/src/utils/mod.rs @@ -3,10 +3,42 @@ pub(crate) mod html_rewrite; pub(crate) mod licenses; pub(crate) mod markdown; +use crate::{ + icons::{ + IconFile, IconFileLines, IconFolderOpen, IconGitAlt, IconLock, IconMarkdown, IconRust, + }, + page::templates::{RenderBrands, RenderRegular, RenderSolid}, +}; use anyhow::Result; +use askama::filters::Safe; use chrono::{DateTime, NaiveDate, Utc}; +use docs_rs_mimes as mimes; +use docs_rs_storage::FolderEntry; use docs_rs_utils::rustc_version::parse_rustc_date; +pub fn folder_entry_icon(entry: &FolderEntry) -> Safe { + if entry.is_dir() { + return IconFolderOpen.render_regular(false, false, ""); + } + + let mime = entry.mime().expect("files always have mime"); + let name = entry.name(); + + if *mime == *mimes::TEXT_RUST { + IconRust.render_brands(false, false, "") + } else if *mime == mime::TEXT_PLAIN && name == "Cargo.lock" { + IconLock.render_solid(false, false, "") + } else 
if *mime == *mimes::TEXT_MARKDOWN { + IconMarkdown.render_brands(false, false, "") + } else if *mime == mime::TEXT_PLAIN && name == ".gitignore" { + IconGitAlt.render_brands(false, false, "") + } else if *mime == mime::TEXT_PLAIN || mime.type_() == "text" { + IconFileLines.render_regular(false, false, "") + } else { + IconFile.render_regular(false, false, "") + } +} + /// Picks the correct "rustdoc.css" static file depending on which rustdoc version was used to /// generate this version of this crate. pub(crate) fn get_correct_docsrs_style_file(version: &str) -> Result { diff --git a/crates/bin/docs_rs_web/templates/crate/source.html b/crates/bin/docs_rs_web/templates/crate/source.html index 70d59d0c0..c496cc250 100644 --- a/crates/bin/docs_rs_web/templates/crate/source.html +++ b/crates/bin/docs_rs_web/templates/crate/source.html @@ -44,55 +44,9 @@ Show a link to the file with a fancy icon. If the file is a directory, `/` is appended to show the contents of the folder #} - - {# Directories #} - {%- if file.mime == "dir" -%} - {{ crate::icons::IconFolderOpen.render_regular(false, false, "") }} - - {# Rust files #} - {%- elif file.mime == "text/rust" -%} - {{ crate::icons::IconRust.render_brands(false, false, "") }} - - {# Cargo.lock #} - {%- elif file.mime == "text/plain" && file.name == "Cargo.lock" -%} - {{ crate::icons::IconLock.render_solid(false, false, "") }} - - {# Markdown files #} - {% elif file.mime == "text/markdown" %} - {{ crate::icons::IconMarkdown.render_brands(false, false, "") }} - - {# .gitignore #} - {% elif file.mime == "text/plain" && file.name == ".gitignore" %} - {{ crate::icons::IconGitAlt.render_brands(false, false, "") }} - - {# - More ideas - FontAwesome v5: - ".application/x-bzip" - |"application/gzip" - |"application/x-bzip2" - |"application/vnd.rar" - |"application/x-tar" - |"application/zip" - |"application/x-7z-compressed" => https://fontawesome.com/icons/file-archive - "text/javascript" => https://fontawesome.com/icons/js - 
"application/java-archive" => https://fontawesome.com/icons/java - DevOpticons (https://github.com/file-icons/DevOpicons): - "text/rust" => https://github.com/file-icons/DevOpicons/blob/master/charmap.md#Rust - "text/css" => https://github.com/file-icons/DevOpicons/blob/master/charmap.md#CSS3,%20Full - "text/html" => https://github.com/file-icons/DevOpicons/blob/master/charmap.md#HTML5 - #} - - {# Text files or files which mime starts with `text` #} - {%- elif file.mime == "text/plain" || file.mime.split('/').next() == Some("text") -%} - {{ crate::icons::IconFileLines.render_regular(false, false, "") }} - - {# Binary files and any unrecognized types #} - {% else -%} - {{ crate::icons::IconFile.render_regular(false, false, "") }} - {%- endif -%} - - {{ file.name }} + + {{ crate::utils::folder_entry_icon(file) }} + {{ file.name() }} {%- endfor -%} @@ -115,7 +69,7 @@ {%- if let Some(file_content) = file_content -%} {% decl file_name %} {% if let Some(file) = file %} - {% set file_name = file.name.as_str() %} + {% set file_name = file.name() %} {% else %} {% set file_name = "" %} {% endif %} diff --git a/crates/lib/docs_rs_database/src/releases.rs b/crates/lib/docs_rs_database/src/releases.rs index 175762091..b204b6eb9 100644 --- a/crates/lib/docs_rs_database/src/releases.rs +++ b/crates/lib/docs_rs_database/src/releases.rs @@ -25,7 +25,7 @@ use tracing::{debug, error, info, instrument}; /// NOTE: `source_files` refers to the files originally in the crate, /// not the files generated by rustdoc. 
#[allow(clippy::too_many_arguments)] -#[instrument(skip(conn, compression_algorithms))] +#[instrument(skip(conn, compression_algorithms, source_files))] pub async fn finish_release( conn: &mut sqlx::PgConnection, crate_id: CrateId, diff --git a/crates/lib/docs_rs_storage/src/archive_index.rs b/crates/lib/docs_rs_storage/src/archive_index.rs index 334fb96c6..dc2fe70d4 100644 --- a/crates/lib/docs_rs_storage/src/archive_index.rs +++ b/crates/lib/docs_rs_storage/src/archive_index.rs @@ -1,12 +1,14 @@ use crate::{ - PathNotFoundError, blob::StreamingBlob, config::ArchiveIndexCacheConfig, types::FileRange, - utils::file_list::walk_dir_recursive, + PathNotFoundError, blob::StreamingBlob, config::ArchiveIndexCacheConfig, file::FolderEntry, + types::FileRange, utils::file_list::walk_dir_recursive, }; use anyhow::{Context as _, Result, anyhow, bail}; +use async_stream::try_stream; +use docs_rs_mimes::detect_mime; use docs_rs_opentelemetry::AnyMeterProvider; use docs_rs_types::{BuildId, CompressionAlgorithm}; use docs_rs_utils::spawn_blocking; -use futures_util::TryStreamExt as _; +use futures_util::{Stream, TryStreamExt as _}; use moka::future::Cache as MokaCache; use opentelemetry::{ KeyValue, @@ -14,6 +16,8 @@ use opentelemetry::{ }; use sqlx::{ConnectOptions as _, Connection as _, QueryBuilder, Row as _, Sqlite}; use std::{ + collections::HashSet, + fmt, future::Future, path::{Path, PathBuf}, pin::Pin, @@ -131,12 +135,13 @@ impl Metrics { } #[derive(PartialEq, Eq, Debug)] -pub(crate) struct FileInfo { +pub struct FileInfo { + path: PathBuf, range: FileRange, compression: CompressionAlgorithm, } -struct Entry { +pub(crate) struct Entry { // file size of the local sqlite database. // Will be used to "weigh" cache entries, so that the cache can evict based on // total size of cached files instead of number of entries. 
@@ -433,18 +438,17 @@ impl Cache { Ok(()) } - async fn find_inner( + pub(crate) async fn find_index( &self, archive_path: &str, latest_build_id: Option, - path_in_archive: &str, downloader: &impl Downloader, - ) -> Result> { + ) -> Result { let local_index_path = self.local_index_path(archive_path, latest_build_id); // fast path: try to use whatever is there, no locking - let force_redownload = match find_in_file(&local_index_path, path_in_archive).await { - Ok(res) => { + let force_redownload = match Index::open(&local_index_path).await { + Ok(index) => { // Keep moka's recency/frequency view in sync with successful fast-path // file lookups so TTI and admission decisions reflect real usage. if self.manager.get(&local_index_path).await.is_none() { @@ -456,11 +460,12 @@ impl Cache { ) .await; } - return Ok(res); + + return Ok(index); } Err(err) => { let force_redownload = !err.is::(); - debug!(?err, "archive index lookup failed, will try repair."); + debug!(?err, "archive index open failed, will try repair."); force_redownload } }; @@ -535,7 +540,7 @@ impl Cache { })?; // Final attempt: if this still fails, bubble the error. - find_in_file(local_index_path, path_in_archive).await + Index::open(local_index_path).await } /// Find the file metadata needed to fetch a certain path inside a remote archive. 
@@ -550,10 +555,15 @@ impl Cache { downloader: &impl Downloader, ) -> Result> { for attempt in 1..=FIND_ATTEMPTS { - match self - .find_inner(archive_path, latest_build_id, path_in_archive, downloader) - .await - { + let result = async { + let mut index = self + .find_index(archive_path, latest_build_id, downloader) + .await?; + index.find(path_in_archive).await + } + .await; + + match result { Ok(file_info) => { self.metrics.find_calls.add( 1, @@ -568,7 +578,7 @@ impl Cache { warn!( ?err, %attempt, - "error resolving archive index, purging local cache and retrying" + "error in archive index lookup, purging local cache and retrying" ); self.purge(archive_path, latest_build_id).await?; } @@ -780,51 +790,164 @@ where Ok(zipfile) } -async fn find_in_sqlite_index<'e, E>(executor: E, search_for: &str) -> Result> -where - E: sqlx::Executor<'e, Database = sqlx::Sqlite>, -{ - let row = sqlx::query( - " - SELECT start, end, compression - FROM files - WHERE path = ? - ", - ) - .bind(search_for) - .fetch_optional(executor) - .await - .context("error fetching SQLite data")?; - - if let Some(row) = row { - let start: u64 = row.try_get(0)?; - let end: u64 = row.try_get(1)?; - let compression_raw: i32 = row.try_get(2)?; - - Ok(Some(FileInfo { - range: start..=end, - compression: compression_raw.try_into().map_err(|value| { - anyhow::anyhow!(format!( - "invalid compression algorithm '{value}' in database" - )) - })?, - })) - } else { - Ok(None) - } +pub struct Index { + conn: sqlx::SqliteConnection, } -#[instrument] -pub(crate) async fn find_in_file
<P>
( - archive_index_path: P, - search_for: &str, -) -> Result> -where - P: AsRef + std::fmt::Debug, -{ - let mut conn = sqlite_open(archive_index_path).await?; +impl Index { + pub(crate) async fn open
<P>
(archive_index_path: P) -> Result + where + P: AsRef, + { + let archive_index_path = archive_index_path.as_ref().to_path_buf(); + let conn = sqlite_open(&archive_index_path).await?; + Ok(Self { conn }) + } + + #[instrument(skip(self))] + pub async fn find
<P>
(&mut self, search_for: P) -> Result> + where + P: AsRef + fmt::Debug, + { + let search_for = search_for.as_ref(); + + if search_for.is_absolute() { + bail!("search path in archive index has to be relative"); + } + + let search_str = search_for + .to_str() + .ok_or_else(|| anyhow!("non-UTF-8 path in archive index lookup"))?; + + // now actually find the entry in the index + let row = sqlx::query( + "SELECT start, end, compression + FROM files + WHERE path = ?", + ) + .bind(search_str) + .fetch_optional(&mut self.conn) + .await + .context("error fetching SQLite data")?; + + let file_info = if let Some(row) = row { + let start: u64 = row.try_get(0)?; + let end: u64 = row.try_get(1)?; + let compression_raw: i32 = row.try_get(2)?; + + Some(FileInfo { + path: search_for.to_path_buf(), + range: start..=end, + compression: compression_raw.try_into().map_err(|value| { + anyhow!("invalid compression algorithm '{value}' in database") + })?, + }) + } else { + None + }; + + Ok(file_info) + } + + pub fn list(&mut self) -> impl Stream> + '_ { + try_stream! { + let mut rows = sqlx::query( + "SELECT path, start, end, compression FROM files" + ) + .fetch(&mut self.conn); + + while let Some(row) = rows.try_next().await.context("error fetching SQLite data")? { + let path: String = row.try_get(0)?; + let start: u64 = row.try_get(1)?; + let end: u64 = row.try_get(2)?; + let compression_raw: i32 = row.try_get(3)?; + let path = PathBuf::from(path); + debug_assert!(path.is_relative()); + + yield FileInfo { + path, + range: start..=end, + compression: compression_raw.try_into().map_err(|value| { + anyhow!("invalid compression algorithm '{value}' in database") + })?, + }; + } + } + } + + /// get the folder contents inside the zip archive. + /// * missing folder = list the root + /// * given folder: just lists the files in there, and subfolders, but not their contents. + /// + /// You'll need this method when you build a file-browser for the archive, like + /// in our source pages. 
+ #[instrument(skip(self))] + pub fn folder_contents
<P>
( + &mut self, + folder: Option
<P>
, + ) -> impl Stream> + '_ + where + P: AsRef + std::fmt::Debug, + { + // Build the path prefix string used in GLOB patterns. + // For root (None): prefix = "" + // For a folder: prefix = "some/folder/" + let prefix: Option = folder.as_ref().map(|f| { + let s = f.as_ref().to_string_lossy(); + // Normalize: strip any trailing slash, then re-add exactly one. + format!("{}/", s.trim_end_matches('/')) + }); + + try_stream! { + // Seen-dirs is the only state we must accumulate: one String per unique + // immediate subdirectory name. File rows are yielded as they arrive. + let mut seen_dirs: HashSet = HashSet::new(); + + + let mut rows = if let Some(prefix) = &prefix { + let prefix_upper_bound = format!("{prefix}\u{10ffff}"); + + // NOTE: we're using >= and < for the prefix matching here. + // Using `GLOB` would mean we have to escape the path. + // Other techniques like sqlite string functions would mean the index on the + // table can't be used. + + sqlx::query("SELECT path FROM files WHERE path >= ? AND path < ?") + .bind(prefix) + .bind(prefix_upper_bound) + .fetch(&mut self.conn) + } else { + sqlx::query("SELECT path FROM files") + .fetch(&mut self.conn) + }; + + while let Some(row) = rows.try_next().await.context("error fetching entries from SQLite")? { + let full_path: String = row.try_get(0)?; + // The relative part is everything after the prefix. + let rel = if let Some(prefix) = &prefix { + // Archive paths are stored as UTF-8 strings, and `full_path` comes from + // the same prefix string used in the range query above. + debug_assert!(full_path.is_char_boundary(prefix.len())); + &full_path[prefix.len()..] + } else { + &full_path + }; - find_in_sqlite_index(&mut conn, search_for).await + if let Some(slash_pos) = rel.find('/') { + // It's inside a subdirectory. Extract and deduplicate the first component. 
+ let dir_name = &rel[..slash_pos]; + if seen_dirs.insert(dir_name.to_string()) { + yield FolderEntry::Dir(dir_name.to_string()); + } + } else { + // Direct file — yield only the name relative to the queried folder. + let rel = rel.to_string(); + let mime = detect_mime(&rel); + yield FolderEntry::File(rel, mime); + } + } + } + } } #[cfg(test)] @@ -838,6 +961,27 @@ mod tests { use std::{collections::HashMap, io::Cursor, ops::Deref, pin::Pin, sync::Arc}; use zip::write::SimpleFileOptions; + /// Creates a test archive from a list of (path, content) pairs. + async fn create_archive_from_entries( + entries: Vec<(&'static str, &'static [u8])>, + ) -> Result { + spawn_blocking(move || { + use std::io::Write as _; + let tf = tempfile::tempfile()?; + let mut archive = zip::ZipWriter::new(tf); + let options = SimpleFileOptions::default() + .compression_method(zip::CompressionMethod::Bzip2) + .compression_level(Some(1)); + for (path, content) in entries { + archive.start_file(path, options)?; + archive.write_all(content)?; + } + Ok(archive.finish()?) + }) + .await + .map(fs::File::from_std) + } + async fn create_test_archive(file_count: u32) -> Result { spawn_blocking(move || { use std::io::Write as _; @@ -1019,12 +1163,13 @@ mod tests { let tempfile = tempfile::NamedTempFile::new()?.into_temp_path(); create(tf, &tempfile).await?; - let fi = find_in_file(&tempfile, "testfile0").await?.unwrap(); + let mut index = Index::open(&tempfile).await?; + let fi = index.find("testfile0").await?.unwrap(); assert_eq!(fi.range, FileRange::new(39, 459)); assert_eq!(fi.compression, CompressionAlgorithm::Bzip2); - assert!(find_in_file(&tempfile, "some_other_file",).await?.is_none()); + assert!(index.find("some_other_file").await?.is_none()); Ok(()) } @@ -1545,4 +1690,260 @@ mod tests { Ok(()) } + + /// Build an index from a set of (path, content) pairs and open it as an `Index`. 
+ async fn index_from_entries(entries: Vec<(&'static str, &'static [u8])>) -> Result { + let archive = create_archive_from_entries(entries).await?; + let tmp = tempfile::NamedTempFile::new()?.into_temp_path(); + create(archive, &tmp).await?; + + Index::open(&tmp).await + } + + async fn collect_folder_contents( + index: &mut Index, + folder: Option<&str>, + ) -> Result<(Vec, Vec)> { + let entries: Vec = index + .folder_contents(folder.map(Path::new)) + .try_collect() + .await?; + + let mut files = Vec::new(); + let mut dirs = Vec::new(); + for entry in entries { + match entry { + FolderEntry::File(path, _) => files.push(path), + FolderEntry::Dir(name) => dirs.push(name), + } + } + files.sort(); + dirs.sort(); + Ok((files, dirs)) + } + + #[tokio::test] + async fn folder_contents_root_lists_files_and_dirs() -> Result<()> { + let mut index = index_from_entries(vec![ + ("index.html", b""), + ("style.css", b""), + ("sub/page.html", b""), + ("other/file.js", b""), + ]) + .await?; + + let (files, dirs) = collect_folder_contents(&mut index, None).await?; + + assert_eq!(files, vec!["index.html", "style.css"]); + assert_eq!(dirs, vec!["other", "sub"]); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_subfolder_lists_direct_children_only() -> Result<()> { + let mut index = index_from_entries(vec![ + ("src/main.rs", b""), + ("src/lib.rs", b""), + ("src/utils/helper.rs", b""), + ("src/utils/mod.rs", b""), + ("README.md", b""), + ]) + .await?; + + let (files, dirs) = collect_folder_contents(&mut index, Some("src")).await?; + + assert_eq!(files, vec!["lib.rs", "main.rs"]); + assert_eq!(dirs, vec!["utils"]); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_nested_subfolder() -> Result<()> { + let mut index = index_from_entries(vec![ + ("a/b/c/deep.txt", b""), + ("a/b/file.txt", b""), + ("a/b/other.txt", b""), + ]) + .await?; + + let (files, dirs) = collect_folder_contents(&mut index, Some("a/b")).await?; + + assert_eq!(files, vec!["file.txt", "other.txt"]); 
+ assert_eq!(dirs, vec!["c"]); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_empty_folder_returns_nothing() -> Result<()> { + let mut index = index_from_entries(vec![("a/file.txt", b"")]).await?; + + let (files, dirs) = collect_folder_contents(&mut index, Some("nonexistent")).await?; + + assert!(files.is_empty()); + assert!(dirs.is_empty()); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_root_with_only_files() -> Result<()> { + let mut index = index_from_entries(vec![("a.txt", b""), ("b.txt", b"")]).await?; + + let (files, dirs) = collect_folder_contents(&mut index, None).await?; + + assert_eq!(files, vec!["a.txt", "b.txt"]); + assert!(dirs.is_empty()); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_subdir_deduplicated() -> Result<()> { + let mut index = index_from_entries(vec![ + ("sub/a.txt", b""), + ("sub/b.txt", b""), + ("sub/c.txt", b""), + ]) + .await?; + + let (files, dirs) = collect_folder_contents(&mut index, None).await?; + + assert!(files.is_empty()); + // "sub" should appear exactly once despite three files inside it + assert_eq!(dirs, vec!["sub"]); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_treats_glob_chars_literally() -> Result<()> { + let mut index = index_from_entries(vec![ + ("src[abc]/literal.rs", b""), + ("srca/wildcard.rs", b""), + ("srcb/wildcard.rs", b""), + ("srcc/wildcard.rs", b""), + ("src*/star.rs", b""), + ("srcx/star.rs", b""), + ("src?/question.rs", b""), + ("srcy/question.rs", b""), + ]) + .await?; + + let (files, dirs) = collect_folder_contents(&mut index, Some("src[abc]")).await?; + assert_eq!(files, vec!["literal.rs"]); + assert!(dirs.is_empty()); + + let (files, dirs) = collect_folder_contents(&mut index, Some("src*")).await?; + assert_eq!(files, vec!["star.rs"]); + assert!(dirs.is_empty()); + + let (files, dirs) = collect_folder_contents(&mut index, Some("src?")).await?; + assert_eq!(files, vec!["question.rs"]); + assert!(dirs.is_empty()); + + Ok(()) + } + + 
#[tokio::test] + async fn list_returns_all_entries() -> Result<()> { + let mut index = index_from_entries(vec![ + ("index.html", b""), + ("src/main.rs", b""), + ("src/lib.rs", b""), + ]) + .await?; + + let mut entries: Vec = index.list().try_collect().await?; + entries.sort_by(|a, b| a.path.cmp(&b.path)); + + let paths: Vec<&Path> = entries.iter().map(|e| e.path.as_path()).collect(); + assert_eq!( + paths, + vec![ + Path::new("index.html"), + Path::new("src/lib.rs"), + Path::new("src/main.rs"), + ] + ); + + Ok(()) + } + + #[tokio::test] + async fn list_empty_archive() -> Result<()> { + let mut index = index_from_entries(vec![]).await?; + let entries: Vec = index.list().try_collect().await?; + assert!(entries.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn list_preserves_range_and_compression() -> Result<()> { + let mut index = index_from_entries(vec![("file.txt", b"hello")]).await?; + let entries: Vec = index.list().try_collect().await?; + + assert_eq!(entries.len(), 1); + // The range should be non-empty and compression should be Bzip2 + // (set by create_archive_from_entries). 
+ let fi = &entries[0]; + assert!(!fi.range().is_empty()); + assert_eq!(fi.compression(), CompressionAlgorithm::Bzip2); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_file_mime_correct() -> Result<()> { + let mut index = index_from_entries(vec![ + ("main.rs", b""), + ("README.md", b""), + ("style.css", b""), + ("data.json", b""), + ("index.html", b""), + ]) + .await?; + + let entries: Vec = index.folder_contents(None::<&Path>).try_collect().await?; + + let mut mime_map: Vec<(&str, String)> = entries + .iter() + .filter_map(|e| match e { + FolderEntry::File(name, mime) => Some((name.as_str(), mime.to_string())), + FolderEntry::Dir(_) => None, + }) + .collect(); + mime_map.sort_by_key(|(name, _)| *name); + + assert_eq!( + mime_map, + vec![ + ("README.md", "text/markdown".to_string()), + ("data.json", "application/json".to_string()), + ("index.html", "text/html".to_string()), + ("main.rs", "text/rust".to_string()), + ("style.css", "text/css".to_string()), + ] + ); + + Ok(()) + } + + #[tokio::test] + async fn folder_contents_dirs_have_no_mime() -> Result<()> { + let mut index = + index_from_entries(vec![("src/main.rs", b""), ("docs/readme.md", b"")]).await?; + + let entries: Vec = index.folder_contents(None::<&Path>).try_collect().await?; + + for entry in &entries { + if let FolderEntry::Dir(_) = entry { + assert!(entry.mime().is_none()); + } + } + + Ok(()) + } } diff --git a/crates/lib/docs_rs_storage/src/file.rs b/crates/lib/docs_rs_storage/src/file.rs index f744022af..b26485d0e 100644 --- a/crates/lib/docs_rs_storage/src/file.rs +++ b/crates/lib/docs_rs_storage/src/file.rs @@ -31,3 +31,151 @@ pub fn file_list_to_json(files: impl IntoIterator) -> Value { .collect(), ) } + +#[derive(Debug, Clone, Eq)] +pub enum FolderEntry { + File(String, Mime), + Dir(String), +} + +impl FolderEntry { + pub fn from_path(path: &str) -> Self { + if let Some((dir, _)) = path.split_once('/') { + Self::Dir(dir.to_string()) + } else { + Self::File(path.to_string(), 
detect_mime(path)) + } + } + + pub fn name(&self) -> &str { + match self { + FolderEntry::File(name, _) => name, + FolderEntry::Dir(name) => name, + } + } + + pub fn is_dir(&self) -> bool { + matches!(self, Self::Dir(_)) + } + + pub fn mime(&self) -> Option<&Mime> { + match self { + Self::File(_, mime) => Some(mime), + Self::Dir(_) => None, + } + } +} + +impl PartialEq for FolderEntry { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (FolderEntry::File(a, _), FolderEntry::File(b, _)) => a == b, + (FolderEntry::Dir(a), FolderEntry::Dir(b)) => a == b, + _ => false, + } + } +} + +impl PartialOrd for FolderEntry { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for FolderEntry { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + fn cmp_name(a: &str, b: &str) -> std::cmp::Ordering { + a.chars() + .flat_map(char::to_lowercase) + .cmp(b.chars().flat_map(char::to_lowercase)) + .then_with(|| a.cmp(b)) + } + + match (self, other) { + (FolderEntry::Dir(a), FolderEntry::Dir(b)) => cmp_name(a, b), + (FolderEntry::File(a, _), FolderEntry::File(b, _)) => cmp_name(a, b), + (FolderEntry::Dir(_), FolderEntry::File(_, _)) => std::cmp::Ordering::Less, + (FolderEntry::File(_, _), FolderEntry::Dir(_)) => std::cmp::Ordering::Greater, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn file(name: &str) -> FolderEntry { + FolderEntry::File(name.to_string(), detect_mime(name)) + } + + fn dir(name: &str) -> FolderEntry { + FolderEntry::Dir(name.to_string()) + } + + #[test] + fn folder_entry_ordering_sorts_dirs_before_files() { + let mut entries = vec![ + file("apple.txt"), + dir("zulu"), + file("zebra.txt"), + dir("alpha"), + ]; + + entries.sort(); + + assert_eq!( + entries, + vec![ + dir("alpha"), + dir("zulu"), + file("apple.txt"), + file("zebra.txt") + ] + ); + } + + #[test] + fn folder_entry_ordering_is_case_insensitive() { + let mut entries = vec![file("lib.rs"), file("Cargo.toml"), file("apple.txt")]; + + 
entries.sort(); + + assert_eq!( + entries, + vec![file("apple.txt"), file("Cargo.toml"), file("lib.rs")] + ); + } + + #[test] + fn folder_entry_ordering_dirs_is_case_insensitive() { + let mut entries = vec![dir("src"), dir("Cargo"), dir("apple")]; + + entries.sort(); + + assert_eq!(entries, vec![dir("apple"), dir("Cargo"), dir("src")]); + } + + #[test] + fn folder_entry_ordering_uses_name_tie_breaker() { + let mut entries = vec![file("readme.md"), file("README.md")]; + + entries.sort(); + + assert_eq!(entries, vec![file("README.md"), file("readme.md")]); + } + + #[test] + fn folder_entry_equality_ignores_mime() { + assert_eq!( + FolderEntry::File("same".to_string(), mime::TEXT_PLAIN), + FolderEntry::File("same".to_string(), mime::TEXT_HTML), + ); + } + + #[test] + fn folder_entry_equality_compares_variants_and_names() { + assert_eq!(dir("src"), dir("src")); + assert_ne!(dir("src"), dir("lib")); + assert_ne!(file("src"), dir("src")); + } +} diff --git a/crates/lib/docs_rs_storage/src/lib.rs b/crates/lib/docs_rs_storage/src/lib.rs index 3c70b88b1..5a1f47720 100644 --- a/crates/lib/docs_rs_storage/src/lib.rs +++ b/crates/lib/docs_rs_storage/src/lib.rs @@ -16,8 +16,7 @@ pub use blob::{Blob, BlobUpload, StreamingBlob}; pub use compression::{compress, compress_async, decompress}; pub use config::Config; pub use errors::{PathNotFoundError, SizeLimitReached}; -pub use file::FileEntry; -pub use file::file_list_to_json; +pub use file::{FileEntry, FolderEntry, file_list_to_json}; pub use storage::blocking::Storage; pub use storage::non_blocking::AsyncStorage; pub use types::StorageKind; diff --git a/crates/lib/docs_rs_storage/src/storage/non_blocking.rs b/crates/lib/docs_rs_storage/src/storage/non_blocking.rs index c7cc48bd0..16679735c 100644 --- a/crates/lib/docs_rs_storage/src/storage/non_blocking.rs +++ b/crates/lib/docs_rs_storage/src/storage/non_blocking.rs @@ -2,7 +2,7 @@ use crate::backends::memory::MemoryBackend; use crate::{ Config, - archive_index::{self, 
ARCHIVE_INDEX_FILE_EXTENSION}, + archive_index::{self, ARCHIVE_INDEX_FILE_EXTENSION, Index}, backends::{StorageBackend, StorageBackendMethods, s3::S3Backend}, blob::{Blob, BlobUpload, StreamingBlob}, compression::{compress, compress_async}, @@ -55,6 +55,16 @@ impl AsyncStorage { &self.config } + pub async fn find_archive_index( + &self, + archive_path: &str, + latest_build_id: Option, + ) -> Result { + self.archive_index_cache + .find_index(archive_path, latest_build_id, self) + .await + } + #[instrument(skip(self))] pub async fn exists(&self, path: &str) -> Result { self.backend.exists(path).await