diff --git a/Cargo.toml b/Cargo.toml
index 43136b9..c53e70f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lfspull"
-version = "0.3.1"
+version = "0.4.1"
 edition = "2021"
 license = "MIT"
 authors = ["Volume Graphics GmbH"]
@@ -11,9 +11,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on
 
 [dependencies]
 clap = { version = "4.1", features = ["derive", "env"] }
-thiserror = "1"
-reqwest = { version="0.11" , features = ["json", "stream"] }
-http = "0.2"
+thiserror = "2"
+reqwest = { version="0.12" , features = ["json", "stream"] }
+http = "1.3"
 serde = {version ="1.0", features=['derive']}
 serde_json = "1.0"
 bytes = "1.4"
@@ -30,7 +30,7 @@ futures-util = "0.3.30"
 tempfile = "3.12"
 
 [dev-dependencies]
-cucumber = "0.19.1"
+cucumber = "0.21"
 tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
 uuid = { version = "1.2", features = ["serde", "v4"] }
 
diff --git a/README.md b/README.md
index 1f5737c..f1b58a7 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,10 @@ The CLI is pretty straight forward.
   - e.g. 'lfspull -r "**/*.tgz"' downloads all .tgz files this folder and all subfolders
 - '-b / --random-bytes [RANDOM_BYTES]' for temp file name. See https://docs.rs/tempfile/latest/tempfile/struct.Builder.html#method.rand_bytes
 - '-a / --access-token [TOKEN]' sets the token - can also be set via $ACCESS_TOKEN from env
+- '-m / --max-retry [NUMBER]' sets the maximum number of download attempts if a request fails
+- '-t / --timeout [NUMBER]' sets the timeout in seconds for the git lfs pull request
+  - When no value is given, the timeout is calculated automatically based on the lfs object size
+  - When 0 is given, there is no timeout
 - '-v' for verbose mode
 
 ## Library API guide
@@ -32,6 +36,17 @@ Please see our docs.rs for example code and the gherkin tests for how to check t
 
 ## Changelog
 
+### 0.4.1
+
+- add rust-toolchain 1.88
+- read git config for lfs storage path
+- add timeout support
+
+### 0.4.0
+
+- upgrade a few dependencies
+- add retry attempts when fetching from git fails
+
 ### 0.3.1
 
 - fix bug when trying to rename temp file to cache file, but cache file is already created and locked by other parallel job
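For illustration, the new flags compose with the existing CLI as follows (hypothetical invocations; flag names and semantics are taken from the README additions above):

```shell
# up to 5 download attempts with a 60 second timeout per attempt
lfspull -r "**/*.tgz" -m 5 -t 60 -a "$ACCESS_TOKEN"

# '-t 0' disables the timeout entirely; omitting '-t' derives one from the lfs object size
lfspull -r "**/*.bin" -t 0
```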
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 0000000..7e8a18c
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "1.88.0"
+components = ["clippy"]
diff --git a/src/lib.rs b/src/lib.rs
index 46f647d..73fb7ea 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -81,6 +81,12 @@ pub mod prelude {
         /// something failed while creating tempfile
         #[error("TempFile error: {0}")]
         TempFile(String),
+        /// all download attempts have failed
+        #[error("Maximum download attempts reached")]
+        ReachedMaxDownloadAttempt,
+        /// the download timed out
+        #[error("Download failed due to timeout")]
+        Timeout,
     }
 }
 pub use prelude::FilePullMode;
diff --git a/src/main.rs b/src/main.rs
index 0e276d0..5db5a79 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -22,9 +22,19 @@ struct Args {
     #[clap(short = 'b', long)]
     random_bytes: Option<usize>,
 
+    /// Max number of download attempts if the HTTP request fails
+    #[clap(short, long, default_value_t = 3)]
+    max_retry: u32,
+
     /// Print debug information
     #[clap(short, long)]
    verbose: bool,
+
+    /// Timeout in seconds for the git lfs pull request;
+    /// when no value is given, the timeout is calculated automatically based on the lfs object size;
+    /// when 0 is given, there is no timeout
+    #[clap(short, long)]
+    timeout: Option<u64>,
 }
 
 #[tokio::main]
@@ -48,14 +58,26 @@ pub async fn main() -> Result<(), LFSError> {
     let access_token = args.access_token.as_deref();
     if let Some(file) = args.file_to_pull {
         info!("Single file mode: {}", file.to_string_lossy());
-        let result = lfspull::pull_file(file, access_token, args.random_bytes).await?;
+        let result = lfspull::pull_file(
+            file,
+            access_token,
+            args.max_retry,
+            args.random_bytes,
+            args.timeout,
+        )
+        .await?;
         info!("Result: {}", result);
     }
 
     if let Some(recurse_pattern) = args.recurse_pattern {
         info!("Glob-recurse mode: {}", &recurse_pattern);
-        let results =
-            lfspull::glob_recurse_pull_directory(&recurse_pattern, access_token, args.random_bytes)
-                .await?;
+        let results = lfspull::glob_recurse_pull_directory(
+            &recurse_pattern,
+            access_token,
+            args.max_retry,
+            args.random_bytes,
+            args.timeout,
+        )
+        .await?;
         info!("Pulling finished! Listing files and sources: ");
         results.into_iter().enumerate().for_each(|(id, (n, r))| {
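`pull_file` now threads `max_retry` and `timeout` through to the downloader, and the two new error variants let callers react to each failure mode separately. A minimal sketch of calling the updated API, assuming `LFSError` is reachable through `lfspull::prelude` as the imports in `main.rs` suggest (the path `data/file.bin` is hypothetical):

```rust
use lfspull::prelude::LFSError;

#[tokio::main]
async fn main() {
    // 3 attempts, automatic size-based timeout (None), token from the environment
    let token = std::env::var("ACCESS_TOKEN").ok();
    match lfspull::pull_file("data/file.bin", token.as_deref(), 3, None, None).await {
        // the returned FilePullMode implements Display (main.rs logs it with "{}")
        Ok(origin) => println!("pulled file (origin: {origin})"),
        Err(LFSError::Timeout) => eprintln!("the download timed out"),
        Err(LFSError::ReachedMaxDownloadAttempt) => eprintln!("all download attempts failed"),
        Err(e) => eprintln!("error: {e}"),
    }
}
```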
- .join(".git") + let mut git_folder = get_real_repo_root(repo_root).await?.join(".git"); + let config = git_folder.join("config"); + if config.exists() { + debug!("Read git config file in {}", config.to_string_lossy()); + let config_content = read_to_string(&config).await.unwrap_or_else(|e| { + warn!("Could not read git config: {e}"); + String::new() + }); + let mut config_content = config_content.lines().peekable(); + + while config_content.peek().is_some() { + let line = config_content.next().unwrap_or_default(); + let line = line.trim(); + if line.contains("[lfs]") { + while config_content.peek().is_some() { + let next_line = config_content.next().unwrap_or_default(); + let next_line = next_line.trim(); + if let Some(storage_url) = next_line.strip_prefix("storage = ") { + debug!("Found git lfs storage path: '{storage_url}'"); + git_folder = PathBuf::from(storage_url); + break; + } + } + break; + } + } + } + + Ok(git_folder .join("lfs") .join("objects") .join(oid_1) @@ -99,8 +126,11 @@ async fn get_file_cached>( repo_root: P, metadata: &primitives::MetaData, access_token: Option<&str>, + max_retry: u32, randomizer_bytes: Option, + timeout: Option, ) -> Result<(PathBuf, FilePullMode), LFSError> { + debug!("version: {}", &metadata.version); let cache_dir = get_cache_dir(&repo_root, metadata).await?; debug!("cache dir {:?}", &cache_dir); let cache_file = cache_dir.join(&metadata.oid); @@ -118,8 +148,15 @@ async fn get_file_cached>( ) })?; - let temp_file = - primitives::download_file(metadata, &repo_url, access_token, randomizer_bytes).await?; + let temp_file = primitives::download_file( + metadata, + &repo_url, + access_token, + max_retry, + randomizer_bytes, + timeout, + ) + .await?; if cache_file.exists() { info!( "cache file {:?} is already written from other process", @@ -159,7 +196,9 @@ async fn get_file_cached>( pub async fn pull_file>( lfs_file: P, access_token: Option<&str>, + max_retry: u32, randomizer_bytes: Option, + timeout: Option, ) -> Result { info!("Pulling file {}", lfs_file.as_ref().to_string_lossy()); if !primitives::is_lfs_node_file(&lfs_file).await? 
@@ -159,7 +196,9 @@
 pub async fn pull_file<P: AsRef<Path>>(
     lfs_file: P,
     access_token: Option<&str>,
+    max_retry: u32,
     randomizer_bytes: Option<usize>,
+    timeout: Option<u64>,
 ) -> Result<FilePullMode, LFSError> {
     info!("Pulling file {}", lfs_file.as_ref().to_string_lossy());
     if !primitives::is_lfs_node_file(&lfs_file).await? {
@@ -174,10 +213,17 @@ pub async fn pull_file<P: AsRef<Path>>(
     let metadata = primitives::parse_lfs_file(&lfs_file).await?;
     debug!("Downloading file");
     let repo_root = get_repo_root(&lfs_file).await.map_err(|e| {
-        LFSError::DirectoryTraversalError(format!("Could not find git repo root: {:?}", e))
+        LFSError::DirectoryTraversalError(format!("Could not find git repo root: {e:?}"))
     })?;
-    let (file_name_cached, origin) =
-        get_file_cached(&repo_root, &metadata, access_token, randomizer_bytes).await?;
+    let (file_name_cached, origin) = get_file_cached(
+        &repo_root,
+        &metadata,
+        access_token,
+        max_retry,
+        randomizer_bytes,
+        timeout,
+    )
+    .await?;
     info!(
         "Found file (Origin: {:?}), linking to {}",
         origin,
@@ -194,11 +240,11 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
     let mut return_vec = Vec::new();
     let glob = glob(wildcard_pattern).map_err(|e| {
-        LFSError::DirectoryTraversalError(format!("Could not parse glob pattern: {}", e))
+        LFSError::DirectoryTraversalError(format!("Could not parse glob pattern: {e}"))
     })?;
     for entry in glob {
         return_vec.push(entry.map_err(|e| {
-            LFSError::DirectoryTraversalError(format!("Error in glob result list: {}", e))
+            LFSError::DirectoryTraversalError(format!("Error in glob result list: {e}"))
         })?);
     }
     Ok(return_vec)
 }
@@ -212,26 +258,30 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
 ///
 /// * `access_token` - the token for Bearer-Auth via HTTPS
 ///
+/// * `max_retry` - the maximum number of download attempts if the HTTP request fails
+///
 /// * `randomizer bytes` - bytes used to create a randomized named temp file
 ///
 /// # Examples
 ///
 /// Load all .jpg files from all subdirectories
 /// ```no_run
-/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), Some(5));
+/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), 3, Some(5), Some(0));
/// ```
 ///
 pub async fn glob_recurse_pull_directory(
     wildcard_pattern: &str,
     access_token: Option<&str>,
+    max_retry: u32,
     randomizer_bytes: Option<usize>,
+    timeout: Option<u64>,
 ) -> Result<Vec<(String, FilePullMode)>, LFSError> {
     let mut result_vec = Vec::new();
     let files = glob_recurse(wildcard_pattern)?;
     for path in files {
         result_vec.push((
             path.to_string_lossy().to_string(),
-            pull_file(&path, access_token, randomizer_bytes).await?,
+            pull_file(&path, access_token, max_retry, randomizer_bytes, timeout).await?,
         ));
     }
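For reference, the `get_cache_dir` change above redirects the cache lookup when `.git/config` carries a custom LFS storage path. A config it would match might look like this (the path is hypothetical; note that after trimming, the line must read exactly `storage = ...` with single spaces around `=`, which is the spacing git itself writes):

```ini
[lfs]
	storage = /mnt/shared/lfs-storage
```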
diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs
index 3928c14..bc56b97 100644
--- a/src/repo_tools/primitives.rs
+++ b/src/repo_tools/primitives.rs
@@ -10,9 +10,11 @@ use std::convert::TryInto;
 use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
+use std::time::Duration;
 use tempfile::NamedTempFile;
 use tokio::fs;
 use tokio::io::AsyncReadExt;
+use tokio::time::{sleep, timeout};
 use tracing::{debug, error, info};
 use url::Url;
 use vg_errortools::{fat_io_wrap_tokio, FatIOError};
@@ -119,7 +121,7 @@ fn url_with_auth(url: &str, access_token: Option<&str>) -> Result<Url, LFSError
     Ok(url)
 }
 
-pub async fn download_file(
+pub async fn handle_download(
     meta_data: &MetaData,
     repo_remote_url: &str,
     access_token: Option<&str>,
@@ -148,7 +150,7 @@ pub async fn download_file(
         .await?;
     if !response.status().is_success() {
         let status = response.status();
-        println!(
+        error!(
             "Failed to request git lfs actions with status code {} and body {}",
             status,
             response.text().await?,
@@ -156,7 +158,7 @@ pub async fn download_file(
         return if status == StatusCode::FORBIDDEN || status == StatusCode::UNAUTHORIZED {
             Err(LFSError::AccessDenied)
         } else {
-            Err(LFSError::ResponseNotOkay(format!("{}", status)))
+            Err(LFSError::ResponseNotOkay(format!("{status}")))
         };
     }
     let parsed_result = response.json::().await?;
@@ -237,6 +239,73 @@ pub async fn download_file(
     }
 }
 
+pub async fn download_file(
+    meta_data: &MetaData,
+    repo_remote_url: &str,
+    access_token: Option<&str>,
+    max_retry: u32,
+    randomizer_bytes: Option<usize>,
+    connection_timeout: Option<u64>,
+) -> Result<NamedTempFile, LFSError> {
+    let effective_timeout = get_effective_timeout(connection_timeout, meta_data.size);
+    for attempt in 1..=max_retry {
+        debug!("Download attempt {attempt}");
+        let download = handle_download(meta_data, repo_remote_url, access_token, randomizer_bytes);
+        let result = if let Some(seconds) = effective_timeout {
+            timeout(Duration::from_secs(seconds), download).await
+        } else {
+            Ok(download.await)
+        };
+
+        match result {
+            Ok(download_result) => match download_result {
+                Ok(tempfile) => {
+                    return Ok(tempfile);
+                }
+                Err(e) => {
+                    if matches!(e, LFSError::AccessDenied) {
+                        return Err(e);
+                    }
+                    error!("Download error: {e}");
+                }
+            },
+            Err(timeout_err) => {
+                error!("Timeout reached: {timeout_err}");
+            }
+        }
+        sleep(Duration::from_secs(1)).await;
+    }
+
+    Err(LFSError::ReachedMaxDownloadAttempt)
+}
+
+/// Some(0) => no timeout
+/// Some(x) => x seconds timeout
+/// None => automatic, derived from the lfs object size
+fn get_effective_timeout(timeout: Option<u64>, file_size_in_bytes: usize) -> Option<u64> {
+    match timeout {
+        Some(0) => {
+            debug!("No timeout");
+            None
+        }
+        Some(val) => {
+            debug!("Set timeout to {val} s");
+            Some(val)
+        }
+        None => {
+            let min_download_speed_mb_per_sec = 1.0;
+            let min_timeout_secs = 30;
+            let file_size_mb = file_size_in_bytes as f64 / (1024.0 * 1024.0);
+            let timeout_secs = (file_size_mb / min_download_speed_mb_per_sec).ceil() as u64;
+            let timeout_secs = timeout_secs.max(min_timeout_secs);
+
+            debug!("Automatically calculated timeout: {timeout_secs} s");
+
+            Some(timeout_secs)
+        }
+    }
+}
+
 pub async fn is_lfs_node_file<P: AsRef<Path>>(path: P) -> Result<bool, LFSError> {
     if path.as_ref().is_dir() {
         return Ok(false);
@@ -314,7 +383,7 @@ size 226848"#;
     #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
     async fn try_pull_from_demo_repo() {
         let parsed = parse_lfs_string(LFS_TEST_DATA).expect("Could not parse demo-string!");
-        let temp_file = download_file(&parsed, URL, None, None)
+        let temp_file = download_file(&parsed, URL, None, 3, None, Some(0))
             .await
             .expect("could not download file");
         let temp_size = temp_file
@@ -348,4 +417,19 @@ size 226848"#;
             .expect("File was not readable");
         assert!(!result);
     }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+    async fn test_get_effective_timeout() {
+        let timeout = get_effective_timeout(Some(9), 1000);
+        assert_eq!(timeout, Some(9));
+
+        let timeout = get_effective_timeout(Some(0), 1000);
+        assert_eq!(timeout, None);
+
+        let timeout = get_effective_timeout(None, 1000);
+        assert_eq!(timeout, Some(30));
+
+        let timeout = get_effective_timeout(None, 200000000);
+        assert_eq!(timeout, Some(191));
+    }
 }
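The automatic branch of `get_effective_timeout` budgets for a worst-case download speed of 1 MB/s and clamps to a 30 second floor. Restated as a standalone sketch (the numbers mirror the new unit test; since the speed floor is 1.0, the division drops out):

```rust
// timeout = max(ceil(size_in_bytes / 2^20 / 1 MB/s), 30) seconds
fn auto_timeout_secs(file_size_in_bytes: usize) -> u64 {
    let file_size_mb = file_size_in_bytes as f64 / (1024.0 * 1024.0);
    (file_size_mb.ceil() as u64).max(30)
}

fn main() {
    assert_eq!(auto_timeout_secs(1_000), 30); // tiny file, the 30 s floor applies
    assert_eq!(auto_timeout_secs(200_000_000), 191); // ~190.7 MB at 1 MB/s
}
```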
diff --git a/tests/lfspull.rs b/tests/lfspull.rs
index 72c2970..a3b884b 100644
--- a/tests/lfspull.rs
+++ b/tests/lfspull.rs
@@ -55,7 +55,7 @@ async fn pull_file_step(world: &mut LFSWorld) {
         .clone()
         .join(TEST_LFS_FILE_NAME);
     world.pull_result = Some(
-        lfspull::pull_file(file_path, None, Some(5))
+        lfspull::pull_file(file_path, None, 3, Some(5), Some(0))
             .await
             .expect("Could not pull file"),
     );
@@ -65,7 +65,7 @@
 async fn pull_directory(world: &mut LFSWorld) {
     let fake_repo = world.current_fake_repo.as_ref().unwrap().to_string_lossy();
     let pattern = format!("{}/**/*", fake_repo);
-    let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, Some(5))
+    let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, 3, Some(5), Some(0))
         .await
         .expect("Could not pull directory")
         .into_iter()