From 1e9506720101616d02fbf00bbaa202979a5a92fd Mon Sep 17 00:00:00 2001 From: Adinata Wijaya Date: Wed, 6 Aug 2025 11:20:25 +0200 Subject: [PATCH 1/5] - upgrade a few dependencies --- Cargo.toml | 10 +++++----- README.md | 5 +++++ src/repo_tools/mod.rs | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 43136b9..ce42b57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lfspull" -version = "0.3.1" +version = "0.4.0" edition = "2021" license = "MIT" authors = ["Volume Graphics GmbH"] @@ -11,9 +11,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on [dependencies] clap = { version = "4.1", features = ["derive", "env"] } -thiserror = "1" -reqwest = { version="0.11" , features = ["json", "stream"] } -http = "0.2" +thiserror = "2" +reqwest = { version="0.12" , features = ["json", "stream"] } +http = "1.3" serde = {version ="1.0", features=['derive']} serde_json = "1.0" bytes = "1.4" @@ -30,7 +30,7 @@ futures-util = "0.3.30" tempfile = "3.12" [dev-dependencies] -cucumber = "0.19.1" +cucumber = "0.21" tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] } uuid = { version = "1.2", features = ["serde", "v4"] } diff --git a/README.md b/README.md index 1f5737c..10cdda4 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,11 @@ Please see our docs.rs for example code and the gherkin tests for how to check t ## Changelog +### 0.4.0 + +- upgrade a few dependencies +- add retry fetching from git + ### 0.3.1 - fix bug when trying to rename temp file to cache file, but cache file is already created and locked by other parallel job diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs index e8eb5d4..4f023d1 100644 --- a/src/repo_tools/mod.rs +++ b/src/repo_tools/mod.rs @@ -31,7 +31,7 @@ async fn get_real_repo_root>(repo_path: P) -> Result>( access_token: Option<&str>, randomizer_bytes: Option, ) -> Result<(PathBuf, FilePullMode), LFSError> { + debug!("version: {}", &metadata.version); let cache_dir = get_cache_dir(&repo_root, metadata).await?; debug!("cache dir {:?}", &cache_dir); let cache_file = cache_dir.join(&metadata.oid); From c4aeb610eed6203baf70c4ff3f88ccfd83c0ebc2 Mon Sep 17 00:00:00 2001 From: Adinata Wijaya Date: Thu, 7 Aug 2025 07:36:28 +0200 Subject: [PATCH 2/5] - use reqwest middleware and retry --- Cargo.toml | 4 +++- src/lib.rs | 3 +++ src/repo_tools/primitives.rs | 21 ++++++++++++++++++++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ce42b57..0e1bd6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on [dependencies] clap = { version = "4.1", features = ["derive", "env"] } thiserror = "2" -reqwest = { version="0.12" , features = ["json", "stream"] } +reqwest = { version="0.12" , features = ["stream"] } +reqwest-retry = { version="0.7" } +reqwest-middleware = { version="0.4" , features = ["json"] } http = "1.3" serde = {version ="1.0", features=['derive']} serde_json = "1.0" diff --git a/src/lib.rs b/src/lib.rs index 46f647d..f244aa0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,9 @@ pub mod prelude { /// Forward from the `reqwest` package, something failed while executing the fetch #[error("Request-error: {0}")] RequestError(#[from] reqwest::Error), + /// Forward from the `reqwest-middleware` package, something failed while executing the fetch + #[error("Request-middleware-error: {0}")] + RequestMiddlewareError(#[from] reqwest_middleware::Error), /// You tried to pull a non-existing file from the remote #[error("Remote file not found: {0}")] RemoteFileNotFound(&'static str), diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs index 3928c14..ffa5b40 100644 --- a/src/repo_tools/primitives.rs +++ b/src/repo_tools/primitives.rs @@ -2,6 +2,8 @@ use crate::prelude::*; use futures_util::stream::StreamExt; use http::StatusCode; use reqwest::Client; +use reqwest_middleware::ClientBuilder; +use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware}; use serde::{Deserialize, Serialize}; use serde_json::json; use sha2::{Digest, Sha256}; @@ -10,6 +12,7 @@ use std::convert::TryInto; use std::io::Write; use std::path::Path; use std::path::PathBuf; +use std::time::Duration; use tempfile::NamedTempFile; use tokio::fs; use tokio::io::AsyncReadExt; @@ -126,7 +129,7 @@ pub async fn download_file( randomizer_bytes: Option, ) -> Result { const MEDIA_TYPE: &str = "application/vnd.git-lfs+json"; - let client = Client::builder().build()?; + assert_eq!(meta_data.hash, Some(Hash::SHA256)); // we are implementing git-lfs batch API here: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md let request = json!({ @@ -137,8 +140,23 @@ pub async fn download_file( "hash_algo": "sha256" }); + let retry_policy = ExponentialBackoff::builder() + .retry_bounds(Duration::from_secs(1), Duration::from_secs(10)) + .base(1) + .jitter(Jitter::None) + .build_with_max_retries(3); + + debug!("Retry policy: {:?}", retry_policy); + + let client = Client::builder().build()?; + let client = ClientBuilder::new(client) + // Retry failed requests. + .with(RetryTransientMiddleware::new_with_policy(retry_policy)) + .build(); + let request_url = repo_remote_url.to_owned() + "/info/lfs/objects/batch"; let request_url = url_with_auth(&request_url, access_token)?; + let response = client .post(request_url.clone()) .header("Accept", MEDIA_TYPE) @@ -146,6 +164,7 @@ pub async fn download_file( .json(&request) .send() .await?; + if !response.status().is_success() { let status = response.status(); println!( From c72a75c259f55f96ce1f943fdc876851c15b4fa8 Mon Sep 17 00:00:00 2001 From: Adinata Wijaya Date: Thu, 7 Aug 2025 09:47:47 +0200 Subject: [PATCH 3/5] - add reqwest_tracing --- Cargo.toml | 3 ++- src/repo_tools/primitives.rs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0e1bd6e..59591c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,8 +13,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on clap = { version = "4.1", features = ["derive", "env"] } thiserror = "2" reqwest = { version="0.12" , features = ["stream"] } -reqwest-retry = { version="0.7" } +reqwest-retry = { version="0.7", features = ["tracing"] } reqwest-middleware = { version="0.4" , features = ["json"] } +reqwest-tracing = "0.5" http = "1.3" serde = {version ="1.0", features=['derive']} serde_json = "1.0" diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs index ffa5b40..eb04638 100644 --- a/src/repo_tools/primitives.rs +++ b/src/repo_tools/primitives.rs @@ -13,6 +13,7 @@ use std::io::Write; use std::path::Path; use std::path::PathBuf; use std::time::Duration; +use reqwest_tracing::TracingMiddleware; use tempfile::NamedTempFile; use tokio::fs; use tokio::io::AsyncReadExt; @@ -150,6 +151,7 @@ pub async fn download_file( let client = Client::builder().build()?; let client = ClientBuilder::new(client) + .with(TracingMiddleware::default()) // Retry failed requests. .with(RetryTransientMiddleware::new_with_policy(retry_policy)) .build(); From ad9defb540aa75d113fb1eb7692665bcd1356c16 Mon Sep 17 00:00:00 2001 From: Adinata Wijaya Date: Thu, 7 Aug 2025 10:43:09 +0200 Subject: [PATCH 4/5] - add arguments for max-retry-attempt --- README.md | 2 +- src/main.rs | 17 +++++++++++++---- src/repo_tools/mod.rs | 29 +++++++++++++++++++++++------ src/repo_tools/primitives.rs | 7 ++++--- tests/lfspull.rs | 4 ++-- 5 files changed, 43 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 10cdda4..5b6d6bf 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Please see our docs.rs for example code and the gherkin tests for how to check t ### 0.4.0 - upgrade a few dependencies -- add retry fetching from git +- add retry attempt when failing fetching from git ### 0.3.1 diff --git a/src/main.rs b/src/main.rs index 0e276d0..b5b474d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,6 +22,10 @@ struct Args { #[clap(short = 'b', long)] random_bytes: Option, + ///max number of retry attempt when http request fails + #[clap(short, long, default_value_t = 3)] + max_retry: u32, + /// Print debug information #[clap(short, long)] verbose: bool, @@ -48,14 +52,19 @@ pub async fn main() -> Result<(), LFSError> { let access_token = args.access_token.as_deref(); if let Some(file) = args.file_to_pull { info!("Single file mode: {}", file.to_string_lossy()); - let result = lfspull::pull_file(file, access_token, args.random_bytes).await?; + let result = + lfspull::pull_file(file, access_token, args.max_retry, args.random_bytes).await?; info!("Result: {}", result); } if let Some(recurse_pattern) = args.recurse_pattern { info!("Glob-recurse mode: {}", &recurse_pattern); - let results = - lfspull::glob_recurse_pull_directory(&recurse_pattern, access_token, args.random_bytes) - .await?; + let results = lfspull::glob_recurse_pull_directory( + &recurse_pattern, + access_token, + args.max_retry, + args.random_bytes, + ) + .await?; info!("Pulling finished! Listing files and sources: "); results.into_iter().enumerate().for_each(|(id, (n, r))| { diff --git a/src/repo_tools/mod.rs b/src/repo_tools/mod.rs index 4f023d1..9ac54e0 100644 --- a/src/repo_tools/mod.rs +++ b/src/repo_tools/mod.rs @@ -99,6 +99,7 @@ async fn get_file_cached>( repo_root: P, metadata: &primitives::MetaData, access_token: Option<&str>, + max_retry: u32, randomizer_bytes: Option, ) -> Result<(PathBuf, FilePullMode), LFSError> { debug!("version: {}", &metadata.version); @@ -119,8 +120,14 @@ async fn get_file_cached>( ) })?; - let temp_file = - primitives::download_file(metadata, &repo_url, access_token, randomizer_bytes).await?; + let temp_file = primitives::download_file( + metadata, + &repo_url, + access_token, + max_retry, + randomizer_bytes, + ) + .await?; if cache_file.exists() { info!( "cache file {:?} is already written from other process", @@ -160,6 +167,7 @@ async fn get_file_cached>( pub async fn pull_file>( lfs_file: P, access_token: Option<&str>, + max_retry: u32, randomizer_bytes: Option, ) -> Result { info!("Pulling file {}", lfs_file.as_ref().to_string_lossy()); @@ -177,8 +185,14 @@ pub async fn pull_file>( let repo_root = get_repo_root(&lfs_file).await.map_err(|e| { LFSError::DirectoryTraversalError(format!("Could not find git repo root: {:?}", e)) })?; - let (file_name_cached, origin) = - get_file_cached(&repo_root, &metadata, access_token, randomizer_bytes).await?; + let (file_name_cached, origin) = get_file_cached( + &repo_root, + &metadata, + access_token, + max_retry, + randomizer_bytes, + ) + .await?; info!( "Found file (Origin: {:?}), linking to {}", origin, @@ -213,18 +227,21 @@ fn glob_recurse(wildcard_pattern: &str) -> Result, LFSError> { /// /// * `access_token` - the token for Bearer-Auth via HTTPS /// +/// * `max retry` - max number of retry attempt when http request fails +/// /// * `randomizer bytes` - bytes used to create a randomized named temp file /// /// # Examples /// /// Load all .jpg files from all subdirectories /// ```no_run -/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), Some(5)); +/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), 3, Some(5)); /// ``` /// pub async fn glob_recurse_pull_directory( wildcard_pattern: &str, access_token: Option<&str>, + max_retry: u32, randomizer_bytes: Option, ) -> Result, LFSError> { let mut result_vec = Vec::new(); @@ -232,7 +249,7 @@ pub async fn glob_recurse_pull_directory( for path in files { result_vec.push(( path.to_string_lossy().to_string(), - pull_file(&path, access_token, randomizer_bytes).await?, + pull_file(&path, access_token, max_retry, randomizer_bytes).await?, )); } diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs index eb04638..6f81052 100644 --- a/src/repo_tools/primitives.rs +++ b/src/repo_tools/primitives.rs @@ -4,6 +4,7 @@ use http::StatusCode; use reqwest::Client; use reqwest_middleware::ClientBuilder; use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware}; +use reqwest_tracing::TracingMiddleware; use serde::{Deserialize, Serialize}; use serde_json::json; use sha2::{Digest, Sha256}; @@ -13,7 +14,6 @@ use std::io::Write; use std::path::Path; use std::path::PathBuf; use std::time::Duration; -use reqwest_tracing::TracingMiddleware; use tempfile::NamedTempFile; use tokio::fs; use tokio::io::AsyncReadExt; @@ -127,6 +127,7 @@ pub async fn download_file( meta_data: &MetaData, repo_remote_url: &str, access_token: Option<&str>, + max_retry: u32, randomizer_bytes: Option, ) -> Result { const MEDIA_TYPE: &str = "application/vnd.git-lfs+json"; @@ -145,7 +146,7 @@ pub async fn download_file( .retry_bounds(Duration::from_secs(1), Duration::from_secs(10)) .base(1) .jitter(Jitter::None) - .build_with_max_retries(3); + .build_with_max_retries(max_retry); debug!("Retry policy: {:?}", retry_policy); @@ -335,7 +336,7 @@ size 226848"#; #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn try_pull_from_demo_repo() { let parsed = parse_lfs_string(LFS_TEST_DATA).expect("Could not parse demo-string!"); - let temp_file = download_file(&parsed, URL, None, None) + let temp_file = download_file(&parsed, URL, None, 3, None) .await .expect("could not download file"); let temp_size = temp_file diff --git a/tests/lfspull.rs b/tests/lfspull.rs index 72c2970..51ebeaf 100644 --- a/tests/lfspull.rs +++ b/tests/lfspull.rs @@ -55,7 +55,7 @@ async fn pull_file_step(world: &mut LFSWorld) { .clone() .join(TEST_LFS_FILE_NAME); world.pull_result = Some( - lfspull::pull_file(file_path, None, Some(5)) + lfspull::pull_file(file_path, None, 3, Some(5)) .await .expect("Could not pull file"), ); @@ -65,7 +65,7 @@ async fn pull_file_step(world: &mut LFSWorld) { async fn pull_directory(world: &mut LFSWorld) { let fake_repo = world.current_fake_repo.as_ref().unwrap().to_string_lossy(); let pattern = format!("{}/**/*", fake_repo); - let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, Some(5)) + let recurse_pull = lfspull::glob_recurse_pull_directory(&pattern, None, 3, Some(5)) .await .expect("Could not pull directory") .into_iter() From 2e53b60b5ec80fe6e76b6d252d6e0b01dd1b9ec2 Mon Sep 17 00:00:00 2001 From: Adinata Wijaya Date: Tue, 19 Aug 2025 15:08:18 +0200 Subject: [PATCH 5/5] - remove reqwest middleware and co crates. - implements our own retry --- Cargo.toml | 5 +--- src/lib.rs | 6 ++-- src/repo_tools/primitives.rs | 54 ++++++++++++++++++++---------------- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 59591c4..ce42b57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,10 +12,7 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on [dependencies] clap = { version = "4.1", features = ["derive", "env"] } thiserror = "2" -reqwest = { version="0.12" , features = ["stream"] } -reqwest-retry = { version="0.7", features = ["tracing"] } -reqwest-middleware = { version="0.4" , features = ["json"] } -reqwest-tracing = "0.5" +reqwest = { version="0.12" , features = ["json", "stream"] } http = "1.3" serde = {version ="1.0", features=['derive']} serde_json = "1.0" diff --git a/src/lib.rs b/src/lib.rs index f244aa0..d0ebc87 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,9 +53,6 @@ pub mod prelude { /// Forward from the `reqwest` package, something failed while executing the fetch #[error("Request-error: {0}")] RequestError(#[from] reqwest::Error), - /// Forward from the `reqwest-middleware` package, something failed while executing the fetch - #[error("Request-middleware-error: {0}")] - RequestMiddlewareError(#[from] reqwest_middleware::Error), /// You tried to pull a non-existing file from the remote #[error("Remote file not found: {0}")] RemoteFileNotFound(&'static str), @@ -84,6 +81,9 @@ pub mod prelude { /// something failed while creating tempfile #[error("TempFile error: {0}")] TempFile(String), + /// all download attempts have failed + #[error("Maximum download attempts reached")] + ReachedMaxDownloadAttempt, } } pub use prelude::FilePullMode; diff --git a/src/repo_tools/primitives.rs b/src/repo_tools/primitives.rs index 6f81052..a1b5932 100644 --- a/src/repo_tools/primitives.rs +++ b/src/repo_tools/primitives.rs @@ -2,9 +2,6 @@ use crate::prelude::*; use futures_util::stream::StreamExt; use http::StatusCode; use reqwest::Client; -use reqwest_middleware::ClientBuilder; -use reqwest_retry::{policies::ExponentialBackoff, Jitter, RetryTransientMiddleware}; -use reqwest_tracing::TracingMiddleware; use serde::{Deserialize, Serialize}; use serde_json::json; use sha2::{Digest, Sha256}; @@ -17,6 +14,7 @@ use std::time::Duration; use tempfile::NamedTempFile; use tokio::fs; use tokio::io::AsyncReadExt; +use tokio::time::sleep; use tracing::{debug, error, info}; use url::Url; use vg_errortools::{fat_io_wrap_tokio, FatIOError}; @@ -123,15 +121,14 @@ fn url_with_auth(url: &str, access_token: Option<&str>) -> Result Ok(url) } -pub async fn download_file( +pub async fn handle_download( meta_data: &MetaData, repo_remote_url: &str, access_token: Option<&str>, - max_retry: u32, randomizer_bytes: Option, ) -> Result { const MEDIA_TYPE: &str = "application/vnd.git-lfs+json"; - + let client = Client::builder().build()?; assert_eq!(meta_data.hash, Some(Hash::SHA256)); // we are implementing git-lfs batch API here: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md let request = json!({ @@ -142,24 +139,8 @@ pub async fn download_file( "hash_algo": "sha256" }); - let retry_policy = ExponentialBackoff::builder() - .retry_bounds(Duration::from_secs(1), Duration::from_secs(10)) - .base(1) - .jitter(Jitter::None) - .build_with_max_retries(max_retry); - - debug!("Retry policy: {:?}", retry_policy); - - let client = Client::builder().build()?; - let client = ClientBuilder::new(client) - .with(TracingMiddleware::default()) - // Retry failed requests. - .with(RetryTransientMiddleware::new_with_policy(retry_policy)) - .build(); - let request_url = repo_remote_url.to_owned() + "/info/lfs/objects/batch"; let request_url = url_with_auth(&request_url, access_token)?; - let response = client .post(request_url.clone()) .header("Accept", MEDIA_TYPE) @@ -167,10 +148,9 @@ pub async fn download_file( .json(&request) .send() .await?; - if !response.status().is_success() { let status = response.status(); - println!( + error!( "Failed to request git lfs actions with status code {} and body {}", status, response.text().await?, @@ -259,6 +239,32 @@ pub async fn download_file( } } +pub async fn download_file( + meta_data: &MetaData, + repo_remote_url: &str, + access_token: Option<&str>, + max_retry: u32, + randomizer_bytes: Option, +) -> Result { + for attempt in 1..=max_retry { + debug!("Download attempt {attempt}"); + match handle_download(meta_data, repo_remote_url, access_token, randomizer_bytes).await { + Ok(tempfile) => { + return Ok(tempfile); + } + Err(e) => { + if matches!(e, LFSError::AccessDenied) { + return Err(e); + } + error!("Download error: {e}. Attempting another download: {attempt}"); + sleep(Duration::from_secs(1)).await; + } + } + } + + Err(LFSError::ReachedMaxDownloadAttempt) +} + pub async fn is_lfs_node_file>(path: P) -> Result { if path.as_ref().is_dir() { return Ok(false);