10 changes: 5 additions & 5 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "lfspull"
version = "0.3.1"
version = "0.4.1"
edition = "2021"
license = "MIT"
authors = ["Volume Graphics GmbH"]
Expand All @@ -11,9 +11,9 @@ description = "A simple git lfs file pulling implementation in pure rust. Can on

[dependencies]
clap = { version = "4.1", features = ["derive", "env"] }
thiserror = "1"
reqwest = { version="0.11" , features = ["json", "stream"] }
http = "0.2"
thiserror = "2"
reqwest = { version="0.12" , features = ["json", "stream"] }
http = "1.3"
serde = {version ="1.0", features=['derive']}
serde_json = "1.0"
bytes = "1.4"
Expand All @@ -30,7 +30,7 @@ futures-util = "0.3.30"
tempfile = "3.12"

[dev-dependencies]
cucumber = "0.19.1"
cucumber = "0.21"
tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
uuid = { version = "1.2", features = ["serde", "v4"] }

15 changes: 15 additions & 0 deletions README.md
@@ -24,6 +24,10 @@ The CLI is pretty straight forward.
- e.g. 'lfspull -r "**/*.tgz"' downloads all .tgz files in this folder and all subfolders
- '-b / --random-bytes [RANDOM_BYTES]' for temp file name. See https://docs.rs/tempfile/latest/tempfile/struct.Builder.html#method.rand_bytes
- '-a / --access-token [TOKEN]' sets the token - can also be set via $ACCESS_TOKEN from env
- '-m / --max-retry [NUMBER]' sets the maximum number of download attempts on failure
- '-t / --timeout [NUMBER]' sets the timeout in seconds for the git lfs pull request (see the sketch below)
- When no value is given, the timeout is calculated automatically based on the lfs object size
- When 0 is given, there is no timeout
- '-v' for verbose mode
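
The two timeout modes can be made concrete with a small sketch. This is illustrative only, not the crate's actual code; the helper name and the assumed 1 MiB/s transfer-rate floor are invented for the example:

```rust
use std::time::Duration;

/// Hypothetical helper mirroring the timeout rules above: `Some(0)` disables
/// the timeout entirely, `None` derives one from the lfs object size.
/// The transfer-rate floor is an assumption, not the crate's real constant.
fn resolve_timeout(cli_timeout: Option<u64>, object_size_bytes: u64) -> Option<Duration> {
    const ASSUMED_MIN_RATE_BYTES_PER_SEC: u64 = 1024 * 1024; // assume >= 1 MiB/s
    match cli_timeout {
        Some(0) => None, // explicit 0: no timeout at all
        Some(secs) => Some(Duration::from_secs(secs)),
        None => Some(Duration::from_secs(
            (object_size_bytes / ASSUMED_MIN_RATE_BYTES_PER_SEC).max(30),
        )),
    }
}
```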

## Library API guide
@@ -32,6 +36,17 @@ Please see our docs.rs for example code and the gherkin tests for how to check t

## Changelog

### 0.4.1

- add rust-toolchain 1.88
- read git config for lfs storage path
- add timeout

### 0.4.0

- upgrade a few dependencies
- add retry attempt when failing fetching from git

### 0.3.1

- fix bug when trying to rename temp file to cache file, but cache file is already created and locked by other parallel job
3 changes: 3 additions & 0 deletions rust-toolchain.toml
@@ -0,0 +1,3 @@
[toolchain]
channel = "1.88.0"
components = ["clippy"]
6 changes: 6 additions & 0 deletions src/lib.rs
@@ -81,6 +81,12 @@ pub mod prelude {
/// something failed while creating tempfile
#[error("TempFile error: {0}")]
TempFile(String),
/// all download attempts have failed
#[error("Maximum download attempts reached")]
ReachedMaxDownloadAttempt,
/// Timeout error
#[error("Download failed due to timeout")]
Timeout,
}
}
pub use prelude::FilePullMode;
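
These two variants pair with the new `max_retry` and `timeout` options. A minimal sketch of how they might surface from a retry loop, assuming a `try_download` stand-in rather than the crate's real fetch function:

```rust
use lfspull::prelude::LFSError;

// Stand-in for the actual HTTP fetch; illustrative only.
async fn try_download() -> Result<Vec<u8>, LFSError> {
    Err(LFSError::Timeout)
}

// Sketch: retry until the attempts are exhausted, then report the new
// variant. This is not the crate's real control flow.
async fn download_with_retry(max_retry: u32) -> Result<Vec<u8>, LFSError> {
    for attempt in 1..=max_retry {
        match try_download().await {
            Ok(bytes) => return Ok(bytes),
            Err(e) => tracing::warn!("attempt {attempt}/{max_retry} failed: {e}"),
        }
    }
    Err(LFSError::ReachedMaxDownloadAttempt)
}
```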
30 changes: 26 additions & 4 deletions src/main.rs
@@ -22,9 +22,19 @@ struct Args {
#[clap(short = 'b', long)]
random_bytes: Option<usize>,

/// Max number of retry attempts when the HTTP request fails
#[clap(short, long, default_value_t = 3)]
max_retry: u32,

/// Print debug information
#[clap(short, long)]
verbose: bool,

/// Timeout in seconds for the git lfs pull request.
/// When None is given, the timeout is calculated automatically based on the lfs object size.
/// When 0 is given, there is no timeout.
#[clap(short, long)]
timeout: Option<u64>,
}
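
For illustration, the extended `Args` combine like this on the command line (the glob pattern and values are placeholders):

```sh
lfspull -r "**/*.tgz" --max-retry 5 --timeout 0 -v
```

Here `--max-retry 5` allows up to five attempts per object and `--timeout 0` disables the timeout, per the rules documented above.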

#[tokio::main]
@@ -48,14 +58,26 @@ pub async fn main() -> Result<(), LFSError> {
let access_token = args.access_token.as_deref();
if let Some(file) = args.file_to_pull {
info!("Single file mode: {}", file.to_string_lossy());
let result = lfspull::pull_file(file, access_token, args.random_bytes).await?;
let result = lfspull::pull_file(
file,
access_token,
args.max_retry,
args.random_bytes,
args.timeout,
)
.await?;
info!("Result: {}", result);
}
if let Some(recurse_pattern) = args.recurse_pattern {
info!("Glob-recurse mode: {}", &recurse_pattern);
let results =
lfspull::glob_recurse_pull_directory(&recurse_pattern, access_token, args.random_bytes)
.await?;
let results = lfspull::glob_recurse_pull_directory(
&recurse_pattern,
access_token,
args.max_retry,
args.random_bytes,
args.timeout,
)
.await?;
info!("Pulling finished! Listing files and sources: ");

results.into_iter().enumerate().for_each(|(id, (n, r))| {
84 changes: 67 additions & 17 deletions src/repo_tools/mod.rs
@@ -6,12 +6,13 @@ use glob::glob;
use primitives::get_repo_root;
use std::path::{Path, PathBuf};
use tokio::fs;
use tracing::{debug, error, info};
use tokio::fs::read_to_string;
use tracing::{debug, error, info, warn};
use url::Url;
use vg_errortools::{fat_io_wrap_tokio, FatIOError};

async fn get_remote_url_from_file(git_file: impl AsRef<Path>) -> Result<String, LFSError> {
let file_buffer = fat_io_wrap_tokio(git_file, fs::read_to_string).await?;
let file_buffer = fat_io_wrap_tokio(git_file, read_to_string).await?;
let remote_url = file_buffer
.lines()
.find(|&line| line.contains("url"))
@@ -20,7 +21,7 @@ async fn get_remote_url_from_file(git_file: impl AsRef<Path>) -> Result<String,
".git/config contains no remote url",
))?
.split('=')
.last()
.next_back()
.as_ref()
.ok_or(LFSError::InvalidFormat(".git/config url line malformed"))?
.trim();
@@ -31,7 +32,7 @@ async fn get_real_repo_root<P: AsRef<Path>>(repo_path: P) -> Result<PathBuf, LFS
let git_path = repo_path.as_ref().join(".git");
let real_git_path = if repo_path.as_ref().join(".git").is_file() {
//worktree case
let worktree_file_contents = fat_io_wrap_tokio(git_path, tokio::fs::read_to_string).await?;
let worktree_file_contents = fat_io_wrap_tokio(git_path, read_to_string).await?;
let worktree_path = worktree_file_contents
.split(':')
.find(|c| c.contains(".git"))
@@ -76,7 +77,7 @@ fn remote_url_ssh_to_https(repo_url: String) -> Result<String, LFSError> {
.host_str()
.ok_or(LFSError::InvalidFormat("Url had no valid host"))?;
let path = input_url.path();
Ok(format!("https://{}{}", host, path))
Ok(format!("https://{host}{path}"))
}
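
For reference, a self-contained sketch of the rewrite this function performs, assuming an `ssh://`-style input (the helper name and URL are illustrative):

```rust
use url::Url;

// Mirrors the scheme swap above: keep host and path, emit an https URL.
fn ssh_to_https(repo_url: &str) -> Option<String> {
    let input = Url::parse(repo_url).ok()?;
    Some(format!("https://{}{}", input.host_str()?, input.path()))
}

// ssh_to_https("ssh://git@example.com/org/repo.git")
//   -> Some("https://example.com/org/repo.git")
```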

async fn get_cache_dir<P: AsRef<Path>>(
@@ -86,9 +87,35 @@
let oid_1 = &metadata.oid[0..2];
let oid_2 = &metadata.oid[2..4];

Ok(get_real_repo_root(repo_root)
.await?
.join(".git")
let mut git_folder = get_real_repo_root(repo_root).await?.join(".git");
let config = git_folder.join("config");
if config.exists() {
debug!("Read git config file in {}", config.to_string_lossy());
let config_content = read_to_string(&config).await.unwrap_or_else(|e| {
warn!("Could not read git config: {e}");
String::new()
});
let mut config_content = config_content.lines().peekable();

while config_content.peek().is_some() {
let line = config_content.next().unwrap_or_default();
let line = line.trim();
if line.contains("[lfs]") {
while config_content.peek().is_some() {
let next_line = config_content.next().unwrap_or_default();
let next_line = next_line.trim();
if let Some(storage_url) = next_line.strip_prefix("storage = ") {
debug!("Found git lfs storage path: '{storage_url}'");
git_folder = PathBuf::from(storage_url);
break;
}
}
break;
}
}
}

Ok(git_folder
.join("lfs")
.join("objects")
.join(oid_1)
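
The `[lfs]` scan above keys on a `storage = ` line in the repository's git config. For illustration, a hypothetical entry like this (the path is a placeholder) would redirect the object cache:

```ini
[lfs]
	storage = /mnt/shared/lfs
```

With such an entry, the cache directory is resolved beneath `/mnt/shared/lfs` rather than the repository's own `.git` folder.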
@@ -99,8 +126,11 @@
repo_root: P,
metadata: &primitives::MetaData,
access_token: Option<&str>,
max_retry: u32,
randomizer_bytes: Option<usize>,
timeout: Option<u64>,
) -> Result<(PathBuf, FilePullMode), LFSError> {
debug!("version: {}", &metadata.version);
let cache_dir = get_cache_dir(&repo_root, metadata).await?;
debug!("cache dir {:?}", &cache_dir);
let cache_file = cache_dir.join(&metadata.oid);
@@ -118,8 +148,15 @@
)
})?;

let temp_file =
primitives::download_file(metadata, &repo_url, access_token, randomizer_bytes).await?;
let temp_file = primitives::download_file(
metadata,
&repo_url,
access_token,
max_retry,
randomizer_bytes,
timeout,
)
.await?;
if cache_file.exists() {
info!(
"cache file {:?} is already written from other process",
@@ -159,7 +196,9 @@ pub async fn pull_file<P: AsRef<Path>>(
pub async fn pull_file<P: AsRef<Path>>(
lfs_file: P,
access_token: Option<&str>,
max_retry: u32,
randomizer_bytes: Option<usize>,
timeout: Option<u64>,
) -> Result<FilePullMode, LFSError> {
info!("Pulling file {}", lfs_file.as_ref().to_string_lossy());
if !primitives::is_lfs_node_file(&lfs_file).await? {
Expand All @@ -174,10 +213,17 @@ pub async fn pull_file<P: AsRef<Path>>(
let metadata = primitives::parse_lfs_file(&lfs_file).await?;
debug!("Downloading file");
let repo_root = get_repo_root(&lfs_file).await.map_err(|e| {
LFSError::DirectoryTraversalError(format!("Could not find git repo root: {:?}", e))
LFSError::DirectoryTraversalError(format!("Could not find git repo root: {e:?}"))
})?;
let (file_name_cached, origin) =
get_file_cached(&repo_root, &metadata, access_token, randomizer_bytes).await?;
let (file_name_cached, origin) = get_file_cached(
&repo_root,
&metadata,
access_token,
max_retry,
randomizer_bytes,
timeout,
)
.await?;
info!(
"Found file (Origin: {:?}), linking to {}",
origin,
Expand All @@ -194,11 +240,11 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
let mut return_vec = Vec::new();

let glob = glob(wildcard_pattern).map_err(|e| {
LFSError::DirectoryTraversalError(format!("Could not parse glob pattern: {}", e))
LFSError::DirectoryTraversalError(format!("Could not parse glob pattern: {e}"))
})?;
for entry in glob {
return_vec.push(entry.map_err(|e| {
LFSError::DirectoryTraversalError(format!("Error in glob result list: {}", e))
LFSError::DirectoryTraversalError(format!("Error in glob result list: {e}"))
})?);
}
Ok(return_vec)
@@ -212,26 +258,30 @@ fn glob_recurse(wildcard_pattern: &str) -> Result<Vec<PathBuf>, LFSError> {
///
/// * `access_token` - the token for Bearer-Auth via HTTPS
///
/// * `max_retry` - maximum number of retry attempts when the HTTP request fails
///
/// * `randomizer_bytes` - bytes used to create a randomized temp file name
///
/// * `timeout` - timeout in seconds; `None` derives it automatically from the lfs object size, `Some(0)` disables it
///
/// # Examples
///
/// Load all .jpg files from all subdirectories
/// ```no_run
/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), Some(5));
/// let result = lfspull::glob_recurse_pull_directory("dir/to/pull/**/*.jpg", Some("secret-token"), 3, Some(5), Some(0));
/// ```
///
pub async fn glob_recurse_pull_directory(
wildcard_pattern: &str,
access_token: Option<&str>,
max_retry: u32,
randomizer_bytes: Option<usize>,
timeout: Option<u64>,
) -> Result<Vec<(String, FilePullMode)>, LFSError> {
let mut result_vec = Vec::new();
let files = glob_recurse(wildcard_pattern)?;
for path in files {
result_vec.push((
path.to_string_lossy().to_string(),
pull_file(&path, access_token, randomizer_bytes).await?,
pull_file(&path, access_token, max_retry, randomizer_bytes, timeout).await?,
));
}
