diff --git a/README.md b/README.md index b447141..cd5c600 100644 --- a/README.md +++ b/README.md @@ -176,6 +176,12 @@ storify touch -t path/to/file # truncate to 0 bytes if exists storify touch -c path/to/missing # do not create; succeed silently storify touch -p path/to/nested/file # create parents when applicable +# Truncate files +storify truncate path/to/file # truncate to 0 bytes +storify truncate --size 1024 path/to/file # truncate to 1024 bytes +storify truncate -c --size 0 missing # do not create; succeed silently if missing +storify truncate --size-limit 1 -f file # 1 MB size guard; -f bypasses it + ## Command Reference ### Storage Commands @@ -189,6 +195,7 @@ storify touch -p path/to/nested/file # create parents when applicable | `mv` | Move/rename files within storage | | `mkdir` | Create directories | `-p` (create parents) | | `touch` | Create files | +| `truncate` | Truncate/extend files to a target size | `--size <BYTES>`, `-c` (no-create), `-p` (parents), `-s/--size-limit <MB>`, `-f` (force) | | `cat` | Display file contents | | `head` | Display beginning of file | `-n` (lines), `-c` (bytes), `-q` (quiet), `-v` (verbose) | | `tail` | Display end of file | `-n` (lines), `-c` (bytes), `-q` (quiet), `-v` (verbose) | diff --git a/src/cli/entry.rs b/src/cli/entry.rs index df8fbed..bdeb490 100644 --- a/src/cli/entry.rs +++ b/src/cli/entry.rs @@ -8,7 +8,7 @@ use super::{ prompts::Prompt, storage::{ self, CatArgs, CpArgs, DiffArgs, DuArgs, GetArgs, GrepArgs, HeadArgs, LsArgs, MkdirArgs, - MvArgs, PutArgs, RmArgs, StatArgs, TailArgs, TouchArgs, TreeArgs, + MvArgs, PutArgs, RmArgs, StatArgs, TailArgs, TouchArgs, TreeArgs, TruncateArgs, }, }; @@ -91,6 +91,8 @@ pub enum Command { Diff(DiffArgs), /// Create empty files or update metadata (best-effort) Touch(TouchArgs), + /// Truncate files to a specific size (default 0) + Truncate(TruncateArgs), } #[derive(Subcommand, Debug, Clone)] diff --git a/src/cli/storage.rs b/src/cli/storage.rs index 6d8e511..fe17380 100644 --- a/src/cli/storage.rs +++ 
b/src/cli/storage.rs @@ -296,6 +296,33 @@ pub struct TouchArgs { pub parents: bool, } +#[derive(ClapArgs, Debug, Clone)] +pub struct TruncateArgs { + /// Remote path(s) to truncate + #[arg(value_name = "PATH", value_parser = parse_validated_path)] + pub paths: Vec, + + /// Target size in bytes (default: 0) + #[arg(long = "size", value_name = "BYTES", default_value_t = 0)] + pub size: u64, + + /// Do not create files; succeed silently if they do not exist + #[arg(short = 'c', long = "no-create")] + pub no_create: bool, + + /// Create parent directories when needed (filesystem providers) + #[arg(short = 'p', long = "parents")] + pub parents: bool, + + /// Limit total bytes written per file in MB (0 disables) + #[arg(short = 's', long = "size-limit", default_value_t = 10)] + pub size_limit_mb: u64, + + /// Bypass size-limit check + #[arg(short = 'f', long)] + pub force: bool, +} + pub async fn execute(command: &Command, ctx: &CliContext) -> Result<()> { let config = ctx.storage_config()?; let client = StorageClient::new(config.clone()).await?; @@ -452,6 +479,23 @@ pub async fn execute(command: &Command, ctx: &CliContext) -> Result<()> { ) .await?; } + Command::Truncate(trunc_args) => { + if trunc_args.paths.is_empty() { + return Err(Error::InvalidArgument { + message: "missing PATH".to_string(), + }); + } + client + .truncate_files( + &trunc_args.paths, + trunc_args.size, + trunc_args.no_create, + trunc_args.parents, + trunc_args.size_limit_mb, + trunc_args.force, + ) + .await?; + } Command::Config(_) => { unreachable!("Config commands are handled separately") } diff --git a/src/error.rs b/src/error.rs index a3070f3..960e28f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -103,6 +103,9 @@ pub enum Error { #[snafu(display("Failed to touch '{path}': {source}"))] TouchFailed { path: String, source: Box }, + #[snafu(display("Failed to truncate '{path}': {source}"))] + TruncateFailed { path: String, source: Box }, + #[snafu(display("Invalid argument: {message}"))] 
InvalidArgument { message: String }, diff --git a/src/storage.rs b/src/storage.rs index abbacaa..709fe71 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -22,11 +22,12 @@ use self::operations::mv::OpenDalMover; use self::operations::tail::OpenDalTailReader; use self::operations::touch::OpenDalToucher; use self::operations::tree::OpenDalTreer; +use self::operations::truncate::OpenDalTruncater; use self::operations::upload::OpenDalUploader; use self::operations::usage::OpenDalUsageCalculator; use self::operations::{ Cater, Copier, Deleter, Differ, Downloader, Greper, Header, Lister, Mkdirer, Mover, Stater, - Tailer, Toucher, Treer, Uploader, UsageCalculator, + Tailer, Toucher, Treer, Truncater, Uploader, UsageCalculator, }; use crate::storage::utils::error::IntoStorifyError; use crate::wrap_err; @@ -787,4 +788,56 @@ impl StorageClient { .try_for_each(|_| async { Ok(()) }) .await } + + pub async fn truncate_files( + &self, + paths: &[String], + size: u64, + no_create: bool, + parents: bool, + size_limit_mb: u64, + force: bool, + ) -> Result<()> { + log::debug!( + "truncate_files provider={:?} paths_count={} size={} no_create={} parents={} size_limit_mb={} force={}", + self.provider, + paths.len(), + size, + no_create, + parents, + size_limit_mb, + force + ); + + if size_limit_mb > 0 && !force { + let total_mb = size.div_ceil(1024 * 1024); + if total_mb > size_limit_mb { + return Err(Error::InvalidArgument { + message: format!( + "Files too large ({}MB > {}MB). 
Use --force to override", + total_mb, size_limit_mb + ), + }); + } + } + + let concurrency: usize = 8; + futures::stream::iter(paths.iter().cloned()) + .map(|p| { + let op = self.operator.clone(); + async move { + let truncater = OpenDalTruncater::new(op); + truncater + .truncate(&p, size, no_create, parents) + .await + .map_err(|e| Error::TruncateFailed { + path: p.clone(), + source: Box::new(e), + }) + } + }) + .buffer_unordered(concurrency) + .try_for_each(|_| async { Ok(()) }) + .await + } } diff --git a/src/storage/operations/mod.rs b/src/storage/operations/mod.rs index 955e6c9..66d2251 100644 --- a/src/storage/operations/mod.rs +++ b/src/storage/operations/mod.rs @@ -14,6 +14,7 @@ pub mod stat; pub mod tail; pub mod touch; pub mod tree; +pub mod truncate; pub mod upload; pub mod usage; @@ -32,5 +33,6 @@ pub use stat::Stater; pub use tail::Tailer; pub use touch::Toucher; pub use tree::Treer; +pub use truncate::Truncater; pub use upload::Uploader; pub use usage::UsageCalculator; diff --git a/src/storage/operations/touch.rs b/src/storage/operations/touch.rs index c2e9a99..97b2737 100644 --- a/src/storage/operations/touch.rs +++ b/src/storage/operations/touch.rs @@ -1,6 +1,7 @@ use crate::error::{Error, Result}; use crate::storage::operations::Mkdirer; use crate::storage::operations::mkdir::OpenDalMkdirer; +use crate::storage::utils::path::parent_dir_of; use opendal::{ErrorKind, Operator}; /// Trait for touching files in storage (create or truncate) @@ -22,20 +23,6 @@ impl OpenDalToucher { pub fn new(operator: Operator) -> Self { Self { operator } } - - fn parent_dir_of(path: &str) -> Option { - let trimmed = path.trim_matches('/'); - if let Some(idx) = trimmed.rfind('/') { - let (dir, _) = trimmed.split_at(idx); - if dir.is_empty() { - Some(String::new()) - } else { - Some(format!("{}/", dir)) - } - } else { - None - } - } } impl Toucher for OpenDalToucher { @@ -75,7 +62,7 @@ impl Toucher for OpenDalToucher { } if parents - && let Some(parent) = 
Self::parent_dir_of(path) + && let Some(parent) = parent_dir_of(path) && !parent.is_empty() { let mkdirer = OpenDalMkdirer::new(self.operator.clone()); diff --git a/src/storage/operations/truncate.rs b/src/storage/operations/truncate.rs new file mode 100644 index 0000000..3635c6e --- /dev/null +++ b/src/storage/operations/truncate.rs @@ -0,0 +1,145 @@ +use crate::error::{Error, Result}; +use crate::storage::constants::{DEFAULT_BUFFER_SIZE, DEFAULT_CHUNK_SIZE}; +use crate::storage::operations::mkdir::OpenDalMkdirer; +use crate::storage::operations::mv::OpenDalMover; +use crate::storage::operations::{Mkdirer, Mover}; +use crate::storage::utils::path::parent_dir_of; +use opendal::{ErrorKind, Operator}; +use uuid::Uuid; + +pub trait Truncater { + /// Truncate or extend a file to the specified size in bytes. + /// + /// - If the file is missing and `no_create` is true, this is a no-op. + /// - If `parents` is true, attempt to create parent directories when needed. + async fn truncate(&self, path: &str, size: u64, no_create: bool, parents: bool) -> Result<()>; +} + +pub struct OpenDalTruncater { + operator: Operator, +} + +impl OpenDalTruncater { + pub fn new(operator: Operator) -> Self { + Self { operator } + } + + fn temp_path_for(path: &str) -> String { + format!("{}.truncate.tmp-{}", path, Uuid::new_v4().simple()) + } + + async fn write_zeros( + &self, + writer: &mut opendal::Writer, + mut remaining: u64, + ) -> opendal::Result<()> { + if remaining == 0 { + return Ok(()); + } + while remaining > 0 { + let to_write = std::cmp::min(remaining, DEFAULT_CHUNK_SIZE as u64) as usize; + let block = vec![0u8; to_write]; + writer.write(block).await?; + remaining -= to_write as u64; + } + Ok(()) + } +} + +impl Truncater for OpenDalTruncater { + async fn truncate(&self, path: &str, size: u64, no_create: bool, parents: bool) -> Result<()> { + if path.ends_with('/') { + return Err(Error::InvalidArgument { + message: "truncate does not support directories; use mkdir".to_string(), + 
}); + } + + match self.operator.stat(path).await { + Ok(meta) => { + if meta.mode().is_dir() { + return Err(Error::InvalidArgument { + message: "Path is a directory; use mkdir".to_string(), + }); + } + + let orig_size = meta.content_length(); + // No-op when size is unchanged + if size == orig_size { + return Ok(()); + } + + // Fast path for truncating to zero + if size == 0 { + let mut writer = self.operator.writer(path).await?; + writer.close().await?; + println!("Truncated: {} -> 0", path); + return Ok(()); + } + + // General path: create a temp object with desired content then move over + let temp_path = Self::temp_path_for(path); + let mut writer = self.operator.writer(&temp_path).await?; + + let copy_len = std::cmp::min(size, orig_size); + + // Copy prefix from existing file in ranges + let mut offset: u64 = 0; + while offset < copy_len { + let end = std::cmp::min(copy_len, offset + DEFAULT_BUFFER_SIZE as u64); + let chunk = self.operator.read_with(path).range(offset..end).await?; + if chunk.is_empty() { + break; + } + writer.write(chunk).await?; + offset = end; + } + + // Zero padding if we need to extend + if size > copy_len { + let pad = size - copy_len; + self.write_zeros(&mut writer, pad).await?; + } + + writer.close().await?; + + // Move temp over original + let mover = OpenDalMover::new(self.operator.clone()); + if let Err(e) = Mover::mover(&mover, &temp_path, path).await { + // Best-effort cleanup of temp object + let _ = self.operator.delete(&temp_path).await; + return Err(e); + } + println!("Truncated: {} -> {}", path, size); + Ok(()) + } + Err(e) if e.kind() == ErrorKind::NotFound => { + if no_create { + return Ok(()); + } + + if parents + && let Some(parent) = parent_dir_of(path) + && !parent.is_empty() + { + let mkdirer = OpenDalMkdirer::new(self.operator.clone()); + Mkdirer::mkdir(&mkdirer, &parent, true).await?; + } + + // Create new file with given size + if size == 0 { + let mut writer = self.operator.writer(path).await?; + 
writer.close().await?; + println!("Created: {} (size 0)", path); + return Ok(()); + } + + let mut writer = self.operator.writer(path).await?; + self.write_zeros(&mut writer, size).await?; + writer.close().await?; + println!("Created: {} (size {})", path, size); + Ok(()) + } + Err(e) => Err(e.into()), + } + } +} diff --git a/src/storage/utils/path.rs b/src/storage/utils/path.rs index 171b0d4..277ee8d 100644 --- a/src/storage/utils/path.rs +++ b/src/storage/utils/path.rs @@ -55,3 +55,19 @@ pub fn get_root_relative_path(full_path: &str, base_path: &str) -> String { rel = rel.replace("//", "/"); rel } + +/// Return parent directory (with trailing '/') for a remote path, if any. +/// Returns `None` when the path has no parent component. +pub fn parent_dir_of(path: &str) -> Option { + let trimmed = path.trim_matches('/'); + if let Some(idx) = trimmed.rfind('/') { + let (dir, _) = trimmed.split_at(idx); + if dir.is_empty() { + Some(String::new()) + } else { + Some(format!("{}/", dir)) + } + } else { + None + } +} diff --git a/tests/behavior/main.rs b/tests/behavior/main.rs index 1529a41..944df01 100644 --- a/tests/behavior/main.rs +++ b/tests/behavior/main.rs @@ -31,6 +31,7 @@ fn main() -> Result<()> { operations::tree::tests(&client, &mut tests); operations::diff::tests(&client, &mut tests); operations::touch::tests(&client, &mut tests); + operations::truncate::tests(&client, &mut tests); let _ = tracing_subscriber::fmt() .pretty() diff --git a/tests/behavior/operations/mod.rs b/tests/behavior/operations/mod.rs index 25fb337..e5e37a2 100644 --- a/tests/behavior/operations/mod.rs +++ b/tests/behavior/operations/mod.rs @@ -13,5 +13,6 @@ pub mod stat; pub mod tail; pub mod touch; pub mod tree; +pub mod truncate; pub mod upload; pub mod usage; diff --git a/tests/behavior/operations/truncate.rs b/tests/behavior/operations/truncate.rs new file mode 100644 index 0000000..520a8a5 --- /dev/null +++ b/tests/behavior/operations/truncate.rs @@ -0,0 +1,98 @@ +use crate::*; +use 
assert_cmd::prelude::*; +use storify::error::Result; +use storify::storage::StorageClient; + +pub fn tests(client: &StorageClient, tests: &mut Vec) { + tests.extend(async_trials!( + client, + test_truncate_create_default_zero, + test_truncate_to_smaller_size, + test_truncate_to_larger_with_padding, + test_truncate_no_create_is_noop, + test_truncate_parents, + test_truncate_size_limit_guard + )); +} + +async fn test_truncate_create_default_zero(client: StorageClient) -> Result<()> { + let path = TEST_FIXTURE.new_file_path(); + + storify_cmd().arg("truncate").arg(&path).assert().success(); + + let meta = client.operator().stat(&path).await?; + assert!(meta.mode().is_file()); + assert_eq!(meta.content_length(), 0); + Ok(()) +} + +async fn test_truncate_to_smaller_size(client: StorageClient) -> Result<()> { + let (path, content, _size) = TEST_FIXTURE.new_file(client.operator()); + client.operator().write(&path, content).await?; + + let target: u64 = 128; + storify_cmd() + .arg("truncate") + .args(["--size", &target.to_string(), &path]) + .assert() + .success(); + + let meta = client.operator().stat(&path).await?; + assert_eq!(meta.content_length(), target); + Ok(()) +} + +async fn test_truncate_to_larger_with_padding(client: StorageClient) -> Result<()> { + let (path, content, _size) = TEST_FIXTURE.new_file_with_range("pad-file", 64..256); + client.operator().write(&path, content).await?; + + let target: u64 = 2048; + storify_cmd() + .arg("truncate") + .args(["--size", &target.to_string(), &path]) + .assert() + .success(); + + let meta = client.operator().stat(&path).await?; + assert_eq!(meta.content_length(), target); + Ok(()) +} + +async fn test_truncate_no_create_is_noop(client: StorageClient) -> Result<()> { + let path = TEST_FIXTURE.new_file_path(); + + storify_cmd() + .arg("truncate") + .args(["-c", "--size", "0", &path]) + .assert() + .success(); + + // Should still not exist + let res = client.operator().stat(&path).await; + assert!(res.is_err()); + Ok(()) +} + 
+async fn test_truncate_parents(client: StorageClient) -> Result<()> {
+    let dir = TEST_FIXTURE.new_dir_path();
+    let nested = format!("{dir}nested/dirs/file.bin");
+    storify_cmd()
+        .arg("truncate")
+        .args(["-p", "--size", "100", &nested])
+        .assert()
+        .success();
+    // With -p the missing parent directories are created, so the new file must exist at full size
+    let meta = client.operator().stat(&nested).await?;
+    assert_eq!(meta.content_length(), 100);
+    Ok(())
+}
+
+async fn test_truncate_size_limit_guard(_client: StorageClient) -> Result<()> {
+    // `-s` is the short form of `--size-limit`, so the target goes in `--size`: 2000000 bytes rounds up to 2 MB, above the 1 MB limit
+    storify_cmd()
+        .arg("truncate")
+        .args(["--size", "2000000", "--size-limit", "1", "/guard-too-big"])
+        .assert()
+        .failure();
+    Ok(())
+}