From ceac3cca40e362e0aefae7c139a927acba176ab6 Mon Sep 17 00:00:00 2001 From: grumbach Date: Fri, 17 Apr 2026 12:26:33 +0900 Subject: [PATCH 1/4] feat: add file cost estimation command and public API Add `ant file cost <path>` to estimate upload cost without uploading. Encrypts the file locally to determine chunk count, requests a single quote from the network for a representative chunk, and extrapolates the total storage cost. No wallet required. New public API: `Client::estimate_upload_cost(path, mode, progress)` returns `UploadCostEstimate` with file size, chunk count, storage cost in atto, estimated gas in wei, and payment mode. Gas estimation uses a conservative heuristic based on chunk count and payment mode (merkle vs single). Storage cost is 3x the median quoted price multiplied by chunk count. Supports --json for structured output and --merkle/--no-merkle to override payment mode selection. --- .github/workflows/ci.yml | 2 +- ant-cli/src/commands/data/file.rs | 65 +++++++ ant-core/src/data/client/file.rs | 151 +++++++++++++++- ant-core/src/data/client/merkle.rs | 3 +- ant-core/src/data/mod.rs | 3 +- ant-core/tests/e2e_cost_estimate.rs | 259 ++++++++++++++++++++++++++++ 6 files changed, 478 insertions(+), 5 deletions(-) create mode 100644 ant-core/tests/e2e_cost_estimate.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index df12d29..7325c58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ jobs: with: version: nightly - name: Run E2E tests (serial) - run: cargo test -p ant-core --test e2e_chunk --test e2e_data --test e2e_file --test e2e_payment --test e2e_security -- --test-threads=1 + run: cargo test -p ant-core --test e2e_chunk --test e2e_data --test e2e_file --test e2e_payment --test e2e_security --test e2e_cost_estimate -- --test-threads=1 test-merkle: name: Merkle E2E (${{ matrix.os }}) diff --git a/ant-cli/src/commands/data/file.rs b/ant-cli/src/commands/data/file.rs index c59bba4..a2ad07f 100644 --- 
a/ant-cli/src/commands/data/file.rs +++ b/ant-cli/src/commands/data/file.rs @@ -52,6 +52,20 @@ pub enum FileAction { #[arg(short, long)] output: PathBuf, }, + /// Estimate the cost of uploading a file without uploading. + /// + /// Encrypts the file locally to determine chunk count, then queries + /// the network for a price quote. No payment or wallet required. + Cost { + /// Path to the file to estimate. + path: PathBuf, + /// Force merkle batch payment mode for the estimate. + #[arg(long, conflicts_with = "no_merkle")] + merkle: bool, + /// Force single payment mode for the estimate. + #[arg(long, conflicts_with = "merkle")] + no_merkle: bool, + }, } impl FileAction { @@ -101,6 +115,20 @@ impl FileAction { ) .await } + FileAction::Cost { + path, + merkle, + no_merkle, + } => { + let mode = if merkle { + PaymentMode::Merkle + } else if no_merkle { + PaymentMode::Single + } else { + PaymentMode::Auto + }; + handle_file_cost(client, &path, mode, json).await + } } } } @@ -430,6 +458,43 @@ async fn handle_file_download( Ok(()) } +async fn handle_file_cost( + client: &Client, + path: &Path, + mode: PaymentMode, + json_output: bool, +) -> anyhow::Result<()> { + let spinner = if !json_output { + Some(new_spinner("Encrypting file to estimate cost...")) + } else { + None + }; + + let estimate = client + .estimate_upload_cost(path, mode, None) + .await + .map_err(|e| anyhow::anyhow!("Cost estimation failed: {e}"))?; + + if let Some(s) = &spinner { + s.finish_and_clear(); + } + + if json_output { + println!("{}", serde_json::to_string(&estimate)?); + } else { + let gas_wei: u128 = estimate.estimated_gas_cost_wei.parse().unwrap_or(0); + let cost_display = format_cost(&estimate.storage_cost_atto, gas_wei); + + println!(); + println!("Estimated upload cost for {}", path.display()); + println!(" Size: {}", format_size(estimate.file_size)); + println!(" Chunks: {}", estimate.chunk_count); + println!(" Cost: {cost_display}"); + } + + Ok(()) +} + #[derive(Serialize)] struct 
UploadJsonResult { #[serde(skip_serializing_if = "Option::is_none")] diff --git a/ant-core/src/data/client/file.rs b/ant-core/src/data/client/file.rs index acc4c31..8f019e8 100644 --- a/ant-core/src/data/client/file.rs +++ b/ant-core/src/data/client/file.rs @@ -20,8 +20,8 @@ use crate::data::error::{Error, Result}; use ant_node::ant_protocol::DATA_TYPE_CHUNK; use ant_node::client::compute_address; use bytes::Bytes; -use evmlib::common::QuoteHash; -use evmlib::common::TxHash; +use evmlib::common::{Amount, QuoteHash, TxHash}; +use evmlib::merkle_payments::MAX_LEAVES; use fs2::FileExt; use futures::stream::{self, StreamExt}; use self_encryption::{get_root_data_map_parallel, stream_encrypt, streaming_decrypt, DataMap}; @@ -373,6 +373,24 @@ fn check_disk_space_for_spill(file_size: u64) -> Result<()> { Ok(()) } +/// Estimated cost of uploading a file, returned by +/// [`Client::estimate_upload_cost`]. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct UploadCostEstimate { + /// Original file size in bytes. + pub file_size: u64, + /// Number of chunks the file would be split into (data chunks only, + /// does not include the DataMap chunk added during public uploads). + pub chunk_count: usize, + /// Estimated total storage cost in atto (token smallest unit). + pub storage_cost_atto: String, + /// Estimated gas cost in wei as a string. This is a rough heuristic + /// based on chunk count and payment mode, NOT a live gas price query. + pub estimated_gas_cost_wei: String, + /// Payment mode that would be used. + pub payment_mode: PaymentMode, +} + /// Result of a file upload: the `DataMap` needed to retrieve the file. #[derive(Debug, Clone)] pub struct FileUploadResult { @@ -528,6 +546,135 @@ impl Client { self.file_upload_with_mode(path, PaymentMode::Auto).await } + /// Estimate the cost of uploading a file without actually uploading. 
+ /// + /// Encrypts the file to determine chunk count and sizes, then requests + /// a single quote from the network for a representative chunk. The + /// per-chunk price is extrapolated to the total chunk count. + /// + /// The estimate is fast (~2-5s) and does not require a wallet. Spilled + /// chunks are cleaned up automatically when the function returns. + /// + /// Gas cost is a rough heuristic (150k gas per transaction at 1 gwei), + /// not a live gas price query. Actual gas varies by network conditions. + /// + /// # Errors + /// + /// Returns an error if the file cannot be read, encryption fails, + /// or the network cannot provide a quote. + pub async fn estimate_upload_cost( + &self, + path: &Path, + mode: PaymentMode, + progress: Option>, + ) -> Result { + let file_size = std::fs::metadata(path).map_err(Error::Io)?.len(); + + if file_size < 3 { + return Err(Error::InvalidData( + "File too small: self-encryption requires at least 3 bytes".into(), + )); + } + + check_disk_space_for_spill(file_size)?; + + info!( + "Estimating upload cost for {} ({file_size} bytes)", + path.display() + ); + + let (spill, _data_map) = self.encrypt_file_to_spill(path, progress.as_ref()).await?; + let chunk_count = spill.len(); + + if let Some(ref tx) = progress { + let _ = tx + .send(UploadEvent::Encrypted { + total_chunks: chunk_count, + }) + .await; + } + + info!("Encrypted into {chunk_count} chunks, requesting quote"); + + // Read the first chunk to get a representative quote from the network. + let first_addr = spill + .addresses + .first() + .ok_or_else(|| Error::InvalidData("Encryption produced zero chunks".into()))?; + let first_chunk = spill.read_chunk(first_addr)?; + let first_address = compute_address(&first_chunk); + let data_size = u64::try_from(first_chunk.len()) + .map_err(|e| Error::InvalidData(format!("chunk size too large: {e}")))?; + + // If the first chunk is already stored, we still proceed with + // the estimate using a zero price for that chunk. 
We cannot + // reliably detect whether ALL chunks are stored from a single + // sample, so we always return a cost estimate. + let quotes = match self + .get_store_quotes(&first_address, data_size, DATA_TYPE_CHUNK) + .await + { + Ok(q) => q, + Err(Error::AlreadyStored) => { + // First chunk exists but we don't know about the rest. + // Return zero storage cost as a best-effort estimate. + let uses_merkle = should_use_merkle(chunk_count, mode); + return Ok(UploadCostEstimate { + file_size, + chunk_count, + storage_cost_atto: "0".into(), + estimated_gas_cost_wei: "0".into(), + payment_mode: if uses_merkle { + PaymentMode::Merkle + } else { + PaymentMode::Single + }, + }); + } + Err(e) => return Err(e), + }; + + // Use the median price × 3 (matches SingleNodePayment::from_quotes + // which pays 3x the median to incentivize reliable storage). + let mut prices: Vec = quotes.iter().map(|(_, _, _, price)| *price).collect(); + prices.sort(); + let median_price = prices + .get(prices.len() / 2) + .copied() + .unwrap_or(Amount::ZERO); + let per_chunk_cost = median_price * Amount::from(3u64); + + let total_storage = per_chunk_cost * Amount::from(chunk_count as u64); + + // Estimate gas cost based on payment mode and chunk count. + // This is a rough heuristic: ~150k gas per EVM transaction at ~1 gwei. + // Actual gas prices vary by network. Treat this as an order-of-magnitude guide. 
+ let uses_merkle = should_use_merkle(chunk_count, mode); + let estimated_gas: u128 = if uses_merkle { + let batches = chunk_count.div_ceil(MAX_LEAVES); + (batches as u128) * 150_000 * 1_000_000_000 + } else { + let waves = chunk_count.div_ceil(UPLOAD_WAVE_SIZE); + (waves as u128) * 150_000 * 1_000_000_000 + }; + + info!( + "Estimate: {chunk_count} chunks, storage={total_storage} atto, gas~={estimated_gas} wei" + ); + + Ok(UploadCostEstimate { + file_size, + chunk_count, + storage_cost_atto: total_storage.to_string(), + estimated_gas_cost_wei: estimated_gas.to_string(), + payment_mode: if uses_merkle { + PaymentMode::Merkle + } else { + PaymentMode::Single + }, + }) + } + /// Phase 1 of external-signer upload: encrypt file and prepare chunks. /// /// Requires an EVM network (for contract price queries) but NOT a wallet. diff --git a/ant-core/src/data/client/merkle.rs b/ant-core/src/data/client/merkle.rs index 1ef32b1..d9ccd88 100644 --- a/ant-core/src/data/client/merkle.rs +++ b/ant-core/src/data/client/merkle.rs @@ -29,7 +29,8 @@ use xor_name::XorName; pub const DEFAULT_MERKLE_THRESHOLD: usize = 64; /// Payment mode for uploads. -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "snake_case")] pub enum PaymentMode { /// Automatically choose: merkle for batches >= threshold, single otherwise. 
#[default] diff --git a/ant-core/src/data/mod.rs b/ant-core/src/data/mod.rs index 7d5c676..dbada3f 100644 --- a/ant-core/src/data/mod.rs +++ b/ant-core/src/data/mod.rs @@ -22,7 +22,8 @@ pub use ant_node::client::{compute_address, DataChunk, XorName}; pub use client::batch::{finalize_batch_payment, PaidChunk, PaymentIntent, PreparedChunk}; pub use client::data::DataUploadResult; pub use client::file::{ - DownloadEvent, ExternalPaymentInfo, FileUploadResult, PreparedUpload, UploadEvent, + DownloadEvent, ExternalPaymentInfo, FileUploadResult, PreparedUpload, UploadCostEstimate, + UploadEvent, }; pub use client::merkle::{ finalize_merkle_batch, MerkleBatchPaymentResult, PaymentMode, PreparedMerkleBatch, diff --git a/ant-core/tests/e2e_cost_estimate.rs b/ant-core/tests/e2e_cost_estimate.rs new file mode 100644 index 0000000..2c5a376 --- /dev/null +++ b/ant-core/tests/e2e_cost_estimate.rs @@ -0,0 +1,259 @@ +//! E2E tests for file upload cost estimation. +//! +//! Compares `estimate_upload_cost()` against actual upload costs to verify +//! the estimate is accurate. Tests multiple file sizes covering single-wave +//! and multi-chunk scenarios. +//! +//! Run with: cargo test --test e2e_cost_estimate -- --nocapture + +#![allow(clippy::unwrap_used, clippy::expect_used)] + +mod support; + +use ant_core::data::client::merkle::PaymentMode; +use ant_core::data::{Client, ClientConfig}; +use serial_test::serial; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use support::MiniTestnet; +use tempfile::TempDir; + +/// Simple xorshift64 PRNG for deterministic, incompressible test data. 
+struct Xorshift64(u64); + +impl Xorshift64 { + fn new(seed: u64) -> Self { + Self(seed) + } + + fn next_u8(&mut self) -> u8 { + self.0 ^= self.0 << 13; + self.0 ^= self.0 >> 7; + self.0 ^= self.0 << 17; + (self.0 & 0xFF) as u8 + } +} + +fn create_test_file(dir: &Path, size: u64, name: &str, seed: u64) -> PathBuf { + let path = dir.join(name); + let mut file = std::fs::File::create(&path).expect("create test file"); + + let mut rng = Xorshift64::new(seed); + let mut remaining = size; + let buf_size: usize = 64 * 1024; + let mut buf = vec![0u8; buf_size]; + while remaining > 0 { + let to_write = remaining.min(buf_size as u64) as usize; + for byte in buf.iter_mut().take(to_write) { + *byte = rng.next_u8(); + } + file.write_all(&buf[..to_write]).expect("write test data"); + remaining -= to_write as u64; + } + file.flush().expect("flush test file"); + path +} + +/// Estimate vs actual cost comparison for a single file. +/// +/// Runs `estimate_upload_cost`, then actually uploads and compares. +/// Returns (estimated_atto, actual_atto, chunk_count_estimate, chunk_count_actual). +async fn compare_estimate_vs_actual( + client: &Client, + path: &Path, + mode: PaymentMode, +) -> (u128, u128, usize, usize) { + // Phase 1: Estimate + let estimate = client + .estimate_upload_cost(path, mode, None) + .await + .expect("estimate should succeed"); + + let estimated_atto: u128 = estimate + .storage_cost_atto + .parse() + .expect("parse estimated atto"); + + // Phase 2: Actually upload the same file (callers give every file a distinct seed so we don't get AlreadyStored) + let result = client + .file_upload_with_mode(path, mode) + .await + .expect("upload should succeed"); + + let actual_atto: u128 = result.storage_cost_atto.parse().expect("parse actual atto"); + + ( + estimated_atto, + actual_atto, + estimate.chunk_count, + result.chunks_stored, + ) +} + +/// Core test: estimate accuracy across file sizes. +/// +/// Verifies: +/// 1. Chunk count from estimate matches actual upload chunk count +/// 2. 
Storage cost estimate is within 15% of actual cost +/// (prices are uniform on a healthy local network, so this is generous) +/// 3. Estimate does not require a wallet (no payment made) +/// 4. Estimate returns correct payment mode +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_estimate_matches_actual_cost() { + let testnet = MiniTestnet::start(10).await; + let node = testnet.node(3).expect("Node 3 should exist"); + let client = Client::from_node(Arc::clone(&node), ClientConfig::default()) + .with_wallet(testnet.wallet().clone()); + + let work_dir = TempDir::new().expect("create work dir"); + + // Test files: small (3 chunks), medium (~13 chunks) + let test_cases: Vec<(u64, &str, u64)> = vec![ + (4 * 1024, "tiny.bin", 0xAAAA_0001), // ~4 KB -> 3 chunks + (100 * 1024, "small.bin", 0xAAAA_0002), // 100 KB -> ~3 chunks + (1024 * 1024, "1mb.bin", 0xAAAA_0003), // 1 MB -> ~3 chunks + (10 * 1024 * 1024, "10mb.bin", 0xAAAA_0004), // 10 MB -> ~3 chunks + ]; + + eprintln!(); + eprintln!("╔═══════════╤════════════════╤════════════════╤═══════════════════════╤═══════════════════════╗"); + eprintln!("║ File │ Est. Chunks │ Act. Chunks │ Est. Cost (atto) │ Act. 
Cost (atto) ║"); + eprintln!("╠═══════════╪════════════════╪════════════════╪═══════════════════════╪═══════════════════════╣"); + + for (size, name, seed) in &test_cases { + let path = create_test_file(work_dir.path(), *size, name, *seed); + + let (est_atto, act_atto, est_chunks, act_chunks) = + compare_estimate_vs_actual(&client, &path, PaymentMode::Auto).await; + + let size_label = if *size >= 1024 * 1024 { + format!("{} MB", size / (1024 * 1024)) + } else { + format!("{} KB", size / 1024) + }; + + eprintln!( + "║ {:<9} │ {:>14} │ {:>14} │ {:>21} │ {:>21} ║", + size_label, est_chunks, act_chunks, est_atto, act_atto, + ); + + // Chunk count MUST match exactly (same file, same encryption) + assert_eq!( + est_chunks, act_chunks, + "Chunk count mismatch for {name}: estimate={est_chunks}, actual={act_chunks}" + ); + + // Storage cost should be within 15% (prices are uniform on a local + // testnet so the extrapolation from one quote should be very close). + if act_atto > 0 { + let ratio = if est_atto > act_atto { + est_atto as f64 / act_atto as f64 + } else { + act_atto as f64 / est_atto as f64 + }; + assert!( + ratio < 1.15, + "Cost estimate too far from actual for {name}: est={est_atto}, actual={act_atto}, ratio={ratio:.2}" + ); + } + } + + eprintln!("╚═══════════╧════════════════╧════════════════╧═══════════════════════╧═══════════════════════╝"); + eprintln!(); +} + +/// Test that estimate works without a wallet. +/// +/// Creates a client WITHOUT a wallet and verifies that +/// `estimate_upload_cost` still returns a valid estimate. 
+#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_estimate_works_without_wallet() { + // Use 10 nodes (> CLOSE_GROUP_SIZE=7) for quote reliability + let testnet = MiniTestnet::start(10).await; + let node = testnet.node(3).expect("Node 3 should exist"); + + // Client WITHOUT wallet — no .with_wallet() call + let client = Client::from_node(Arc::clone(&node), ClientConfig::default()); + + let work_dir = TempDir::new().expect("create work dir"); + let path = create_test_file(work_dir.path(), 4096, "no_wallet.bin", 0xBBBB_0001); + + let estimate = client + .estimate_upload_cost(&path, PaymentMode::Auto, None) + .await + .expect("estimate should work without wallet"); + + assert!( + estimate.chunk_count >= 3, + "self-encryption produces at least 3 chunks" + ); + assert!(estimate.file_size == 4096); +} + +/// Test that estimate returns correct payment mode. +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_estimate_payment_mode() { + let testnet = MiniTestnet::start(10).await; + let node = testnet.node(3).expect("Node 3 should exist"); + let client = Client::from_node(Arc::clone(&node), ClientConfig::default()); + + let work_dir = TempDir::new().expect("create work dir"); + + // Small file (3 chunks) with Auto mode -> should be Single + let small_path = create_test_file(work_dir.path(), 4096, "small_mode.bin", 0xDDDD_0001); + let small_est = client + .estimate_upload_cost(&small_path, PaymentMode::Auto, None) + .await + .expect("estimate should succeed"); + assert_eq!( + small_est.payment_mode, + PaymentMode::Single, + "Small file with Auto should use Single mode" + ); + + // Force merkle on small file + let merkle_est = client + .estimate_upload_cost(&small_path, PaymentMode::Merkle, None) + .await + .expect("estimate should succeed"); + assert_eq!( + merkle_est.payment_mode, + PaymentMode::Merkle, + "Forced Merkle should report Merkle mode" + ); + + // Force single + let single_est = client + .estimate_upload_cost(&small_path, 
PaymentMode::Single, None) + .await + .expect("estimate should succeed"); + assert_eq!( + single_est.payment_mode, + PaymentMode::Single, + "Forced Single should report Single mode" + ); +} + +/// Test that estimate rejects files too small for self-encryption. +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_estimate_rejects_tiny_files() { + let testnet = MiniTestnet::start(10).await; + let node = testnet.node(3).expect("Node 3 should exist"); + let client = Client::from_node(Arc::clone(&node), ClientConfig::default()); + + let work_dir = TempDir::new().expect("create work dir"); + + // 2-byte file — below self-encryption minimum of 3 bytes + let tiny_path = work_dir.path().join("tiny.bin"); + std::fs::write(&tiny_path, b"ab").expect("write tiny file"); + + let result = client + .estimate_upload_cost(&tiny_path, PaymentMode::Auto, None) + .await; + assert!(result.is_err(), "Estimate should fail for files < 3 bytes"); +} From af1e894ca3e5bb6d5b93e4daab5fc3527402934a Mon Sep 17 00:00:00 2001 From: grumbach Date: Fri, 17 Apr 2026 18:21:15 +0900 Subject: [PATCH 2/4] fix: address review on cost estimation (AlreadyStored, gas, progress) P1 fixes from Nic's review on #44: - Drop the AlreadyStored -> "free" best-effort branch. A majority confirming the first chunk is stored says nothing about the other 99% of chunks, so returning a zero-cost estimate was misleading. Now surfaces a typed InvalidData error so callers can retry instead of trusting the bogus cost. - Rework single-mode gas heuristic. batch_pay flattens every chunk's close group quotes into one pay_for_quotes call, so gas scales with the number of quote entries in the wave (chunks x recipients/chunk), not with the number of waves. 150k/wave was off by 5-10x on full waves; replace with 75k base + 25k per entry, summed across waves. Bump merkle budget to 500k/sub-batch to reflect tree verification + pool commitment. 
P2 polish: - Drop the redundant compute_address re-hash on the first chunk; the spill address is already the content address. - Replace `chunk_count as u64` with a checked conversion to match the rest of the file. - Wire the progress hook through handle_file_cost so large-file encryption emits Encrypting / Encrypted events instead of a static spinner, reusing drive_upload_progress. All 4 e2e_cost_estimate tests still pass on a local devnet. --- ant-cli/src/commands/data/file.rs | 32 +++++++++------ ant-core/src/data/client/file.rs | 66 +++++++++++++++++-------------- 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/ant-cli/src/commands/data/file.rs b/ant-cli/src/commands/data/file.rs index a2ad07f..b757475 100644 --- a/ant-cli/src/commands/data/file.rs +++ b/ant-cli/src/commands/data/file.rs @@ -127,7 +127,7 @@ impl FileAction { } else { PaymentMode::Auto }; - handle_file_cost(client, &path, mode, json).await + handle_file_cost(client, &path, mode, json, verbose).await } } } @@ -463,21 +463,29 @@ async fn handle_file_cost( path: &Path, mode: PaymentMode, json_output: bool, + verbose: u8, ) -> anyhow::Result<()> { - let spinner = if !json_output { - Some(new_spinner("Encrypting file to estimate cost...")) + let file_size = std::fs::metadata(path)?.len(); + + let estimate = if json_output { + client + .estimate_upload_cost(path, mode, None) + .await + .map_err(|e| anyhow::anyhow!("Cost estimation failed: {e}"))? } else { - None - }; + let (tx, rx) = mpsc::channel(64); + let pb_handle = tokio::spawn(drive_upload_progress( + rx, + path.display().to_string(), + file_size, + verbose, + )); - let estimate = client - .estimate_upload_cost(path, mode, None) - .await - .map_err(|e| anyhow::anyhow!("Cost estimation failed: {e}"))?; + let result = client.estimate_upload_cost(path, mode, Some(tx)).await; + let _ = pb_handle.await; - if let Some(s) = &spinner { - s.finish_and_clear(); - } + result.map_err(|e| anyhow::anyhow!("Cost estimation failed: {e}"))? 
+ }; if json_output { println!("{}", serde_json::to_string(&estimate)?); diff --git a/ant-core/src/data/client/file.rs b/ant-core/src/data/client/file.rs index 8f019e8..5e519be 100644 --- a/ant-core/src/data/client/file.rs +++ b/ant-core/src/data/client/file.rs @@ -555,8 +555,10 @@ impl Client { /// The estimate is fast (~2-5s) and does not require a wallet. Spilled /// chunks are cleaned up automatically when the function returns. /// - /// Gas cost is a rough heuristic (150k gas per transaction at 1 gwei), - /// not a live gas price query. Actual gas varies by network conditions. + /// Gas cost is a rough heuristic (not a live gas price query) priced at + /// ~1 gwei. Single-mode gas scales with the total number of quote entries + /// paid in `pay_for_quotes`; merkle-mode gas scales with the number of + /// sub-batches. Actual gas varies by network conditions. /// /// # Errors /// @@ -602,34 +604,25 @@ impl Client { .first() .ok_or_else(|| Error::InvalidData("Encryption produced zero chunks".into()))?; let first_chunk = spill.read_chunk(first_addr)?; - let first_address = compute_address(&first_chunk); let data_size = u64::try_from(first_chunk.len()) .map_err(|e| Error::InvalidData(format!("chunk size too large: {e}")))?; - // If the first chunk is already stored, we still proceed with - // the estimate using a zero price for that chunk. We cannot - // reliably detect whether ALL chunks are stored from a single - // sample, so we always return a cost estimate. + // If the first chunk is already stored we cannot obtain a + // representative quote from a single sample — returning a "free" + // estimate would be misleading for a file where the other chunks + // still need paying for. Surface a typed error so the caller can + // retry (e.g. with a different file) rather than trust a zero cost. 
let quotes = match self - .get_store_quotes(&first_address, data_size, DATA_TYPE_CHUNK) + .get_store_quotes(first_addr, data_size, DATA_TYPE_CHUNK) .await { Ok(q) => q, Err(Error::AlreadyStored) => { - // First chunk exists but we don't know about the rest. - // Return zero storage cost as a best-effort estimate. - let uses_merkle = should_use_merkle(chunk_count, mode); - return Ok(UploadCostEstimate { - file_size, - chunk_count, - storage_cost_atto: "0".into(), - estimated_gas_cost_wei: "0".into(), - payment_mode: if uses_merkle { - PaymentMode::Merkle - } else { - PaymentMode::Single - }, - }); + return Err(Error::InvalidData( + "first chunk is already stored on the network; cannot \ + sample a representative price for a reliable estimate" + .into(), + )); } Err(e) => return Err(e), }; @@ -644,18 +637,33 @@ impl Client { .unwrap_or(Amount::ZERO); let per_chunk_cost = median_price * Amount::from(3u64); - let total_storage = per_chunk_cost * Amount::from(chunk_count as u64); + let chunk_count_u64 = u64::try_from(chunk_count) + .map_err(|e| Error::InvalidData(format!("chunk count too large: {e}")))?; + let total_storage = per_chunk_cost * Amount::from(chunk_count_u64); // Estimate gas cost based on payment mode and chunk count. - // This is a rough heuristic: ~150k gas per EVM transaction at ~1 gwei. - // Actual gas prices vary by network. Treat this as an order-of-magnitude guide. + // Rough heuristic at ~1 gwei; treat as an order-of-magnitude guide. + // - Single mode: `batch_pay` flattens every chunk's close-group quotes + // into one `pay_for_quotes` call, so gas scales with the number of + // quote entries in the wave (chunks × recipients/chunk), not the + // number of waves. A multi-recipient tx of that shape on Arbitrum + // runs ~75k base + ~25k per entry — use that per-wave rather than a + // flat 150k, which was off by 5–10x for full waves. 
+ // - Merkle mode: one on-chain tx per sub-batch, but the tx verifies a + // tree and posts a pool commitment, so budget ~500k per sub-batch. let uses_merkle = should_use_merkle(chunk_count, mode); + let quotes_per_chunk = u128::try_from(quotes.len().max(1)) + .map_err(|e| Error::InvalidData(format!("quote count too large: {e}")))?; let estimated_gas: u128 = if uses_merkle { - let batches = chunk_count.div_ceil(MAX_LEAVES); - (batches as u128) * 150_000 * 1_000_000_000 + let batches = chunk_count.div_ceil(MAX_LEAVES) as u128; + batches * 500_000 * 1_000_000_000 } else { - let waves = chunk_count.div_ceil(UPLOAD_WAVE_SIZE); - (waves as u128) * 150_000 * 1_000_000_000 + // Sum over waves, accounting for a possibly-partial last wave. + let total_entries = (chunk_count as u128) * quotes_per_chunk; + let waves = chunk_count.div_ceil(UPLOAD_WAVE_SIZE) as u128; + let base_gas = waves * 75_000; + let entry_gas = total_entries * 25_000; + (base_gas + entry_gas) * 1_000_000_000 }; info!( From fbb2a84ab556ce2604250c743979ee96288cfb04 Mon Sep 17 00:00:00 2001 From: grumbach Date: Tue, 21 Apr 2026 11:43:14 +0900 Subject: [PATCH 3/4] fix: retry AlreadyStored on cost estimate, realistic gas constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 — AlreadyStored branch now samples up to ESTIMATE_SAMPLE_CAP chunk addresses instead of trusting a single probe. Only returns zero-cost when every address in the file is confirmed stored; otherwise returns a new typed Error::CostEstimationInconclusive so callers can handle it cleanly. The CLI renders this case with a helpful retry-suggestion message. P1 — Replace the per-wave gas heuristic with named constants: GAS_PER_WAVE_TX = 1_500_000 gas (Arbitrum pay_for_quotes with 64 entries) GAS_PER_MERKLE_TX = 500_000 gas ARBITRUM_GAS_PRICE_WEI = 100_000_000 (0.1 gwei baseline) Each constant carries a comment explaining where the number comes from and that it is advisory, not a live oracle query. 
No change to the chunk-count conversion or progress plumbing (already done in the previous review commit). --- ant-cli/src/commands/data/file.rs | 25 ++-- ant-core/src/data/client/file.rs | 194 +++++++++++++++++++++--------- ant-core/src/data/error.rs | 20 +++ 3 files changed, 178 insertions(+), 61 deletions(-) diff --git a/ant-cli/src/commands/data/file.rs b/ant-cli/src/commands/data/file.rs index b757475..406fa24 100644 --- a/ant-cli/src/commands/data/file.rs +++ b/ant-cli/src/commands/data/file.rs @@ -7,7 +7,9 @@ use serde::Serialize; use tokio::sync::mpsc; use tracing::info; -use ant_core::data::{Client, DataMap, DownloadEvent, PaymentMode, UploadEvent}; +use ant_core::data::{ + Client, DataMap, DownloadEvent, Error as DataError, PaymentMode, UploadEvent, +}; use super::chunk::parse_address; @@ -467,11 +469,8 @@ async fn handle_file_cost( ) -> anyhow::Result<()> { let file_size = std::fs::metadata(path)?.len(); - let estimate = if json_output { - client - .estimate_upload_cost(path, mode, None) - .await - .map_err(|e| anyhow::anyhow!("Cost estimation failed: {e}"))? + let raw_result = if json_output { + client.estimate_upload_cost(path, mode, None).await } else { let (tx, rx) = mpsc::channel(64); let pb_handle = tokio::spawn(drive_upload_progress( @@ -483,8 +482,20 @@ async fn handle_file_cost( let result = client.estimate_upload_cost(path, mode, Some(tx)).await; let _ = pb_handle.await; + result + }; - result.map_err(|e| anyhow::anyhow!("Cost estimation failed: {e}"))? + let estimate = match raw_result { + Ok(e) => e, + Err(DataError::CostEstimationInconclusive(msg)) => { + anyhow::bail!( + "Cost estimation inconclusive: {msg}. The sampled chunks are \ + already stored on the network, so we can't sample a representative \ + price for the rest of the file. Try again later or upload a file \ + that contains some new data." 
+ ); + } + Err(e) => anyhow::bail!("Cost estimation failed: {e}"), }; if json_output { diff --git a/ant-core/src/data/client/file.rs b/ant-core/src/data/client/file.rs index 5e519be..159d131 100644 --- a/ant-core/src/data/client/file.rs +++ b/ant-core/src/data/client/file.rs @@ -19,9 +19,11 @@ use crate::data::client::Client; use crate::data::error::{Error, Result}; use ant_node::ant_protocol::DATA_TYPE_CHUNK; use ant_node::client::compute_address; +use ant_node::core::{MultiAddr, PeerId}; use bytes::Bytes; use evmlib::common::{Amount, QuoteHash, TxHash}; use evmlib::merkle_payments::MAX_LEAVES; +use evmlib::PaymentQuote; use fs2::FileExt; use futures::stream::{self, StreamExt}; use self_encryption::{get_root_data_map_parallel, stream_encrypt, streaming_decrypt, DataMap}; @@ -74,9 +76,47 @@ pub enum DownloadEvent { ChunksFetched { fetched: usize, total: usize }, } +/// One entry in the per-chunk quote list returned by +/// [`Client::get_store_quotes`]: the responding peer, its addresses, the +/// signed quote it returned, and the payment amount it is demanding. +type QuoteEntry = (PeerId, Vec, PaymentQuote, Amount); + /// Number of chunks per upload wave (matches batch.rs PAYMENT_WAVE_SIZE). const UPLOAD_WAVE_SIZE: usize = 64; +/// Maximum number of distinct chunk addresses to sample when probing for a +/// representative quote in [`Client::estimate_upload_cost`]. +/// +/// Bounded small so we never spend more than a couple of round-trips on the +/// `AlreadyStored` retry path, which only matters when many leading chunks +/// of a file already live on the network. +const ESTIMATE_SAMPLE_CAP: usize = 5; + +/// Gas used by one `pay_for_quotes` transaction that packs up to +/// `UPLOAD_WAVE_SIZE` (quote_hash, rewards_address, amount) entries. +/// +/// `batch_pay` in `batch.rs` flattens every chunk's close-group quotes into a +/// single EVM call, so the dominant cost is the SSTOREs for each entry plus +/// the base tx overhead. 
On Arbitrum that is roughly +/// `21_000 + 64 × (20_000 + small)` ≈ 1.3M; we round up to 1.5M as a +/// conservative per-wave upper bound. +const GAS_PER_WAVE_TX: u128 = 1_500_000; + +/// Gas used by one merkle batch payment transaction. +/// +/// One on-chain tx per merkle sub-batch, but each tx verifies a merkle tree +/// and posts a pool commitment, so budget higher than a plain transfer. +const GAS_PER_MERKLE_TX: u128 = 500_000; + +/// Advisory gas price (wei/gas) used to turn the gas estimate into an ETH +/// figure when no live gas oracle is consulted. +/// +/// Arbitrum One typically settles around 0.1 gwei on quiet blocks; we use +/// that as the default so the CLI prints a sensible order-of-magnitude +/// number. Users should treat the reported gas cost as an estimate, not a +/// commitment — real gas is bid at submission time. +const ARBITRUM_GAS_PRICE_WEI: u128 = 100_000_000; + /// Extra headroom percentage for disk space check. /// /// Encrypted chunks are slightly larger than the source data due to padding @@ -555,15 +595,24 @@ impl Client { /// The estimate is fast (~2-5s) and does not require a wallet. Spilled /// chunks are cleaned up automatically when the function returns. /// - /// Gas cost is a rough heuristic (not a live gas price query) priced at - /// ~1 gwei. Single-mode gas scales with the total number of quote entries - /// paid in `pay_for_quotes`; merkle-mode gas scales with the number of - /// sub-batches. Actual gas varies by network conditions. + /// Gas cost is an advisory heuristic, not a live gas-oracle query. It is + /// derived from realistic per-transaction budgets (see + /// [`GAS_PER_WAVE_TX`], [`GAS_PER_MERKLE_TX`]) priced at + /// [`ARBITRUM_GAS_PRICE_WEI`]. Real gas varies with network conditions. + /// + /// If the first sampled chunk is already stored on the network, the + /// function retries with subsequent chunk addresses (up to + /// [`ESTIMATE_SAMPLE_CAP`]). 
If every sampled address reports stored, + /// a [`Error::CostEstimationInconclusive`] is returned so callers can + /// decide how to react rather than trust a bogus "free" estimate. Only + /// when every address in the file is stored do we return a zero-cost + /// estimate. /// /// # Errors /// /// Returns an error if the file cannot be read, encryption fails, - /// or the network cannot provide a quote. + /// the network cannot provide a quote, or every sampled chunk is + /// already stored ([`Error::CostEstimationInconclusive`]). pub async fn estimate_upload_cost( &self, path: &Path, @@ -598,33 +647,69 @@ impl Client { info!("Encrypted into {chunk_count} chunks, requesting quote"); - // Read the first chunk to get a representative quote from the network. - let first_addr = spill - .addresses - .first() - .ok_or_else(|| Error::InvalidData("Encryption produced zero chunks".into()))?; - let first_chunk = spill.read_chunk(first_addr)?; - let data_size = u64::try_from(first_chunk.len()) - .map_err(|e| Error::InvalidData(format!("chunk size too large: {e}")))?; - - // If the first chunk is already stored we cannot obtain a - // representative quote from a single sample — returning a "free" - // estimate would be misleading for a file where the other chunks - // still need paying for. Surface a typed error so the caller can - // retry (e.g. with a different file) rather than trust a zero cost. - let quotes = match self - .get_store_quotes(first_addr, data_size, DATA_TYPE_CHUNK) - .await - { - Ok(q) => q, - Err(Error::AlreadyStored) => { - return Err(Error::InvalidData( - "first chunk is already stored on the network; cannot \ - sample a representative price for a reliable estimate" - .into(), - )); + // Sample up to ESTIMATE_SAMPLE_CAP distinct chunk addresses. A single + // AlreadyStored result says nothing about the rest of the file — the + // first chunk is often a DataMap-adjacent chunk that collides with + // prior uploads even when 99% of the file is new. 
Only treat the + // whole file as "fully stored" when every sample comes back stored. + let sample_limit = spill.addresses.len().min(ESTIMATE_SAMPLE_CAP); + let mut sampled = 0usize; + let mut all_already_stored = true; + let mut quotes_opt: Option<Vec<QuoteEntry>> = None; + + for addr in spill.addresses.iter().take(sample_limit) { + sampled += 1; + let chunk_bytes = spill.read_chunk(addr)?; + let data_size = u64::try_from(chunk_bytes.len()) + .map_err(|e| Error::InvalidData(format!("chunk size too large: {e}")))?; + match self + .get_store_quotes(addr, data_size, DATA_TYPE_CHUNK) + .await + { + Ok(q) => { + quotes_opt = Some(q); + all_already_stored = false; + break; + } + Err(Error::AlreadyStored) => { + debug!( + "Sample chunk {} already stored; trying next address ({sampled}/{sample_limit})", + hex::encode(addr) + ); + continue; + } + Err(e) => return Err(e), + } + } + + let uses_merkle = should_use_merkle(chunk_count, mode); + + let quotes = match quotes_opt { + Some(q) => q, + None if all_already_stored && sampled == chunk_count => { + // Every address in the file was sampled and every one is + // already on the network — returning a zero-cost estimate is + // accurate in this case.
+ info!("All {chunk_count} chunks already stored; returning zero-cost estimate"); + return Ok(UploadCostEstimate { + file_size, + chunk_count, + storage_cost_atto: "0".into(), + estimated_gas_cost_wei: "0".into(), + payment_mode: if uses_merkle { + PaymentMode::Merkle + } else { + PaymentMode::Single + }, + }); + } + None => { + return Err(Error::CostEstimationInconclusive(format!( + "sampled {sampled} chunk addresses out of {chunk_count} and every \ + one reported AlreadyStored; cannot infer a representative price \ + for the remaining chunks" + ))); } - Err(e) => return Err(e), }; // Use the median price × 3 (matches SingleNodePayment::from_quotes @@ -637,33 +722,34 @@ impl Client { .unwrap_or(Amount::ZERO); let per_chunk_cost = median_price * Amount::from(3u64); - let chunk_count_u64 = u64::try_from(chunk_count) - .map_err(|e| Error::InvalidData(format!("chunk count too large: {e}")))?; + let chunk_count_u64 = u64::try_from(chunk_count).unwrap_or(u64::MAX); let total_storage = per_chunk_cost * Amount::from(chunk_count_u64); - // Estimate gas cost based on payment mode and chunk count. - // Rough heuristic at ~1 gwei; treat as an order-of-magnitude guide. - // - Single mode: `batch_pay` flattens every chunk's close-group quotes - // into one `pay_for_quotes` call, so gas scales with the number of - // quote entries in the wave (chunks × recipients/chunk), not the - // number of waves. A multi-recipient tx of that shape on Arbitrum - // runs ~75k base + ~25k per entry — use that per-wave rather than a - // flat 150k, which was off by 5–10x for full waves. - // - Merkle mode: one on-chain tx per sub-batch, but the tx verifies a - // tree and posts a pool commitment, so budget ~500k per sub-batch. 
- let uses_merkle = should_use_merkle(chunk_count, mode); - let quotes_per_chunk = u128::try_from(quotes.len().max(1)) - .map_err(|e| Error::InvalidData(format!("quote count too large: {e}")))?; + // Estimate gas cost from realistic per-transaction budgets rather + // than a flat per-chunk or per-wave number. + // + // - Single mode: `batch_pay` packs up to UPLOAD_WAVE_SIZE chunks' + // close-group quotes into one `pay_for_quotes` call on Arbitrum. + // The dominant cost is one SSTORE per entry plus base tx overhead, + // so we use GAS_PER_WAVE_TX (≈1.5M) as a conservative upper bound + // on a full wave and multiply by the number of waves. The previous + // per-wave figure of 150k was closer to a single-entry transfer + // and understated cost by 5–10x for full waves. + // - Merkle mode: one tx per sub-batch that verifies a merkle tree + // and posts a pool commitment (GAS_PER_MERKLE_TX ≈ 500k each). + // + // Gas is priced at ARBITRUM_GAS_PRICE_WEI (~0.1 gwei, a typical + // Arbitrum baseline). Treat the result as advisory, not a commitment. + let waves = u128::try_from(chunk_count.div_ceil(UPLOAD_WAVE_SIZE)).unwrap_or(u128::MAX); + let merkle_batches = u128::try_from(chunk_count.div_ceil(MAX_LEAVES)).unwrap_or(u128::MAX); let estimated_gas: u128 = if uses_merkle { - let batches = chunk_count.div_ceil(MAX_LEAVES) as u128; - batches * 500_000 * 1_000_000_000 + merkle_batches + .saturating_mul(GAS_PER_MERKLE_TX) + .saturating_mul(ARBITRUM_GAS_PRICE_WEI) } else { - // Sum over waves, accounting for a possibly-partial last wave. 
- let total_entries = (chunk_count as u128) * quotes_per_chunk; - let waves = chunk_count.div_ceil(UPLOAD_WAVE_SIZE) as u128; - let base_gas = waves * 75_000; - let entry_gas = total_entries * 25_000; - (base_gas + entry_gas) * 1_000_000_000 + waves + .saturating_mul(GAS_PER_WAVE_TX) + .saturating_mul(ARBITRUM_GAS_PRICE_WEI) }; info!( diff --git a/ant-core/src/data/error.rs b/ant-core/src/data/error.rs index e571fa5..3b0de5f 100644 --- a/ant-core/src/data/error.rs +++ b/ant-core/src/data/error.rs @@ -68,6 +68,15 @@ pub enum Error { #[error("insufficient disk space: {0}")] InsufficientDiskSpace(String), + /// Cost estimation could not reach a representative quote. + /// + /// Returned by [`crate::data::Client::estimate_upload_cost`] when every + /// sampled chunk address reported `AlreadyStored`, so the network price + /// for the remainder of the file cannot be inferred from a sample. + /// The attached message describes how many addresses were tried. + #[error("cost estimation inconclusive: {0}")] + CostEstimationInconclusive(String), + /// Upload partially succeeded -- some chunks stored, some failed after retries. /// /// The `stored` addresses can be used for progress tracking and resume. 
@@ -188,6 +197,17 @@ mod tests { ); } + #[test] + fn test_display_cost_estimation_inconclusive() { + let err = Error::CostEstimationInconclusive( + "sampled 5 addresses, all already stored".to_string(), + ); + assert_eq!( + err.to_string(), + "cost estimation inconclusive: sampled 5 addresses, all already stored" + ); + } + #[test] fn test_from_io_error() { let io_err = std::io::Error::new(std::io::ErrorKind::PermissionDenied, "access denied"); From 4f9adf9a570a3a9fc682fcee6f80fda5286184d6 Mon Sep 17 00:00:00 2001 From: grumbach Date: Tue, 21 Apr 2026 11:45:40 +0900 Subject: [PATCH 4/4] docs: avoid private intra-doc links on estimate_upload_cost --- ant-core/src/data/client/file.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ant-core/src/data/client/file.rs b/ant-core/src/data/client/file.rs index 159d131..5478a96 100644 --- a/ant-core/src/data/client/file.rs +++ b/ant-core/src/data/client/file.rs @@ -596,13 +596,13 @@ impl Client { /// chunks are cleaned up automatically when the function returns. /// /// Gas cost is an advisory heuristic, not a live gas-oracle query. It is - /// derived from realistic per-transaction budgets (see - /// [`GAS_PER_WAVE_TX`], [`GAS_PER_MERKLE_TX`]) priced at - /// [`ARBITRUM_GAS_PRICE_WEI`]. Real gas varies with network conditions. + /// derived from realistic per-transaction budgets (`GAS_PER_WAVE_TX`, + /// `GAS_PER_MERKLE_TX`) priced at `ARBITRUM_GAS_PRICE_WEI`. Real gas + /// varies with network conditions. /// /// If the first sampled chunk is already stored on the network, the /// function retries with subsequent chunk addresses (up to - /// [`ESTIMATE_SAMPLE_CAP`]). If every sampled address reports stored, + /// `ESTIMATE_SAMPLE_CAP`). If every sampled address reports stored, /// a [`Error::CostEstimationInconclusive`] is returned so callers can /// decide how to react rather than trust a bogus "free" estimate. Only /// when every address in the file is stored do we return a zero-cost