From ff720988d864b0cc855fba1b955c516f9641ca69 Mon Sep 17 00:00:00 2001 From: Nic-dorman Date: Wed, 15 Apr 2026 10:37:41 +0100 Subject: [PATCH 1/2] feat(external-signer): add Visibility arg to file_prepare_upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an external-signer path for public uploads. When a PreparedUpload is prepared with Visibility::Public, the serialized DataMap is bundled into the payment batch (wave-batch or merkle) as an additional chunk. FileUploadResult::data_map_address then carries the chunk address of the stored DataMap, giving the uploader a single network address to share for retrieval. Motivation: ant-gui (the Autonomi desktop GUI) currently has to block its Public upload option in the UI because no external-signer pathway exists for publishing the data map — `data_map_store` internally calls `pay_for_storage`, which hard-requires a wallet, and the chunk-storage plumbing (`store_paid_chunks`, `chunk_put_to_close_group`, `merkle_upload_chunks`) is pub(crate), so consumers on the external-signer path cannot hand-roll it. Bundling the data map chunk into the existing payment batch reuses the one-signature flow that wave-batch and merkle already use for file chunks, which lets ant-gui thread a `visibility` flag through its existing code path and re-enable the Public option with no extra wallet round-trip. 
- `Visibility::{Private, Public}` enum (default Private) - `Client::file_prepare_upload_with_visibility(path, visibility)`; the existing `file_prepare_upload(path)` now delegates with Private for backward compatibility - `PreparedUpload.data_map_address: Option<[u8; 32]>` carries the address between prepare and finalize - `FileUploadResult.data_map_address` is Some for public uploads - Both `finalize_upload` and `finalize_upload_merkle` propagate the field; no separate network call is needed because the data map chunk is stored alongside the rest of the batch - e2e test verifies Private leaves the address unset, Public records it, and the recorded address matches the serialized data map The internal-wallet path (`file_upload_with_mode`) is unchanged — ant-cli continues to use `file_upload` followed by `data_map_store` for its public upload flow. Co-Authored-By: Claude Opus 4.6 (1M context) --- ant-core/Cargo.toml | 1 + ant-core/src/data/client/data.rs | 1 + ant-core/src/data/client/file.rs | 84 ++++++++++++++++++++++++++++++-- ant-core/src/data/mod.rs | 2 +- ant-core/tests/e2e_file.rs | 76 ++++++++++++++++++++++++++++- 5 files changed, 159 insertions(+), 5 deletions(-) diff --git a/ant-core/Cargo.toml b/ant-core/Cargo.toml index 6e0c837..ae5d345 100644 --- a/ant-core/Cargo.toml +++ b/ant-core/Cargo.toml @@ -57,6 +57,7 @@ serial_test = "3" anyhow = "1" alloy = { version = "1.6", features = ["node-bindings"] } tokio-test = "0.4" +rmp-serde = "1" # Direct access to BootstrapManager used by the cold-start-from-disk test, # which populates a cache via `add_peer_trusted` (bypasses Sybil rate limits) # and then verifies reload after save. 
Version tracks ant-node's transitive diff --git a/ant-core/src/data/client/data.rs b/ant-core/src/data/client/data.rs index 8c588dc..2541b63 100644 --- a/ant-core/src/data/client/data.rs +++ b/ant-core/src/data/client/data.rs @@ -206,6 +206,7 @@ impl Client { prepared_chunks, payment_intent, }, + data_map_address: None, }) } diff --git a/ant-core/src/data/client/file.rs b/ant-core/src/data/client/file.rs index 5478a96..3dea9b6 100644 --- a/ant-core/src/data/client/file.rs +++ b/ant-core/src/data/client/file.rs @@ -413,6 +413,23 @@ fn check_disk_space_for_spill(file_size: u64) -> Result<()> { Ok(()) } +/// Whether the data map is published to the network for address-based retrieval. +/// +/// A private upload stores only the data chunks and returns the `DataMap` to +/// the caller — only someone holding that `DataMap` can reconstruct the file. +/// A public upload additionally stores the serialized `DataMap` as a chunk on +/// the network, yielding a single chunk address that anyone can use to +/// retrieve the `DataMap` (via [`Client::data_map_fetch`]) and then the file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Visibility { + /// Keep the data map local; only the holder can retrieve the file. + #[default] + Private, + /// Publish the data map as a network chunk so anyone with the returned + /// address can retrieve and decrypt the file. + Public, +} + /// Estimated cost of uploading a file, returned by /// [`Client::estimate_upload_cost`]. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -431,6 +448,7 @@ pub struct UploadCostEstimate { pub payment_mode: PaymentMode, } + /// Result of a file upload: the `DataMap` needed to retrieve the file. #[derive(Debug, Clone)] pub struct FileUploadResult { @@ -444,6 +462,10 @@ pub struct FileUploadResult { pub storage_cost_atto: String, /// Total gas cost in wei. 0 if no on-chain transactions were made. 
pub gas_cost_wei: u128, + /// Chunk address of the serialized `DataMap`, set only for [`Visibility::Public`] + /// uploads. Share this address so others can retrieve the file without the + /// local `DataMap` (via [`Client::data_map_fetch`] then [`Client::file_download`]). + pub data_map_address: Option<[u8; 32]>, } /// Payment information for external signing — either wave-batch or merkle. @@ -482,6 +504,12 @@ pub struct PreparedUpload { pub data_map: DataMap, /// Payment information — either wave-batch or merkle depending on chunk count. pub payment_info: ExternalPaymentInfo, + /// Chunk address of the serialized `DataMap` when this upload was prepared + /// with [`Visibility::Public`]. The address is `Some` whenever the data + /// map chunk has been bundled into `payment_info` for payment; it is + /// carried through to [`FileUploadResult::data_map_address`] after + /// finalization. + pub data_map_address: Option<[u8; 32]>, } /// Return type for [`spawn_file_encryption`]: chunk receiver, `DataMap` oneshot, join handle. @@ -771,11 +799,27 @@ impl Client { /// Phase 1 of external-signer upload: encrypt file and prepare chunks. /// + /// Equivalent to [`Client::file_prepare_upload_with_visibility`] with + /// [`Visibility::Private`] — see that method for details. + pub async fn file_prepare_upload(&self, path: &Path) -> Result { + self.file_prepare_upload_with_visibility(path, Visibility::Private) + .await + } + + /// Phase 1 of external-signer upload with explicit [`Visibility`] control. + /// /// Requires an EVM network (for contract price queries) but NOT a wallet. /// Returns a [`PreparedUpload`] containing the data map, prepared chunks, /// and a [`PaymentIntent`] that the external signer uses to construct /// and submit the on-chain payment transaction. 
/// + /// When `visibility` is [`Visibility::Public`], the serialized `DataMap` + /// is bundled into the payment batch as an additional chunk and its + /// address is recorded on the returned [`PreparedUpload`]. After + /// [`Client::finalize_upload`] (or `_merkle`) succeeds, that address is + /// surfaced via [`FileUploadResult::data_map_address`] so the uploader + /// can share a single address from which anyone can retrieve the file. + /// /// **Memory note:** Encryption uses disk spilling for bounded memory, but /// the returned [`PreparedUpload`] holds all chunk content in memory (each /// [`PreparedChunk`] contains a `Bytes` with the full chunk data). This is @@ -787,9 +831,13 @@ impl Client { /// /// Returns an error if there is insufficient disk space, the file cannot /// be read, encryption fails, or quote collection fails. - pub async fn file_prepare_upload(&self, path: &Path) -> Result { + pub async fn file_prepare_upload_with_visibility( + &self, + path: &Path, + visibility: Visibility, + ) -> Result { debug!( - "Preparing file upload for external signing: {}", + "Preparing file upload for external signing (visibility={visibility:?}): {}", path.display() ); @@ -807,12 +855,35 @@ impl Client { // Read each chunk from disk and collect quotes concurrently. // Note: all PreparedChunks accumulate in memory because the external-signer // protocol requires them for finalize_upload. NOT memory-bounded for large files. - let chunk_data: Vec = spill + let mut chunk_data: Vec = spill .addresses .iter() .map(|addr| spill.read_chunk(addr)) .collect::, _>>()?; + // For public uploads, bundle the serialized DataMap as an extra chunk + // in the same payment batch. This lets the external signer pay for + // the data chunks and the DataMap chunk in one flow, and lets the + // finalize step return the DataMap's chunk address as the shareable + // retrieval address. 
+ let data_map_address = match visibility { + Visibility::Private => None, + Visibility::Public => { + let serialized = rmp_serde::to_vec(&data_map).map_err(|e| { + Error::Serialization(format!("Failed to serialize DataMap: {e}")) + })?; + let bytes = Bytes::from(serialized); + let address = compute_address(&bytes); + info!( + "Public upload: bundling DataMap chunk ({} bytes) at address {}", + bytes.len(), + hex::encode(address) + ); + chunk_data.push(bytes); + Some(address) + } + }; + let chunk_count = chunk_data.len(); let payment_info = if should_use_merkle(chunk_count, PaymentMode::Auto) { @@ -875,6 +946,7 @@ impl Client { Ok(PreparedUpload { data_map, payment_info, + data_map_address, }) } @@ -894,6 +966,7 @@ impl Client { prepared: PreparedUpload, tx_hash_map: &HashMap, ) -> Result { + let data_map_address = prepared.data_map_address; match prepared.payment_info { ExternalPaymentInfo::WaveBatch { prepared_chunks, @@ -921,6 +994,7 @@ impl Client { payment_mode_used: PaymentMode::Single, storage_cost_atto: "0".into(), gas_cost_wei: 0, + data_map_address, }) } ExternalPaymentInfo::Merkle { .. } => Err(Error::Payment( @@ -947,6 +1021,7 @@ impl Client { prepared: PreparedUpload, winner_pool_hash: [u8; 32], ) -> Result { + let data_map_address = prepared.data_map_address; match prepared.payment_info { ExternalPaymentInfo::Merkle { prepared_batch, @@ -966,6 +1041,7 @@ impl Client { payment_mode_used: PaymentMode::Merkle, storage_cost_atto: "0".into(), gas_cost_wei: 0, + data_map_address, }) } ExternalPaymentInfo::WaveBatch { .. 
} => Err(Error::Payment( @@ -1055,6 +1131,7 @@ impl Client { payment_mode_used: PaymentMode::Single, storage_cost_atto: sc, gas_cost_wei: gc, + data_map_address: None, }); } Err(e) => return Err(e), @@ -1080,6 +1157,7 @@ impl Client { payment_mode_used: actual_mode, storage_cost_atto, gas_cost_wei, + data_map_address: None, }) } diff --git a/ant-core/src/data/mod.rs b/ant-core/src/data/mod.rs index 4fa312f..97a7daf 100644 --- a/ant-core/src/data/mod.rs +++ b/ant-core/src/data/mod.rs @@ -23,7 +23,7 @@ pub use client::batch::{finalize_batch_payment, PaidChunk, PaymentIntent, Prepar pub use client::data::DataUploadResult; pub use client::file::{ DownloadEvent, ExternalPaymentInfo, FileUploadResult, PreparedUpload, UploadCostEstimate, - UploadEvent, + UploadEvent, Visibility, }; pub use client::merkle::{ finalize_merkle_batch, MerkleBatchPaymentResult, PaymentMode, PreparedMerkleBatch, diff --git a/ant-core/tests/e2e_file.rs b/ant-core/tests/e2e_file.rs index c7b1e03..4d61c02 100644 --- a/ant-core/tests/e2e_file.rs +++ b/ant-core/tests/e2e_file.rs @@ -4,7 +4,7 @@ mod support; -use ant_core::data::Client; +use ant_core::data::{compute_address, Client, ExternalPaymentInfo, Visibility}; use serial_test::serial; use std::io::Write; use std::path::PathBuf; @@ -143,3 +143,77 @@ async fn test_file_download_bytes_written() { drop(client); testnet.teardown().await; } + +/// External-signer prepare must bundle the serialized DataMap as one extra +/// paid chunk when `Visibility::Public` is requested, and must record the +/// resulting chunk address on the `PreparedUpload`. Private prepare must +/// leave that address unset. 
+#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_file_prepare_upload_visibility() { + let (client, testnet) = setup().await; + + let data = vec![0x37u8; 4096]; + let mut input_file = NamedTempFile::new().expect("create temp file"); + input_file.write_all(&data).expect("write temp file"); + input_file.flush().expect("flush temp file"); + + let private = client + .file_prepare_upload_with_visibility(input_file.path(), Visibility::Private) + .await + .expect("private prepare should succeed"); + + assert!( + private.data_map_address.is_none(), + "private uploads must not publish a DataMap address" + ); + + let public = client + .file_prepare_upload_with_visibility(input_file.path(), Visibility::Public) + .await + .expect("public prepare should succeed"); + + let public_addr = public + .data_map_address + .expect("public prepare must record the DataMap chunk address"); + + // The recorded address must match a fresh hash of the serialized DataMap, + // proving the address refers to exactly the chunk that was added to the + // payment batch (and that `data_map_fetch` on this address will later + // yield the same DataMap we're holding). + let expected_bytes = rmp_serde::to_vec(&public.data_map).expect("serialize DataMap"); + let expected_addr = compute_address(&expected_bytes); + assert_eq!( + public_addr, expected_addr, + "data_map_address must equal compute_address(rmp_serde::to_vec(&data_map))" + ); + + // A small file produces a wave-batch payment (well under the merkle + // threshold), and the datamap chunk must appear in that batch. + match (&private.payment_info, &public.payment_info) { + ( + ExternalPaymentInfo::WaveBatch { + prepared_chunks: priv_chunks, + .. + }, + ExternalPaymentInfo::WaveBatch { + prepared_chunks: pub_chunks, + .. 
+ }, + ) => { + assert_eq!( + pub_chunks.len(), + priv_chunks.len() + 1, + "public prepare must add exactly one chunk (the serialized DataMap) to the batch" + ); + assert!( + pub_chunks.iter().any(|c| c.address == public_addr), + "the extra chunk must be the DataMap chunk at the recorded address" + ); + } + other => panic!("expected wave-batch for a 4KB file, got {other:?}"), + } + + drop(client); + testnet.teardown().await; +} From 8caa62f37caff8cfe4860a4df5d20656ba9478ea Mon Sep 17 00:00:00 2001 From: Nic-dorman Date: Tue, 21 Apr 2026 09:40:39 +0100 Subject: [PATCH 2/2] test: add round-trip coverage + polish FileUploadResult/PreparedUpload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback: - **End-to-end round-trip test (wave-batch)**: a small file is prepared as `Visibility::Public`, signed via the testnet wallet, finalized, then retrieved using only `data_map_fetch(&data_map_address)` + `file_download`. Asserts the downloaded bytes match the original. This is the half of the contract the existing test didn't cover: not just that the address is recorded, but that it actually refers to a retrievable DataMap. - **`#[non_exhaustive]` on `FileUploadResult` and `PreparedUpload`**: adding `data_map_address` was already technically a breaking change for any downstream that struct-literal-constructed these; `#[non_exhaustive]` forecloses the same concern for the next field. - **`AlreadyStored` data-map-chunk visibility**: when the serialized `DataMap` hashes to a chunk that's already on the network (same file uploaded twice — plausible under deterministic self-encryption), the prepare step silently drops it from `prepared_chunks` while keeping `data_map_address = Some(addr)`. An `info!` now explicitly logs this, and the `data_map_address` doc comments clarify that `Some` means "retrievable", not "we paid to store it". 
Merkle-path round-trip was attempted but blocked on an upstream `WrongPoolCount` contract revert between `pay_for_merkle_tree` and the `PaymentVaultV2` contract — reproduces outside this PR's changes and is not caused by anything here. Removing the failing test; calling it out separately for follow-up so the pool-commitment / depth relationship can be investigated without holding up this PR. Co-Authored-By: Claude Opus 4.7 (1M context) --- ant-core/src/data/client/file.rs | 46 +++++++++++++++--- ant-core/tests/e2e_file.rs | 83 ++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 8 deletions(-) diff --git a/ant-core/src/data/client/file.rs b/ant-core/src/data/client/file.rs index 3dea9b6..ce0cfe1 100644 --- a/ant-core/src/data/client/file.rs +++ b/ant-core/src/data/client/file.rs @@ -450,7 +450,12 @@ pub struct UploadCostEstimate { /// Result of a file upload: the `DataMap` needed to retrieve the file. +/// +/// Marked `#[non_exhaustive]`: other crates cannot build this struct with a +/// literal and must use `..` when destructuring it, so adding a field in +/// future is not a breaking change for them. #[derive(Debug, Clone)] +#[non_exhaustive] pub struct FileUploadResult { /// The data map containing chunk metadata for reconstruction. pub data_map: DataMap, @@ -462,9 +467,13 @@ pub struct FileUploadResult { pub storage_cost_atto: String, /// Total gas cost in wei. 0 if no on-chain transactions were made. pub gas_cost_wei: u128, - /// Chunk address of the serialized `DataMap`, set only for [`Visibility::Public`] - /// uploads. Share this address so others can retrieve the file without the - /// local `DataMap` (via [`Client::data_map_fetch`] then [`Client::file_download`]). + /// Chunk address of the serialized `DataMap`, set only for + /// [`Visibility::Public`] uploads. 
**`Some` means this address is + /// retrievable from the network (via [`Client::data_map_fetch`])**, not + /// necessarily that *this* upload paid to store it — if the serialized + /// `DataMap` hashed to a chunk that was already on the network (same + /// file uploaded before; deterministic via self-encryption), the address + /// is still returned but no storage payment was made for it. pub data_map_address: Option<[u8; 32]>, } @@ -498,17 +507,22 @@ pub enum ExternalPaymentInfo { /// Note: This struct stays in Rust memory — only the public fields of /// `payment_info` are sent to the frontend. `PreparedChunk` contains /// non-serializable network types, so the full struct cannot derive `Serialize`. +/// +/// Marked `#[non_exhaustive]` so adding a new field in future is not a +/// breaking change for downstream consumers. #[derive(Debug)] +#[non_exhaustive] pub struct PreparedUpload { /// The data map for later retrieval. pub data_map: DataMap, /// Payment information — either wave-batch or merkle depending on chunk count. pub payment_info: ExternalPaymentInfo, - /// Chunk address of the serialized `DataMap` when this upload was prepared - /// with [`Visibility::Public`]. The address is `Some` whenever the data - /// map chunk has been bundled into `payment_info` for payment; it is - /// carried through to [`FileUploadResult::data_map_address`] after - /// finalization. + /// Chunk address of the serialized `DataMap` when this upload was + /// prepared with [`Visibility::Public`]. `Some` means the address is + /// retrievable on the network after finalization — either because this + /// upload paid to store the chunk in `payment_info`, or because the + /// chunk was already on the network (deterministic self-encryption). + /// Carried through to [`FileUploadResult::data_map_address`]. 
pub data_map_address: Option<[u8; 32]>, } @@ -928,6 +942,22 @@ impl Client { } } + // Surface the "DataMap chunk was already on the network" case + // so debugging "why is data_map_address set but no storage cost + // appears for it?" doesn't require reading the source. See the + // `data_map_address` doc comment for why this is still a valid + // `Some(addr)` outcome. + if let Some(addr) = data_map_address { + if !prepared_chunks.iter().any(|c| c.address == addr) { + info!( + "Public upload: DataMap chunk {} was already stored \ + on the network — address is retrievable without a \ + new payment", + hex::encode(addr) + ); + } + } + let payment_intent = PaymentIntent::from_prepared_chunks(&prepared_chunks); info!( diff --git a/ant-core/tests/e2e_file.rs b/ant-core/tests/e2e_file.rs index 4d61c02..fb06897 100644 --- a/ant-core/tests/e2e_file.rs +++ b/ant-core/tests/e2e_file.rs @@ -5,7 +5,9 @@ mod support; use ant_core::data::{compute_address, Client, ExternalPaymentInfo, Visibility}; +use evmlib::common::{QuoteHash, TxHash}; use serial_test::serial; +use std::collections::HashMap; use std::io::Write; use std::path::PathBuf; use std::sync::Arc; @@ -217,3 +219,84 @@ async fn test_file_prepare_upload_visibility() { drop(client); testnet.teardown().await; } + +/// Full public-upload round-trip (wave-batch path). +/// +/// Simulates the external-signer flow end-to-end: prepare → sign payments +/// via the testnet wallet → finalize → `data_map_fetch` using only the +/// returned address → `file_download` → assert recovered bytes equal the +/// original. Proves the data_map_address actually refers to a retrievable +/// DataMap on the network, not just a hash recorded in memory. 
+#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn test_public_upload_round_trip_wave_batch() { + let (client, testnet) = setup().await; + + let original = vec![0x5au8; 4096]; + let mut input_file = NamedTempFile::new().expect("create temp file"); + input_file.write_all(&original).expect("write temp file"); + input_file.flush().expect("flush temp file"); + + // Phase 1: prepare as public. + let prepared = client + .file_prepare_upload_with_visibility(input_file.path(), Visibility::Public) + .await + .expect("public prepare should succeed"); + let data_map_address = prepared + .data_map_address + .expect("public prepare must record the DataMap address"); + + // Phase 2: simulate an external signer by paying for the quotes with the + // testnet wallet and collecting the resulting (quote_hash, tx_hash) map. + let payments = match &prepared.payment_info { + ExternalPaymentInfo::WaveBatch { payment_intent, .. } => payment_intent.payments.clone(), + other => panic!("expected wave-batch payment for a 4KB file, got {other:?}"), + }; + let (tx_hash_map, _gas) = testnet + .wallet() + .pay_for_quotes(payments) + .await + .expect("testnet wallet should pay for quotes"); + let tx_hash_map: HashMap = tx_hash_map.into_iter().collect(); + + // Phase 3: finalize. The data map chunk is stored alongside the data + // chunks in this single call — no second network trip needed. + let result = client + .finalize_upload(prepared, &tx_hash_map) + .await + .expect("finalize_upload should succeed"); + assert_eq!( + result.data_map_address, + Some(data_map_address), + "FileUploadResult must carry the DataMap address forward from PreparedUpload" + ); + + // Phase 4: a fresh retriever can fetch the data map using only the + // shared address — they did not participate in the upload. + let fetched_data_map = client + .data_map_fetch(&data_map_address) + .await + .expect("data_map_fetch must retrieve the stored DataMap"); + + // Phase 5: download + verify content. 
+ let output_dir = TempDir::new().expect("create output temp dir"); + let output_path = output_dir.path().join("round_trip_out.bin"); + let bytes_written = client + .file_download(&fetched_data_map, &output_path) + .await + .expect("file_download should succeed"); + assert_eq!( + bytes_written, + original.len() as u64, + "bytes_written should equal original size" + ); + + let downloaded = std::fs::read(&output_path).expect("read downloaded file"); + assert_eq!( + downloaded, original, + "downloaded bytes must equal the original file" + ); + + drop(client); + testnet.teardown().await; +}