From fb79676c9892de9b39fadf97cc7f22e8790a49ad Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Fri, 13 Feb 2026 13:57:59 +1100 Subject: [PATCH 1/2] feat(storage): multi-copy upload with store->pull->commit flow Implement store->pull->commit flow for efficient multi-copy storage replication. Split operations API on StorageContext: - store(): upload data to SP, wait for parking confirmation - presignForCommit(): pre-sign EIP-712 extraData for pull + commit reuse - pull(): request SP-to-SP transfer from another provider - commit(): add pieces on-chain with optional pre-signed extraData - getPieceUrl(): get retrieval URL for SP-to-SP pulls StorageManager.upload() orchestration: - Default 2 copies (endorsed primary + any approved secondary) - Single-provider: store->commit flow - Multi-copy: store on primary, presign, pull to secondaries, commit all - Auto-retry failed secondaries with provider exclusion (up to 5 attempts) Provider selection: - Primary requires endorsed provider (throws if none reachable) - Secondaries use any approved provider from the pool - 2-tier selection per role: existing dataset, then new dataset Callback refinements: - Remove redundant onUploadComplete (use onStored instead) - onStored(providerId, pieceCid) - after data parked on provider - onPieceAdded(providerId, pieceCid) - after on-chain submission - onPieceConfirmed(providerId, pieceCid, pieceId) - after confirmation Type clarity: - Rename UploadOptions.metadata -> pieceMetadata (piece-level) - Rename CommitOptions.pieces[].metadata -> pieceMetadata - StoreError/CommitError carry providerId and endpoint for optional telemetry - New: CopyResult, FailedCopy for multi-copy transparency Implements #494 --- examples/cli/src/commands/upload.ts | 44 +- packages/synapse-sdk/src/errors/index.ts | 1 + packages/synapse-sdk/src/errors/storage.ts | 64 ++ packages/synapse-sdk/src/storage/context.ts | 959 +++++++++--------- packages/synapse-sdk/src/storage/manager.ts | 481 +++++++-- 
.../src/test/storage-upload.test.ts | 293 ++---- packages/synapse-sdk/src/test/storage.test.ts | 50 +- packages/synapse-sdk/src/test/synapse.test.ts | 123 +-- packages/synapse-sdk/src/types.ts | 241 ++++- utils/example-storage-e2e.js | 339 ++++--- 10 files changed, 1466 insertions(+), 1129 deletions(-) create mode 100644 packages/synapse-sdk/src/errors/index.ts create mode 100644 packages/synapse-sdk/src/errors/storage.ts diff --git a/examples/cli/src/commands/upload.ts b/examples/cli/src/commands/upload.ts index 7e43b476..da757162 100644 --- a/examples/cli/src/commands/upload.ts +++ b/examples/cli/src/commands/upload.ts @@ -1,12 +1,13 @@ import { open } from 'node:fs/promises' import path from 'node:path' import * as p from '@clack/prompts' +import type { PieceCID } from '@filoz/synapse-core/piece' import { createPieceUrlPDP } from '@filoz/synapse-core/utils' -import { Synapse } from '@filoz/synapse-sdk' +import { type PieceRecord, Synapse } from '@filoz/synapse-sdk' import { type Command, command } from 'cleye' +import type { Hex } from 'viem' import { privateKeyClient } from '../client.ts' import { globalFlags } from '../flags.ts' -import { hashLink } from '../utils.ts' export const upload: Command = command( { @@ -16,11 +17,6 @@ export const upload: Command = command( alias: 'u', flags: { ...globalFlags, - forceCreateDataSet: { - type: Boolean, - description: 'Force create a new data set', - default: false, - }, withCDN: { type: Boolean, description: 'Enable CDN', @@ -37,7 +33,7 @@ export const upload: Command = command( }, }, async (argv) => { - const { client, chain } = privateKeyClient(argv.flags.chain) + const { client } = privateKeyClient(argv.flags.chain) const filePath = argv._.requiredPath const absolutePath = path.resolve(filePath) @@ -50,7 +46,6 @@ export const upload: Command = command( p.log.step('Creating context...') const context = await synapse.storage.createContext({ - forceCreateDataSet: argv.flags.forceCreateDataSet, withCDN: 
argv.flags.withCDN, dataSetId: argv.flags.dataSetId, callbacks: { @@ -65,24 +60,39 @@ export const upload: Command = command( const data = fileHandle.readableWebStream() await context.upload(data, { - metadata: { + pieceMetadata: { name: path.basename(absolutePath), }, - onUploadComplete(pieceCid) { + onStored(providerId: bigint, pieceCid: PieceCID) { const url = createPieceUrlPDP({ cid: pieceCid.toString(), serviceURL: context.provider.pdp.serviceURL, }) - p.log.info(`Upload complete! ${url}`) + p.log.info(`Stored on provider ${providerId}! ${url}`) }, - onPiecesAdded(transactionHash) { - p.log.info(`Pieces added in tx ${hashLink(transactionHash, chain)}`) + onPiecesAdded( + transaction: Hex, + providerId: bigint, + pieces: { pieceCid: PieceCID }[] + ) { + p.log.info( + `Pieces added for provider ${providerId}, tx: ${transaction}` + ) + for (const { pieceCid } of pieces) { + p.log.info(` ${pieceCid}`) + } }, - onPiecesConfirmed(dataSetId, pieces) { - p.log.info(`Data set ${dataSetId} confirmed`) + onPiecesConfirmed( + dataSetId: bigint, + providerId: bigint, + pieces: PieceRecord[] + ) { p.log.info( - `Piece IDs: ${pieces.map(({ pieceId }) => pieceId).join(', ')}` + `Data set ${dataSetId} confirmed on provider ${providerId}` ) + for (const { pieceCid, pieceId } of pieces) { + p.log.info(` ${pieceCid} → pieceId ${pieceId}`) + } }, }) diff --git a/packages/synapse-sdk/src/errors/index.ts b/packages/synapse-sdk/src/errors/index.ts new file mode 100644 index 00000000..800b3f4a --- /dev/null +++ b/packages/synapse-sdk/src/errors/index.ts @@ -0,0 +1 @@ +export { CommitError, StoreError } from './storage.ts' diff --git a/packages/synapse-sdk/src/errors/storage.ts b/packages/synapse-sdk/src/errors/storage.ts new file mode 100644 index 00000000..5d550ccd --- /dev/null +++ b/packages/synapse-sdk/src/errors/storage.ts @@ -0,0 +1,64 @@ +import { isSynapseError, SynapseError, type SynapseErrorOptions } from '@filoz/synapse-core/errors' + +interface StorageErrorOptions extends 
SynapseErrorOptions { + providerId?: bigint + endpoint?: string +} + +/** + * Primary store failed - no data stored anywhere. + * Thrown when the initial upload to the primary provider fails. + */ +export class StoreError extends SynapseError { + override name: 'StoreError' = 'StoreError' + providerId?: string + endpoint?: string + + constructor(message: string, options?: StorageErrorOptions) { + super(message, options) + this.providerId = options?.providerId?.toString() + this.endpoint = options?.endpoint + } + + toJSON() { + return { + name: this.name, + message: this.message, + providerId: this.providerId, + endpoint: this.endpoint, + } + } + + static override is(value: unknown): value is StoreError { + return isSynapseError(value) && value.name === 'StoreError' + } +} + +/** + * All commits failed - data stored but not on-chain. + * Thrown when on-chain commit fails on every provider after successful store. + */ +export class CommitError extends SynapseError { + override name: 'CommitError' = 'CommitError' + providerId?: string + endpoint?: string + + constructor(message: string, options?: StorageErrorOptions) { + super(message, options) + this.providerId = options?.providerId?.toString() + this.endpoint = options?.endpoint + } + + toJSON() { + return { + name: this.name, + message: this.message, + providerId: this.providerId, + endpoint: this.endpoint, + } + } + + static override is(value: unknown): value is CommitError { + return isSynapseError(value) && value.name === 'CommitError' + } +} diff --git a/packages/synapse-sdk/src/storage/context.ts b/packages/synapse-sdk/src/storage/context.ts index fc10e0db..aa0118ef 100644 --- a/packages/synapse-sdk/src/storage/context.ts +++ b/packages/synapse-sdk/src/storage/context.ts @@ -6,7 +6,12 @@ * - Provider selection and data set creation/reuse * - PieceCID calculation and validation * - Payment rail setup through Warm Storage - * - Batched piece additions for efficiency + * + * The upload flow is decomposed into 
store -> pull -> commit: + * - store(): Upload data to SP (no on-chain state) + * - pull(): SP-to-SP transfer from another provider + * - commit(): Add piece to on-chain data set + * - upload(): Convenience that does store + commit * * @example * ```typescript @@ -23,19 +28,22 @@ */ import { asChain, type Chain as FilecoinChain } from '@filoz/synapse-core/chains' -import { getProviderIds } from '@filoz/synapse-core/endorsements' +import { getProviderIds as getEndorsedProviderIds } from '@filoz/synapse-core/endorsements' import { InvalidPieceCIDError } from '@filoz/synapse-core/errors' import * as PDPVerifier from '@filoz/synapse-core/pdp-verifier' import * as Piece from '@filoz/synapse-core/piece' import * as SP from '@filoz/synapse-core/sp' import { schedulePieceDeletion, type UploadPieceStreamingData } from '@filoz/synapse-core/sp' +import { signAddPieces, signCreateDataSetAndAddPieces } from '@filoz/synapse-core/typed-data' import { calculateLastProofDate, createPieceUrlPDP, + datasetMetadataObjectToEntry, epochToDate, type MetadataObject, pieceMetadataObjectToEntry, randIndex, + randU256, timeUntilEpoch, } from '@filoz/synapse-core/utils' import type { Account, Address, Chain, Client, Hash, Hex, Transport } from 'viem' @@ -43,8 +51,9 @@ import { getBlockNumber } from 'viem/actions' import { SPRegistryService } from '../sp-registry/index.ts' import type { Synapse } from '../synapse.ts' import type { + CommitOptions, + CommitResult, ContextCreateContextsOptions, - DataSetInfo, DownloadOptions, PDPProvider, PieceCID, @@ -52,13 +61,16 @@ import type { PieceStatus, PreflightInfo, ProviderSelectionResult, + PullOptions, + PullResult, StorageContextCreateOptions, StorageServiceOptions, - UploadCallbacks, + StoreOptions, + StoreResult, UploadOptions, UploadResult, } from '../types.ts' -import { createError, METADATA_KEYS, SIZE_CONSTANTS } from '../utils/index.ts' +import { createError, SIZE_CONSTANTS } from '../utils/index.ts' import { combineMetadata, metadataMatches 
} from '../utils/metadata.ts' import type { WarmStorageService } from '../warm-storage/index.ts' @@ -87,28 +99,10 @@ export class StorageContext { private readonly _pdpEndpoint: string private readonly _warmStorageService: WarmStorageService private readonly _withCDN: boolean - private readonly _uploadBatchSize: number private _dataSetId: bigint | undefined private _clientDataSetId: bigint | undefined private readonly _dataSetMetadata: Record - // AddPieces batching state - private _pendingPieces: Array<{ - pieceCid: PieceCID - resolve: (pieceId: bigint) => void - reject: (error: Error) => void - callbacks?: UploadCallbacks - metadata?: MetadataObject - }> = [] - - private _isProcessing: boolean = false - - // Upload tracking for batching (using symbols for simple idempotency) - private _activeUploads: Set = new Set() - // Timeout to wait before processing batch if there are other in-progress uploads, this allows - // more uploads to join our batch - private readonly _uploadBatchWaitTimeout: number = 15000 // 15 seconds, half Filecoin's blocktime - // Public properties from interface public readonly serviceProvider: Address @@ -169,12 +163,6 @@ export class StorageContext { } if (sizeBytes > SIZE_CONSTANTS.MAX_UPLOAD_SIZE) { - // This restriction is ~arbitrary for now, but there is a hard limit on PDP uploads in Curio - // of 254 MiB, see: https://github.com/filecoin-project/curio/blob/3ddc785218f4e237f0c073bac9af0b77d0f7125c/pdp/handlers_upload.go#L38 - // We can increase this in future, arbitrarily, but we first need to: - // - Handle streaming input. - // - Chunking input at size 254 MiB and make a separate piece per each chunk - // - Combine the pieces using "subPieces" and an aggregate PieceCID in our AddRoots call throw createError( 'StorageContext', context, @@ -196,7 +184,6 @@ export class StorageContext { this._provider = options.provider this._withCDN = options.options.withCDN ?? 
false this._warmStorageService = options.warmStorageService - this._uploadBatchSize = Math.max(1, options.options.uploadBatchSize ?? SIZE_CONSTANTS.DEFAULT_UPLOAD_BATCH_SIZE) this._dataSetMetadata = options.dataSetMetadata this._dataSetId = options.dataSetId this.serviceProvider = options.provider.serviceProvider @@ -204,57 +191,59 @@ export class StorageContext { } /** - * Creates new storage contexts with specified options - * Each context corresponds to a different data set + * Creates storage contexts with specified options. + * + * Three mutually exclusive modes: + * 1. `dataSetIds` provided: creates contexts for exactly those data sets + * 2. `providerIds` provided: creates contexts for exactly those providers + * 3. Neither provided: uses smart selection with `count` (default 2) */ static async createContexts(options: ContextCreateContextsOptions): Promise { - const count = options?.count ?? 2 - const resolutions: ProviderSelectionResult[] = [] const clientAddress = options.synapse.client.account.address const spRegistry = new SPRegistryService({ client: options.synapse.client }) - if (options.dataSetIds) { - const selections = [] - for (const dataSetId of new Set(options.dataSetIds)) { - selections.push( - StorageContext.resolveByDataSetId(dataSetId, options.warmStorageService, spRegistry, clientAddress, { - withCDN: options.withCDN, - metadata: options.metadata, - }) - ) - if (selections.length >= count) { - break - } - } - resolutions.push(...(await Promise.all(selections))) + + const hasDataSetIds = options.dataSetIds != null && options.dataSetIds.length > 0 + const hasProviderIds = options.providerIds != null && options.providerIds.length > 0 + + if (hasDataSetIds && hasProviderIds) { + throw createError( + 'StorageContext', + 'createContexts', + "Cannot specify both 'dataSetIds' and 'providerIds' - use one or the other" + ) } - const resolvedProviderIds = resolutions.map((resolution) => resolution.provider.id) - if (resolutions.length < count) { - if 
(options.providerIds) { - const selections = [] - // NOTE: Set.difference is unavailable in some targets - for (const providerId of [...new Set(options.providerIds)].filter( - (providerId) => !resolvedProviderIds.includes(providerId) - )) { - selections.push( - StorageContext.resolveByProviderId( - clientAddress, - providerId, - options.metadata ?? {}, - options.warmStorageService, - spRegistry, - options.forceCreateDataSets - ) + + let resolutions: ProviderSelectionResult[] = [] + + // Resolve explicit data set IDs (deduplicated) + if (hasDataSetIds) { + const uniqueDataSetIds = [...new Set(options.dataSetIds)] + resolutions = await Promise.all( + uniqueDataSetIds.map((dataSetId) => + StorageContext.resolveByDataSetId(dataSetId, options.warmStorageService, spRegistry, clientAddress) + ) + ) + } else if (hasProviderIds) { + // Resolve explicit provider IDs (deduplicated) + const uniqueProviderIds = [...new Set(options.providerIds)] + resolutions = await Promise.all( + uniqueProviderIds.map((providerId) => + StorageContext.resolveByProviderId( + clientAddress, + providerId, + options.metadata ?? {}, + options.warmStorageService, + spRegistry ) - resolvedProviderIds.push(providerId) - if (selections.length + resolutions.length >= count) { - break - } - } - resolutions.push(...(await Promise.all(selections))) - } + ) + ) } + + // Fill remaining slots via smart selection if count exceeds explicit resolutions + const count = options.count ?? (resolutions.length > 0 ? resolutions.length : 2) if (resolutions.length < count) { - const excludeProviderIds = [...(options.excludeProviderIds ?? []), ...resolvedProviderIds] + const excludeProviderIds = [...(options.excludeProviderIds ?? 
[]), ...resolutions.map((r) => r.provider.id)] + for (let i = resolutions.length; i < count; i++) { try { const resolution = await StorageContext.smartSelectProvider( @@ -263,8 +252,7 @@ export class StorageContext { options.warmStorageService, spRegistry, excludeProviderIds, - resolutions.length === 0 ? await getProviderIds(options.synapse.client) : new Set(), - options.forceCreateDataSets ?? false + resolutions.length === 0 ? await getEndorsedProviderIds(options.synapse.client) : new Set() ) excludeProviderIds.push(resolution.provider.id) resolutions.push(resolution) @@ -276,6 +264,7 @@ export class StorageContext { } } } + return await Promise.all( resolutions.map( async (resolution) => @@ -294,10 +283,8 @@ export class StorageContext { * Handles provider selection and data set selection/creation */ static async create(options: StorageContextCreateOptions): Promise { - // Create SPRegistryService const spRegistry = new SPRegistryService({ client: options.synapse.client }) - // Resolve provider and data set based on options const resolution = await StorageContext.resolveProviderAndDataSet( options.synapse, options.warmStorageService, @@ -323,7 +310,6 @@ export class StorageContext { try { options.callbacks?.onProviderSelected?.(resolution.provider) } catch (error) { - // Log but don't propagate callback errors console.error('Error in onProviderSelected callback:', error) } @@ -347,7 +333,6 @@ export class StorageContext { /** * Resolve provider and data set based on provided options - * Uses lazy loading to minimize RPC calls */ private static async resolveProviderAndDataSet( synapse: Synapse, @@ -356,21 +341,27 @@ export class StorageContext { options: StorageServiceOptions ): Promise { const clientAddress = synapse.client.account.address + const requestedMetadata = combineMetadata(options.metadata, options.withCDN) // Handle explicit data set ID selection (highest priority) - if (options.dataSetId != null && options.forceCreateDataSet !== true) { - return await 
StorageContext.resolveByDataSetId( + if (options.dataSetId != null) { + const result = await StorageContext.resolveByDataSetId( options.dataSetId, warmStorageService, spRegistry, - clientAddress, - options + clientAddress ) + // Validate that the data set's provider matches the requested provider + if (options.providerId != null && result.provider.id !== options.providerId) { + throw createError( + 'StorageContext', + 'resolveProviderAndDataSet', + `Data set ${options.dataSetId} belongs to provider ID ${result.provider.id}, but provider ID ${options.providerId} was requested` + ) + } + return result } - // Convert options to metadata format - merge withCDN flag into metadata if needed - const requestedMetadata = combineMetadata(options.metadata, options.withCDN) - // Handle explicit provider ID selection if (options.providerId != null) { return await StorageContext.resolveByProviderId( @@ -378,20 +369,7 @@ export class StorageContext { options.providerId, requestedMetadata, warmStorageService, - spRegistry, - options.forceCreateDataSet - ) - } - - // Handle explicit provider address selection - if (options.providerAddress != null) { - return await StorageContext.resolveByProviderAddress( - options.providerAddress, - warmStorageService, - spRegistry, - clientAddress, - requestedMetadata, - options.forceCreateDataSet + spRegistry ) } @@ -402,8 +380,7 @@ export class StorageContext { warmStorageService, spRegistry, options.excludeProviderIds ?? [], - new Set(), - options.forceCreateDataSet ?? 
false + new Set() ) } @@ -414,17 +391,10 @@ export class StorageContext { dataSetId: bigint, warmStorageService: WarmStorageService, spRegistry: SPRegistryService, - clientAddress: string, - options: StorageServiceOptions + clientAddress: string ): Promise { const [dataSetInfo, dataSetMetadata] = await Promise.all([ - warmStorageService.getDataSet({ dataSetId }).then(async (dataSetInfo) => { - if (dataSetInfo == null) { - return null - } - await StorageContext.validateDataSetConsistency(dataSetInfo, options, spRegistry) - return dataSetInfo - }), + warmStorageService.getDataSet({ dataSetId }), warmStorageService.getDataSetMetadata({ dataSetId }), warmStorageService.validateDataSet({ dataSetId }), ]) @@ -450,16 +420,6 @@ export class StorageContext { ) } - const withCDN = dataSetInfo.cdnRailId > 0 && METADATA_KEYS.WITH_CDN in dataSetMetadata - if (options.withCDN != null && withCDN !== options.withCDN) { - throw createError( - 'StorageContext', - 'resolveByDataSetId', - `Data set ${dataSetId} has CDN ${withCDN ? 'enabled' : 'disabled'}, ` + - `but requested ${options.withCDN ? 
'enabled' : 'disabled'}` - ) - } - return { provider, dataSetId, @@ -468,53 +428,13 @@ export class StorageContext { } } - /** - * Validate data set consistency with provided options - */ - private static async validateDataSetConsistency( - dataSet: DataSetInfo, - options: StorageServiceOptions, - spRegistry: SPRegistryService - ): Promise { - // Validate provider ID if specified - if (options.providerId != null) { - if (dataSet.providerId !== options.providerId) { - throw createError( - 'StorageContext', - 'validateDataSetConsistency', - `Data set belongs to provider ID ${dataSet.providerId}, but provider ID ${options.providerId} was requested` - ) - } - } - - // Validate provider address if specified - if (options.providerAddress != null) { - // Look up the actual provider to get its serviceProvider address - const actualProvider = await spRegistry.getProvider({ providerId: dataSet.providerId }) - if ( - actualProvider == null || - actualProvider.serviceProvider.toLowerCase() !== options.providerAddress.toLowerCase() - ) { - throw createError( - 'StorageContext', - 'validateDataSetConsistency', - `Data set belongs to provider ${actualProvider?.serviceProvider ?? 'unknown'}, but provider ${options.providerAddress} was requested` - ) - } - } - } - /** * Resolve the best matching DataSet for a Provider using a specific provider ID * - * Optimization Strategy: - * Uses `getClientDataSets` fetch followed by batched parallel checks to find - * the best matching data set while minimizing RPC calls. - * * Selection Logic: * 1. Filters for datasets belonging to this provider * 2. Sorts by dataSetId ascending (oldest first) - * 3. Searches in batches (size dynamic based on total count) for metadata match + * 3. Searches in batches for metadata match * 4. Prioritizes datasets with pieces > 0, then falls back to the oldest valid dataset * 5. 
Exits early as soon as a non-empty matching dataset is found */ @@ -523,28 +443,17 @@ export class StorageContext { providerId: bigint, requestedMetadata: Record, warmStorageService: WarmStorageService, - spRegistry: SPRegistryService, - forceCreateDataSet?: boolean + spRegistry: SPRegistryService ): Promise { - // Fetch provider (always) and dataSets (only if not forcing) in parallel const [provider, dataSets] = await Promise.all([ spRegistry.getProvider({ providerId }), - forceCreateDataSet ? Promise.resolve([]) : warmStorageService.getClientDataSets({ address: clientAddress }), + warmStorageService.getClientDataSets({ address: clientAddress }), ]) if (provider == null) { throw createError('StorageContext', 'resolveByProviderId', `Provider ID ${providerId} not found in registry`) } - if (forceCreateDataSet === true) { - return { - provider, - dataSetId: -1n, // Marker for new data set - isExisting: false, - dataSetMetadata: requestedMetadata, - } - } - // Filter for this provider's active datasets const providerDataSets = dataSets.filter( (dataSet) => dataSet.dataSetId && dataSet.providerId === provider.id && dataSet.pdpEndEpoch === 0n @@ -561,7 +470,6 @@ export class StorageContext { return Number(a.dataSetId) - Number(b.dataSetId) }) - // Batch strategy: 1/3 of total datasets per batch, with min & max, to balance latency vs RPC burst const MIN_BATCH_SIZE = 50 const MAX_BATCH_SIZE = 200 const BATCH_SIZE = Math.min(MAX_BATCH_SIZE, Math.max(MIN_BATCH_SIZE, Math.ceil(sortedDataSets.length / 3), 1)) @@ -600,19 +508,16 @@ export class StorageContext { for (const result of batchResults) { if (result == null) continue - // select the first dataset with pieces and break out of the inner loop if (result.activePieceCount > 0) { selectedDataSet = result break } - // keep the first (oldest) dataset found so far (no pieces) if (selectedDataSet == null) { selectedDataSet = result } } - // early exit if we found a dataset with pieces; break out of the outer loop if 
(selectedDataSet != null && selectedDataSet.activePieceCount > 0) { break } @@ -627,7 +532,6 @@ export class StorageContext { } } - // Need to create new data set return { provider, dataSetId: -1n, // Marker for new data set @@ -637,40 +541,24 @@ export class StorageContext { } /** - * Resolve using a specific provider address - */ - private static async resolveByProviderAddress( - providerAddress: Address, - warmStorageService: WarmStorageService, - spRegistry: SPRegistryService, - clientAddress: Address, - requestedMetadata: Record, - forceCreateDataSet?: boolean - ): Promise { - // Get provider by address - const provider = await spRegistry.getProviderByAddress({ address: providerAddress }) - if (provider == null) { - throw createError( - 'StorageContext', - 'resolveByProviderAddress', - `Provider ${providerAddress} not found in registry` - ) - } - - // Use the providerId resolution logic - return await StorageContext.resolveByProviderId( - clientAddress, - provider.id, - requestedMetadata, - warmStorageService, - spRegistry, - forceCreateDataSet - ) - } - - /** - * Smart provider selection algorithm - * Prioritizes existing data sets and provider health + * Select a provider and optionally an existing data set for storage. + * + * Selection is 2-tier per role. Tier 1 prefers existing data sets (deterministic, + * sorted by piece count then data set ID). Tier 2 creates a new data set with a + * random provider. All candidates are ping-validated before selection. + * + * Role is determined by {@link endorsedProviderIds}: non-empty restricts to endorsed + * providers only (primary) and throws if none reachable; empty allows any approved + * provider (secondary). 
+ * + * @param clientAddress - Wallet address to look up existing data sets for + * @param requestedMetadata - Dataset metadata filter; only data sets with matching metadata are considered + * @param warmStorageService - Service for data set and provider lookups + * @param spRegistry - Registry for provider details and PDP endpoints + * @param excludeProviderIds - Provider IDs to skip (already used by other contexts) + * @param endorsedProviderIds - Endorsed provider IDs; non-empty = primary (endorsed-only), empty = secondary (any approved) + * @returns Resolved provider, data set ID (-1n if new), and metadata + * @throws When no eligible provider passes health check */ private static async smartSelectProvider( clientAddress: Address, @@ -678,131 +566,151 @@ export class StorageContext { warmStorageService: WarmStorageService, spRegistry: SPRegistryService, excludeProviderIds: bigint[], - endorsedProviderIds: Set, - forceCreateDataSet: boolean + endorsedProviderIds: Set ): Promise { - // Strategy: - // 1. Try to find existing data sets first - // 2. 
If no existing data sets, find a healthy provider - - // Get client's data sets const dataSets = await warmStorageService.getClientDataSetsWithDetails({ address: clientAddress }) const skipProviderIds = new Set(excludeProviderIds) - // Filter for managed data sets with matching metadata const managedDataSets = dataSets.filter( (ps) => ps.isLive && ps.isManaged && ps.pdpEndEpoch === 0n && - metadataMatches(ps.metadata, requestedMetadata) && - !skipProviderIds.has(ps.providerId) + !skipProviderIds.has(ps.providerId) && + metadataMatches(ps.metadata, requestedMetadata) ) - if (managedDataSets.length > 0 && !forceCreateDataSet) { - // Prefer data sets with pieces, sort by ID (older first) - const sorted = managedDataSets.sort((a, b) => { + type DataSetWithDetails = (typeof managedDataSets)[number] + const sortDataSets = (sets: DataSetWithDetails[]): DataSetWithDetails[] => + [...sets].sort((a, b) => { if (a.activePieceCount > 0n && b.activePieceCount === 0n) return -1 if (b.activePieceCount > 0n && a.activePieceCount === 0n) return 1 return Number(a.pdpVerifierDataSetId - b.pdpVerifierDataSetId) }) - // Create async generator that yields providers lazily - async function* generateProviders(): AsyncGenerator { - // First, yield providers from existing data sets (in sorted order) - for (const dataSet of sorted) { - if (skipProviderIds.has(dataSet.providerId)) { - continue - } - skipProviderIds.add(dataSet.providerId) + const createDataSetProviderGenerator = (sets: DataSetWithDetails[]) => + async function* (): AsyncGenerator { + const yieldedProviders = new Set() + for (const dataSet of sets) { + if (yieldedProviders.has(dataSet.providerId)) continue + yieldedProviders.add(dataSet.providerId) const provider = await spRegistry.getProvider({ providerId: dataSet.providerId }) - if (provider == null) { console.warn( `Provider ID ${dataSet.providerId} for data set ${dataSet.pdpVerifierDataSetId} is not currently approved` ) continue } - yield provider } } - const 
selectedProvider = await StorageContext.selectProviderWithPing(generateProviders()) - - if (selectedProvider != null) { - // Find the first matching data set ID for this provider - // Match by provider ID (stable identifier in the registry) - const matchingDataSet = sorted.find((ps) => ps.providerId === selectedProvider.id) - - if (matchingDataSet == null) { - console.warn( - `Could not match selected provider ${selectedProvider.serviceProvider} (ID: ${selectedProvider.id}) ` + - `to existing data sets. Falling back to selecting from all providers.` - ) - // Fall through to select from all approved providers below - } else { - // Fetch metadata for existing data set - const dataSetMetadata = await warmStorageService.getDataSetMetadata({ - dataSetId: matchingDataSet.pdpVerifierDataSetId, - }) - - return { - provider: selectedProvider, - dataSetId: matchingDataSet.pdpVerifierDataSetId, - isExisting: true, - dataSetMetadata, - } + const createResultFromDataSet = async ( + provider: PDPProvider, + sets: DataSetWithDetails[] + ): Promise => { + const matchingDataSet = sets.find((ps) => ps.providerId === provider.id) + if (matchingDataSet == null) { + console.warn( + `Could not match selected provider ${provider.serviceProvider} (ID: ${provider.id}) ` + + `to existing data sets. Falling back to new data set.` + ) + return { + provider, + dataSetId: -1n, + isExisting: false, + dataSetMetadata: requestedMetadata, } } + const dataSetMetadata = await warmStorageService.getDataSetMetadata({ + dataSetId: matchingDataSet.pdpVerifierDataSetId, + }) + return { + provider, + dataSetId: matchingDataSet.pdpVerifierDataSetId, + isExisting: true, + dataSetMetadata, + } } - // No existing data sets - select from all approved providers. First we get approved IDs from - // WarmStorage, then fetch provider details. 
+ const createNewDataSetResult = (provider: PDPProvider): ProviderSelectionResult => ({ + provider, + dataSetId: -1n, + isExisting: false, + dataSetMetadata: requestedMetadata, + }) + + const isPrimarySelection = endorsedProviderIds.size > 0 + + // Fetch approved providers (needed for both paths) const approvedIds = await warmStorageService.getApprovedProviderIds() const approvedProviders = await spRegistry.getProviders({ providerIds: approvedIds }) - const allProviders = approvedProviders.filter((provider: PDPProvider) => !excludeProviderIds.includes(provider.id)) - - if (allProviders.length === 0) { - throw createError('StorageContext', 'smartSelectProvider', NO_REMAINING_PROVIDERS_ERROR_MESSAGE) - } + const allProviders = approvedProviders.filter((p: PDPProvider) => !excludeProviderIds.includes(p.id)) + + if (isPrimarySelection) { + // Primary: endorsed providers only, no fallback to non-endorsed + const endorsedDataSets = managedDataSets.filter((ds) => endorsedProviderIds.has(ds.providerId)) + + // Tier 1: Existing data sets with endorsed providers + if (endorsedDataSets.length > 0) { + const sorted = sortDataSets(endorsedDataSets) + const provider = await StorageContext.selectProviderWithPing(createDataSetProviderGenerator(sorted)()) + if (provider != null) { + return await createResultFromDataSet(provider, sorted) + } + } - let provider: PDPProvider | null - if (endorsedProviderIds.size > 0) { - // Split providers according to whether they have all of the endorsements - const [otherProviders, endorsedProviders] = allProviders.reduce<[PDPProvider[], PDPProvider[]]>( - (results: [PDPProvider[], PDPProvider[]], provider: PDPProvider) => { - results[endorsedProviderIds.has(provider.id) ? 
1 : 0].push(provider) - return results - }, - [[], []] - ) - provider = - (await StorageContext.selectRandomProvider(endorsedProviders)) || - (await StorageContext.selectRandomProvider(otherProviders)) - } else { - // Random selection from all providers - provider = await StorageContext.selectRandomProvider(allProviders) - } + // Tier 2: New data set with endorsed provider + const endorsedProviders = allProviders.filter((p: PDPProvider) => endorsedProviderIds.has(p.id)) + if (endorsedProviders.length > 0) { + const provider = await StorageContext.selectRandomProvider(endorsedProviders) + if (provider != null) { + return createNewDataSetResult(provider) + } + } - if (provider == null) { + // All endorsed providers are exhausted; we deliberately do not fall back to non-endorsed providers — this is a FOC (Filecoin Onchain Cloud) system-level failure for the user + const endorsedCount = [...endorsedProviderIds].filter((id) => !excludeProviderIds.includes(id)).length throw createError( 'StorageContext', - 'selectProviderWithPing', - `All ${allProviders.length} providers failed health check. Storage may be temporarily unavailable.` + 'smartSelectProvider', + endorsedCount > 0 + ? 
`No endorsed provider available — all ${endorsedCount} endorsed provider(s) failed health check` + : 'No endorsed provider available' ) } - return { - provider, - dataSetId: -1n, // Marker for new data set - isExisting: false, - dataSetMetadata: requestedMetadata, + // Secondary: any approved provider + // Tier 1: Existing data sets with any approved provider + if (managedDataSets.length > 0) { + const sorted = sortDataSets(managedDataSets) + const provider = await StorageContext.selectProviderWithPing(createDataSetProviderGenerator(sorted)()) + if (provider != null) { + return await createResultFromDataSet(provider, sorted) + } } + + // Tier 2: New data set with any approved provider + if (allProviders.length > 0) { + const provider = await StorageContext.selectRandomProvider(allProviders) + if (provider != null) { + return createNewDataSetResult(provider) + } + } + + if (allProviders.length === 0) { + throw createError('StorageContext', 'smartSelectProvider', NO_REMAINING_PROVIDERS_ERROR_MESSAGE) + } + throw createError( + 'StorageContext', + 'smartSelectProvider', + `All ${allProviders.length} approved provider(s) failed health check` + ) } /** - * Select a random provider from a list with ping validation + * Select a random provider from a list with ping validation. + * * @param providers - Array of providers to select from * @returns Selected provider */ @@ -811,12 +719,9 @@ export class StorageContext { return null } - // Create async generator that yields providers in random order async function* generateRandomProviders(): AsyncGenerator { const remaining = [...providers] - while (remaining.length > 0) { - // Remove and yield the selected provider const selected = remaining.splice(randIndex(remaining.length), 1)[0] yield selected } @@ -828,12 +733,12 @@ export class StorageContext { /** * Select a provider from an async iterator with ping validation. * This is shared logic used by both smart selection and random selection. 
+ * * @param providers - Async iterable of providers to try * @returns The first provider that responds * @throws If all providers fail */ private static async selectProviderWithPing(providers: AsyncIterable): Promise { - // Try providers in order until we find one that responds to ping for await (const provider of providers) { try { await SP.ping(provider.pdp.serviceURL) @@ -843,7 +748,6 @@ export class StorageContext { `Provider ${provider.serviceProvider} failed ping test:`, error instanceof Error ? error.message : String(error) ) - // Continue to next provider } } @@ -852,6 +756,7 @@ export class StorageContext { /** * Static method to perform preflight checks for an upload + * * @param options - Options for the preflight check * @param options.size - The size of data to upload in bytes * @param options.withCDN - Whether CDN is enabled @@ -864,13 +769,10 @@ export class StorageContext { warmStorageService: WarmStorageService }): Promise { const { size, withCDN, warmStorageService } = options - // Validate size before proceeding StorageContext.validateRawSize({ sizeBytes: options.size, context: 'preflightUpload' }) - // Check allowances and get costs in a single call const allowanceCheck = await warmStorageService.checkAllowanceForStorage({ sizeInBytes: BigInt(size), withCDN }) - // Return preflight info return { estimatedCost: { perEpoch: allowanceCheck.costs.perEpoch, @@ -894,240 +796,325 @@ export class StorageContext { * @returns Preflight information including costs and allowances */ async preflightUpload(options: { size: number }): Promise { - // Use the static method for core logic - const preflightResult = await StorageContext.performPreflightCheck({ + return await StorageContext.performPreflightCheck({ size: options.size, withCDN: this._withCDN, warmStorageService: this._warmStorageService, }) - - // Return preflight info with provider and dataSet specifics - return preflightResult } + // 
========================================================================== + // Split Upload Flow: store -> pull -> commit + // ========================================================================== + /** - * Upload data to the service provider + * Store data on the service provider without committing on-chain. * - * Accepts Uint8Array or ReadableStream. - * For large files, prefer streaming to minimize memory usage. + * First step of the split upload flow: store -> pull -> commit. + * After storing, the piece is "parked" on the provider and ready for + * pulling to other providers via pull(), on-chain commitment via commit(), + * or retrieval via getPieceUrl() (not yet committed; eligible for GC). * - * Note: When uploading to multiple contexts, pieceCid should be pre-calculated and passed in options - * to avoid redundant computation. For streaming uploads, pieceCid must be provided in options as it - * cannot be calculated without consuming the stream. + * @param data - Raw bytes or readable stream to upload + * @param options - Optional pieceCid (skip CommP), signal, and onProgress callback + * @returns PieceCid and size of the stored piece */ - async upload(data: UploadPieceStreamingData, options?: UploadOptions): Promise { - performance.mark('synapse:upload-start') - - // Validation Phase: Check data size and calculate pieceCid - const pieceCid = options?.pieceCid - // Note: Size is unknown for streams (size will be undefined) - - // Track this upload for batching purposes - const uploadId = Symbol('upload') - this._activeUploads.add(uploadId) + async store(data: UploadPieceStreamingData, options?: StoreOptions): Promise { + if (data instanceof Uint8Array) { + StorageContext.validateRawSize({ sizeBytes: data.length, context: 'store' }) + } + let uploadResult: SP.uploadPieceStreaming.OutputType try { - let uploadResult: SP.uploadPieceStreaming.OutputType - // Upload Phase: Upload data to service provider - try { - uploadResult = await 
SP.uploadPieceStreaming({ - serviceURL: this._pdpEndpoint, - data, - ...options, - pieceCid, - }) - } catch (error) { - throw createError('StorageContext', 'uploadPiece', 'Failed to upload piece to service provider', error) - } + uploadResult = await SP.uploadPieceStreaming({ + serviceURL: this._pdpEndpoint, + data, + pieceCid: options?.pieceCid, + signal: options?.signal, + onProgress: options?.onProgress, + }) + } catch (error) { + throw createError('StorageContext', 'store', 'Failed to store piece on service provider', error) + } - // Poll for piece to be "parked" (ready) + try { await SP.findPiece({ serviceURL: this._pdpEndpoint, pieceCid: uploadResult.pieceCid, retry: true, + signal: options?.signal, }) + } catch (error) { + throw createError('StorageContext', 'store', 'Failed to confirm piece storage', error) + } + + return { + pieceCid: uploadResult.pieceCid, + size: uploadResult.size, + } + } + + /** + * Pre-sign EIP-712 extraData for the given pieces. + * + * The returned Hex can be passed to both pull() and commit() to avoid + * redundant wallet signature prompts during multi-copy uploads. 
+ * + * @param pieces - Pieces to sign for, with optional per-piece metadata + * @returns Signed extraData hex to pass to pull() or commit() + */ + async presignForCommit(pieces: Array<{ pieceCid: PieceCID; pieceMetadata?: MetadataObject }>): Promise { + const signingPieces = pieces.map((p) => ({ + pieceCid: p.pieceCid, + metadata: pieceMetadataObjectToEntry(p.pieceMetadata), + })) + + if (this._dataSetId) { + return signAddPieces(this._synapse.client, { + clientDataSetId: await this.getClientDataSetId(), + pieces: signingPieces, + }) + } + + return signCreateDataSetAndAddPieces(this._synapse.client, { + clientDataSetId: randU256(), + payee: this._provider.serviceProvider, + payer: this._synapse.client.account.address, + metadata: datasetMetadataObjectToEntry(this._dataSetMetadata, { + cdn: this._withCDN, + }), + pieces: signingPieces, + }) + } - // Upload phase complete - remove from active tracking - this._activeUploads.delete(uploadId) + /** + * Request this provider to pull pieces from another provider. + * + * Used for multi-copy uploads: data stored once on primary, then pulled to + * secondaries via SP-to-SP transfer. 
+ * + * @param options - Pull options: pieces to pull, source (URL or StorageContext), optional extraData, signal, and onProgress + * @returns Status per piece ('complete' or 'failed') and overall result + */ + async pull(options: PullOptions): Promise { + const { pieces, from, signal, onProgress, extraData } = options - // Notify upload complete - if (options?.onUploadComplete != null) { - options.onUploadComplete(uploadResult.pieceCid) + const getSourceUrl = (pieceCid: PieceCID): string => { + if (typeof from === 'string') { + return createPieceUrlPDP({ cid: pieceCid.toString(), serviceURL: from }) } + return from.getPieceUrl(pieceCid) + } - // Add Piece Phase: Queue the AddPieces operation for sequential processing + const pullPiecesInput = pieces.map((pieceCid) => ({ + pieceCid, + sourceUrl: getSourceUrl(pieceCid), + })) + + const handleProgressResponse = onProgress + ? (response: SP.PullResponse) => { + for (const piece of response.pieces) { + const pieceCid = pieces.find((p) => p.toString() === piece.pieceCid) + if (pieceCid) { + onProgress(pieceCid, piece.status) + } + } + } + : undefined - // Validate metadata early (before queueing) to fail fast - if (options?.metadata != null) { - pieceMetadataObjectToEntry(options.metadata) + try { + const sharedOptions = { + serviceURL: this._pdpEndpoint, + pieces: pullPiecesInput, + signal, + onStatus: handleProgressResponse, + extraData, } - const finalPieceId = await new Promise((resolve, reject) => { - // Add to pending batch - this._pendingPieces.push({ - pieceCid: uploadResult.pieceCid, - resolve, - reject, - callbacks: options, - metadata: options?.metadata, - }) + const pullOptions = this._dataSetId + ? 
{ + ...sharedOptions, + dataSetId: this._dataSetId, + clientDataSetId: await this.getClientDataSetId(), + } + : { + ...sharedOptions, + payee: this._provider.serviceProvider, + payer: this._synapse.client.account.address, + cdn: this._withCDN, + metadata: this._dataSetMetadata, + } + + const response = await SP.waitForPullStatus(this._synapse.client, pullOptions as SP.waitForPullStatus.OptionsType) - // Debounce: defer processing to next event loop tick - // This allows multiple synchronous upload() calls to queue up before processing - setTimeout(() => { - void this._processPendingPieces().catch((error) => { - console.error('Failed to process pending pieces batch:', error) - }) - }, 0) + const pieceResults = response.pieces.map((piece: { pieceCid: string; status: string }) => { + const pieceCid = pieces.find((p) => p.toString() === piece.pieceCid) + return { + pieceCid: pieceCid as PieceCID, + status: piece.status === 'complete' ? ('complete' as const) : ('failed' as const), + } }) - // Return upload result - performance.mark('synapse:upload-end') - performance.measure('synapse:upload', 'synapse:upload-start', 'synapse:upload-end') + const allComplete = pieceResults.every((p: { status: string }) => p.status === 'complete') + return { - pieceCid: uploadResult.pieceCid, - size: uploadResult.size, - pieceId: finalPieceId, + status: allComplete ? 'complete' : 'failed', + pieces: pieceResults, } - } finally { - this._activeUploads.delete(uploadId) + } catch (error) { + throw createError('StorageContext', 'pull', 'Failed to pull pieces from source provider', error) } } /** - * Process pending pieces by batching them into a single AddPieces operation - * This method is called from the promise queue to ensure sequential execution + * Commit pieces on-chain by calling AddPieces (or CreateDataSetAndAddPieces). + * + * Pieces must be stored on the provider (via store() or pull()) before committing. + * Creates a new data set if this context doesn't have one yet. 
+ * + * @param options - Pieces to commit with optional pieceMetadata, extraData, and onSubmitted callback + * @returns Transaction hash, confirmed pieceIds, dataSetId, and whether a new data set was created */ - private async _processPendingPieces(): Promise { - if (this._isProcessing || this._pendingPieces.length === 0) { - return - } - this._isProcessing = true - - // Wait for any in-flight uploads to complete before processing, but only if we don't - // already have a full batch - no point waiting for more if we can process a full batch now. - // Snapshot the current uploads so we don't wait for new uploads that start during our wait. - const uploadsToWaitFor = new Set(this._activeUploads) - - if (uploadsToWaitFor.size > 0 && this._pendingPieces.length < this._uploadBatchSize) { - const waitStart = Date.now() - const pollInterval = 200 - - while (uploadsToWaitFor.size > 0 && Date.now() - waitStart < this._uploadBatchWaitTimeout) { - // Check which of our snapshot uploads have completed - for (const uploadId of uploadsToWaitFor) { - if (!this._activeUploads.has(uploadId)) { - uploadsToWaitFor.delete(uploadId) - } - } - - if (uploadsToWaitFor.size > 0) { - await new Promise((resolve) => setTimeout(resolve, pollInterval)) - } - } + async commit(options: CommitOptions): Promise { + const { pieces, extraData } = options - const waited = Date.now() - waitStart - if (waited > pollInterval) { - console.debug(`Waited ${waited}ms for ${uploadsToWaitFor.size} active upload(s) to complete`) + // Validate metadata early + for (const piece of pieces) { + if (piece.pieceMetadata) { + pieceMetadataObjectToEntry(piece.pieceMetadata) } } - // Extract up to uploadBatchSize pending pieces - const batch = this._pendingPieces.splice(0, this._uploadBatchSize) - try { - // Create piece data array and metadata from the batch - const pieceCids: PieceCID[] = batch.map((item) => item.pieceCid) - const confirmedPieceIds: bigint[] = [] - const addedPieceRecords = pieceCids.map((pieceCid) => 
({ pieceCid })) + const pieceInputs = pieces.map((p) => ({ pieceCid: p.pieceCid, metadata: p.pieceMetadata })) - if (this.dataSetId) { + try { + if (this._dataSetId) { + // Add pieces to existing data set const [, clientDataSetId] = await Promise.all([ - this._warmStorageService.validateDataSet({ dataSetId: this.dataSetId }), + this._warmStorageService.validateDataSet({ dataSetId: this._dataSetId }), this.getClientDataSetId(), ]) - // Add pieces to the data set - const addPiecesResult = await SP.addPieces(this._synapse.sessionClient ?? this._client, { - dataSetId: this.dataSetId, // PDPVerifier data set ID - clientDataSetId, // Client's dataset nonce - pieces: batch.map((item) => ({ pieceCid: item.pieceCid, metadata: item.metadata })), + + const addPiecesResult = await SP.addPieces(this._client, { + dataSetId: this._dataSetId, + clientDataSetId, + pieces: pieceInputs, serviceURL: this._pdpEndpoint, + extraData, }) + options.onSubmitted?.(addPiecesResult.txHash as Hex) - // Notify callbacks with transaction - batch.forEach((item) => { - item.callbacks?.onPiecesAdded?.(addPiecesResult.txHash as Hex, addedPieceRecords) - }) const confirmation = await SP.waitForAddPieces(addPiecesResult) + const confirmedPieceIds = confirmation.confirmedPieceIds - // Handle transaction tracking if available - confirmedPieceIds.push(...confirmation.confirmedPieceIds) + return { + txHash: addPiecesResult.txHash as Hex, + pieceIds: confirmedPieceIds, + dataSetId: this._dataSetId, + isNewDataSet: false, + } + } - const confirmedPieceRecords: PieceRecord[] = confirmedPieceIds.map((pieceId, index) => ({ - pieceId, - pieceCid: pieceCids[index], - })) + // Create new data set and add pieces + const result = await SP.createDataSetAndAddPieces(this._client, { + cdn: this._withCDN, + payee: this._provider.serviceProvider, + payer: this._client.account.address, + recordKeeper: this._chain.contracts.fwss.address, + pieces: pieceInputs, + metadata: this._dataSetMetadata, + serviceURL: 
this._pdpEndpoint, + extraData, + }) + options.onSubmitted?.(result.txHash as Hex) - batch.forEach((item) => { - item.callbacks?.onPiecesConfirmed?.(this.dataSetId as bigint, confirmedPieceRecords) - }) - } else { - // Create a new data set and add pieces to it - const result = await SP.createDataSetAndAddPieces(this._synapse.sessionClient ?? this._client, { - cdn: this._withCDN, - payee: this._provider.serviceProvider, - payer: this._client.account.address, - recordKeeper: this._chain.contracts.fwss.address, - pieces: batch.map((item) => ({ pieceCid: item.pieceCid, metadata: item.metadata })), - metadata: this._dataSetMetadata, - serviceURL: this._pdpEndpoint, - }) - batch.forEach((item) => { - item.callbacks?.onPiecesAdded?.(result.txHash as Hex, addedPieceRecords) - }) - const confirmation = await SP.waitForCreateDataSetAddPieces(result) - this._dataSetId = confirmation.dataSetId - confirmedPieceIds.push(...confirmation.piecesIds) + const confirmation = await SP.waitForCreateDataSetAddPieces(result) + this._dataSetId = confirmation.dataSetId - const confirmedPieceRecords: PieceRecord[] = confirmedPieceIds.map((pieceId, index) => ({ - pieceId, - pieceCid: pieceCids[index], - })) - batch.forEach((item) => { - item.callbacks?.onPiecesConfirmed?.(this.dataSetId as bigint, confirmedPieceRecords) - }) + return { + txHash: result.txHash as Hex, + pieceIds: confirmation.piecesIds, + dataSetId: this._dataSetId, + isNewDataSet: true, } - - // Resolve all promises in the batch with their respective piece IDs - batch.forEach((item, index) => { - const pieceId = confirmedPieceIds[index] - if (pieceId == null) { - throw createError('StorageContext', 'addPieces', `Server did not return piece ID for piece at index ${index}`) - } - item.resolve(pieceId) - }) } catch (error) { - // Reject all promises in the batch - const finalError = createError('StorageContext', 'addPieces', 'Failed to add piece to data set', error) - batch.forEach((item) => { - item.reject(finalError) - }) - } 
finally { - this._isProcessing = false - if (this._pendingPieces.length > 0) { - void this._processPendingPieces().catch((error) => { - console.error('Failed to process pending pieces batch:', error) - }) - } + throw createError('StorageContext', 'commit', 'Failed to commit pieces on-chain', error) } } /** - * Download data from this specific service provider + * Get the retrieval URL for a piece on this provider. + * + * Used by pull() to construct source URLs when pulling from this context + * to another provider. + */ + getPieceUrl(pieceCid: PieceCID): string { + return createPieceUrlPDP({ cid: pieceCid.toString(), serviceURL: this._pdpEndpoint }) + } + + // ========================================================================== + // Convenience: upload = store + commit + // ========================================================================== + + /** + * Upload data to the service provider and commit on-chain. * - * @param options - Download options - * @param options.pieceCid - The PieceCID identifier - * @param options.withCDN - Whether to enable CDN retrieval - * @returns The downloaded data {@link Uint8Array} + * Combines store() and commit() into a single call. Accepts Uint8Array or + * ReadableStream; prefer streaming for large files to minimize memory. + * + * When uploading to multiple contexts, pieceCid should be pre-calculated and passed + * in options to avoid redundant computation. For streaming uploads, pieceCid must be + * provided as it cannot be calculated without consuming the stream. 
+ * + * @param data - Raw bytes or readable stream to upload + * @param options - Upload options including callbacks, pieceMetadata, pieceCid, and signal + * @returns Upload result with pieceCid, size, and a single-element copies array + */ + async upload(data: UploadPieceStreamingData, options?: UploadOptions): Promise { + // Store phase + const storeResult = await this.store(data, { + pieceCid: options?.pieceCid, + signal: options?.signal, + onProgress: options?.onProgress, + }) + + options?.onStored?.(this._provider.id, storeResult.pieceCid) + + // Commit phase + const commitResult = await this.commit({ + pieces: [{ pieceCid: storeResult.pieceCid, pieceMetadata: options?.pieceMetadata }], + onSubmitted: (txHash) => + options?.onPiecesAdded?.(txHash, this._provider.id, [{ pieceCid: storeResult.pieceCid }]), + }) + + const pieceId = commitResult.pieceIds[0] + options?.onPiecesConfirmed?.(commitResult.dataSetId, this._provider.id, [ + { pieceId, pieceCid: storeResult.pieceCid }, + ]) + + return { + pieceCid: storeResult.pieceCid, + size: storeResult.size, + copies: [ + { + providerId: this._provider.id, + dataSetId: commitResult.dataSetId, + pieceId, + role: 'primary' as const, + retrievalUrl: this.getPieceUrl(storeResult.pieceCid), + isNewDataSet: commitResult.isNewDataSet, + }, + ], + failures: [], + } + } + + // ========================================================================== + // Download, piece queries, and data set operations + // ========================================================================== + + /** + * Download data from this specific service provider */ async download(options: DownloadOptions): Promise { const parsedPieceCID = Piece.asPieceCID(options.pieceCid) @@ -1153,7 +1140,8 @@ export class StorageContext { } /** - * Get information about the service provider used by this service + * Get information about the service provider used by this service. 
+ * * @returns Provider information including pricing (currently same for all providers) */ async getProviderInfo(): Promise { @@ -1161,7 +1149,8 @@ export class StorageContext { } /** - * Get pieces scheduled for removal from this data set + * Get pieces scheduled for removal from this data set. + * * @returns Array of piece IDs scheduled for removal */ async getScheduledRemovals() { @@ -1174,10 +1163,9 @@ export class StorageContext { /** * Get all active pieces for this data set as an async generator. - * This provides lazy evaluation and better memory efficiency for large data sets. * @param options - Optional configuration object * @param options.batchSize - The batch size for each pagination call (default: 100) - * @yields Object with pieceCid and pieceId - the piece ID is needed for certain operations like deletion + * @yields Object with pieceCid and pieceId */ async *getPieces(options: { batchSize?: bigint } = {}): AsyncGenerator { if (this._dataSetId == null) { @@ -1195,7 +1183,6 @@ export class StorageContext { limit: batchSize, }) - // Yield pieces one by one for lazy evaluation for (let i = 0; i < result.pieces.length; i++) { yield { pieceCid: result.pieces[i].cid, @@ -1207,6 +1194,7 @@ export class StorageContext { offset += batchSize } } + private async _getPieceIdByCID(pieceCid: string | PieceCID): Promise { if (this.dataSetId == null) { throw createError('StorageContext', 'getPieceIdByCID', 'Data set not found') @@ -1228,7 +1216,7 @@ export class StorageContext { } /** - * Delete a piece with given CID from this data set + * Delete a piece with given CID from this data set. * * @param options - Options for the delete operation * @param options.piece - The PieceCID identifier or a piece number to delete by pieceID @@ -1255,6 +1243,7 @@ export class StorageContext { /** * Check if a piece exists on this service provider. 
+ * * @param options - Options for the has piece operation * @param options.pieceCid - The PieceCID (piece CID) to check * @returns True if the piece exists on this provider, false otherwise diff --git a/packages/synapse-sdk/src/storage/manager.ts b/packages/synapse-sdk/src/storage/manager.ts index 0a78da0a..a589c259 100644 --- a/packages/synapse-sdk/src/storage/manager.ts +++ b/packages/synapse-sdk/src/storage/manager.ts @@ -16,23 +16,28 @@ * await context.upload(data) * * // Context routing - * await synapse.storage.upload(data, { context }) + * await synapse.storage.upload(data, { contexts: [ctx1, ctx2] }) * ``` */ import * as Piece from '@filoz/synapse-core/piece' import type { UploadPieceStreamingData } from '@filoz/synapse-core/sp' import { getPDPProviderByAddress } from '@filoz/synapse-core/sp-registry' -import { type Address, type Hash, zeroAddress } from 'viem' +import { randIndex } from '@filoz/synapse-core/utils' +import { type Address, type Hash, type Hex, zeroAddress } from 'viem' +import { CommitError, StoreError } from '../errors/storage.ts' import { SPRegistryService } from '../sp-registry/index.ts' import type { Synapse } from '../synapse.ts' import type { + CopyResult, CreateContextsOptions, DownloadOptions, EnhancedDataSetInfo, + FailedCopy, PDPProvider, PieceCID, PreflightInfo, + PullStatus, StorageContextCallbacks, StorageInfo, StorageServiceOptions, @@ -50,38 +55,60 @@ import { import type { WarmStorageService } from '../warm-storage/index.ts' import { StorageContext } from './context.ts' -// Combined callbacks type that can include both creation and upload callbacks +// Multi-copy upload constants +const MAX_SECONDARY_ATTEMPTS = 5 +const DEFAULT_COPY_COUNT = 2 + +/** + * Safely invoke a user-provided callback without interrupting flow. + * Logs a warning if the callback throws. 
+ */ +function safeInvoke(fn: ((...args: T) => void) | undefined, ...args: T): void { + if (fn == null) return + try { + fn(...args) + } catch (error) { + console.warn('Callback error (ignored):', error instanceof Error ? error.message : error) + } +} + +/** + * Combined callbacks for StorageManager.upload(). + * + * Lifecycle stages: + * - Context creation: onProviderSelected, onDataSetResolved (from StorageContextCallbacks) + * - Store (primary): onProgress, onStored (from UploadCallbacks) + * - Pull (secondary): onPullProgress, onCopyComplete, onCopyFailed + * - Commit: onPiecesAdded, onPiecesConfirmed + */ export type CombinedCallbacks = StorageContextCallbacks & UploadCallbacks /** - * Upload options for StorageManager.upload() - the all-in-one upload method + * Upload options for StorageManager.upload() * - * This is the "uber-shortcut" method that can handle everything from context - * creation to piece upload in a single call. It combines: - * - Storage context creation options (provider selection, data set creation) - * - Upload callbacks (both creation and upload progress) - * - Piece-specific metadata + * Extends CreateContextsOptions to inherit multi-copy provider selection. + * Adds upload-specific options: explicit contexts, pre-calculated PieceCID, and abort signal. * * Usage patterns: - * 1. With explicit context: `{ context, callbacks?, metadata? }` - routes to context.upload() - * 2. Auto-create context: `{ providerId?, dataSetId?, withCDN?, callbacks?, metadata? }` - creates/reuses context - * 3. Use default context: `{ callbacks?, metadata? }` - uses cached default context + * 1. With explicit contexts: `{ contexts }` - uses the given contexts directly + * 2. Auto-create contexts: `{ providerIds?, dataSetIds?, count? }` - creates/reuses contexts + * 3. 
Use default contexts: no options - uses cached default contexts (2 copies) */ -export interface StorageManagerUploadOptions extends StorageServiceOptions { - // Multiple storage providers: if provided, all other context options are invalid +export interface StorageManagerUploadOptions extends CreateContextsOptions { + /** Pre-created contexts to use. If provided, other selection options are invalid. */ contexts?: StorageContext[] - // Context routing - if provided, all other context options are invalid - context?: StorageContext - - // Callbacks that can include both creation and upload callbacks + /** Callbacks for both context creation and upload lifecycle */ callbacks?: Partial - /** Optional pre-calculated PieceCID to skip CommP calculation (BYO PieceCID, it will be checked by the server) */ + /** Optional pre-calculated PieceCID to skip CommP calculation (verified by server) */ pieceCid?: PieceCID /** Optional AbortSignal to cancel the upload */ signal?: AbortSignal + + /** Custom metadata for pieces being uploaded (key-value pairs) */ + pieceMetadata?: Record } export interface StorageManagerDownloadOptions extends DownloadOptions { @@ -115,86 +142,360 @@ export class StorageManager { } /** - * Upload data to storage - * Uses the storage contexts or context provided in the options - * Otherwise creates/reuses default context + * Upload data to Filecoin Onchain Cloud using a store->pull->commit flow across + * multiple providers. + * + * By default, uploads to 2 providers (primary + secondary) for redundancy. + * Data is uploaded once to the primary, then secondaries pull from the primary + * via SP-to-SP transfer. + * + * This method only throws if zero copies succeed. Individual copy failures + * are recorded in `result.failures`. Always check `result.copies.length` + * against your requested count. * - * Accepts Uint8Array or ReadableStream. * For large files, prefer streaming to minimize memory usage. 
* - * Note: Multi-context uploads (uploading to multiple providers simultaneously) currently - * only support Uint8Array. For streaming uploads with multiple contexts, convert your - * stream to Uint8Array first or use stream forking (future feature). + * For uploading multiple files, use the split operations API directly: + * createContexts() -> store() -> presignForCommit() -> pull() -> commit() + * + * @param data - Raw bytes (Uint8Array) or ReadableStream to upload + * @param options - Upload options including contexts, callbacks, and abort signal + * @returns Upload result with pieceCid, size, copies array, and failures array + * @throws StoreError if primary store fails (before any data is committed) + * @throws CommitError if all commit attempts fail (data stored but not on-chain) */ async upload(data: UploadPieceStreamingData, options?: StorageManagerUploadOptions): Promise { - // Validate options - if context is provided, no other options should be set - if (options?.context != null || options?.contexts != null) { + const { contexts, explicitProviders } = await this._resolveUploadContexts(options) + const [primary, ...secondaries] = contexts + + // Store on primary provider + let storeResult: { pieceCid: PieceCID; size: number } + try { + storeResult = await primary.store(data, { + pieceCid: options?.pieceCid, + signal: options?.signal, + onProgress: options?.callbacks?.onProgress, + }) + safeInvoke(options?.callbacks?.onStored, primary.provider.id, storeResult.pieceCid) + } catch (error) { + throw new StoreError( + `Failed to store on primary provider ${primary.provider.id} (${primary.provider.pdp.serviceURL})`, + { + cause: error instanceof Error ? 
error : undefined, + providerId: primary.provider.id, + endpoint: primary.provider.pdp.serviceURL, + } + ) + } + + const pieceInputs = [{ pieceCid: storeResult.pieceCid, pieceMetadata: options?.pieceMetadata }] + + // Pull to secondaries via SP-to-SP transfer + let successfulSecondaries: StorageContext[] = [] + let pullFailures: FailedCopy[] = [] + let extraDataMap = new Map() + + if (secondaries.length > 0) { + const pullResult = await this._pullToSecondariesWithRetry(primary, secondaries, [storeResult.pieceCid], { + explicitProviders, + signal: options?.signal, + withCDN: options?.withCDN, + metadata: options?.metadata, + pieceMetadata: options?.pieceMetadata, + callbacks: options?.callbacks, + onProgress: options?.callbacks?.onPullProgress, + onSuccess: options?.callbacks?.onCopyComplete, + onFailure: options?.callbacks?.onCopyFailed, + pieceInputs, + }) + successfulSecondaries = pullResult.successful + pullFailures = pullResult.failures + extraDataMap = pullResult.extraDataMap + } + + // Commit on all providers in parallel + const commitPromises = [ + { ctx: primary, role: 'primary' as const }, + ...successfulSecondaries.map((ctx) => ({ ctx, role: 'secondary' as const })), + ].map(async ({ ctx, role }) => { + const result = await ctx.commit({ + pieces: pieceInputs, + extraData: extraDataMap.get(ctx), + onSubmitted: (txHash) => + safeInvoke(options?.callbacks?.onPiecesAdded, txHash, ctx.provider.id, [{ pieceCid: storeResult.pieceCid }]), + }) + return { ctx, role, result } + }) + + const commitResults = await Promise.allSettled(commitPromises) + + // Process commit results — failures are recorded, throw only if all fail + type CommitResultType = { txHash: string; pieceIds: bigint[]; dataSetId: bigint; isNewDataSet: boolean } + let primaryCommit: CommitResultType | undefined + let primaryCommitError: Error | undefined + const secondaryCommits: Array<{ context: StorageContext; result: CommitResultType }> = [] + const commitFailedSecondaryIds: Set = new Set() + + 
for (const settled of commitResults) { + if (settled.status === 'fulfilled') { + const { ctx, role, result } = settled.value + if (role === 'primary') { + primaryCommit = result + } else { + secondaryCommits.push({ context: ctx, result }) + } + } else { + const failedIndex = commitResults.indexOf(settled) + if (failedIndex === 0) { + primaryCommitError = settled.reason instanceof Error ? settled.reason : new Error(String(settled.reason)) + } else { + // Data is already on this SP (pull succeeded) but commit failed. + // A targeted addPieces retry could recover without re-uploading. + // Not currently implemented; the piece will be GC'd by the SP. + const failedSecondary = successfulSecondaries[failedIndex - 1] + commitFailedSecondaryIds.add(failedSecondary.provider.id) + } + } + } + + // Build result + const copies: CopyResult[] = [] + + if (primaryCommit) { + copies.push({ + providerId: primary.provider.id, + dataSetId: primaryCommit.dataSetId, + pieceId: primaryCommit.pieceIds[0], + role: 'primary', + retrievalUrl: primary.getPieceUrl(storeResult.pieceCid), + isNewDataSet: primaryCommit.isNewDataSet, + }) + } + + for (const { context, result } of secondaryCommits) { + copies.push({ + providerId: context.provider.id, + dataSetId: result.dataSetId, + pieceId: result.pieceIds[0], + role: 'secondary', + retrievalUrl: context.getPieceUrl(storeResult.pieceCid), + isNewDataSet: result.isNewDataSet, + }) + } + + // Throw if no copies succeeded + if (copies.length === 0) { + throw new CommitError( + `Failed to commit on primary provider ${primary.provider.id} (${primary.provider.pdp.serviceURL}) - data is stored but not on-chain`, + { + cause: primaryCommitError, + providerId: primary.provider.id, + endpoint: primary.provider.pdp.serviceURL, + } + ) + } + + // Fire onPiecesConfirmed callbacks for successful commits + if (primaryCommit) { + safeInvoke(options?.callbacks?.onPiecesConfirmed, primaryCommit.dataSetId, primary.provider.id, [ + { pieceId: 
primaryCommit.pieceIds[0], pieceCid: storeResult.pieceCid }, + ]) + } + for (const { context, result } of secondaryCommits) { + safeInvoke(options?.callbacks?.onPiecesConfirmed, result.dataSetId, context.provider.id, [ + { pieceId: result.pieceIds[0], pieceCid: storeResult.pieceCid }, + ]) + } + + // Build failures list + const failures: FailedCopy[] = [...pullFailures] + const pullFailedIds = new Set(pullFailures.map((f) => f.providerId)) + + if (primaryCommitError && !pullFailedIds.has(primary.provider.id)) { + failures.push({ + providerId: primary.provider.id, + role: 'primary', + error: 'Commit failed', + explicit: explicitProviders, + }) + } + + for (const failedId of commitFailedSecondaryIds) { + if (!pullFailedIds.has(failedId)) { + failures.push({ + providerId: failedId, + role: 'secondary', + error: 'Commit failed', + explicit: explicitProviders, + }) + } + } + + return { pieceCid: storeResult.pieceCid, size: storeResult.size, copies, failures } + } + + /** + * Resolve and validate upload contexts from options. + * Handles contexts passthrough, option validation, and context creation. 
+ */ + private async _resolveUploadContexts(options?: StorageManagerUploadOptions): Promise<{ + contexts: StorageContext[] + explicitProviders: boolean + }> { + if (options?.contexts != null) { const invalidOptions = [] - if (options.providerId !== undefined) invalidOptions.push('providerId') - if (options.providerAddress !== undefined) invalidOptions.push('providerAddress') - if (options.dataSetId !== undefined) invalidOptions.push('dataSetId') + if (options.providerIds !== undefined) invalidOptions.push('providerIds') + if (options.dataSetIds !== undefined) invalidOptions.push('dataSetIds') if (options.withCDN !== undefined) invalidOptions.push('withCDN') - if (options.forceCreateDataSet !== undefined) invalidOptions.push('forceCreateDataSet') - if (options.uploadBatchSize !== undefined) invalidOptions.push('uploadBatchSize') if (invalidOptions.length > 0) { throw createError( 'StorageManager', 'upload', - `Cannot specify both 'context' and other options: ${invalidOptions.join(', ')}` + `Cannot specify both 'contexts' and other options: ${invalidOptions.join(', ')}` ) } } - if (options?.contexts != null && options.contexts.length > 0) { - if (options?.context != null) { - throw createError('StorageManager', 'upload', "Cannot specify both 'context' and 'contexts'") - } - } + // Explicit providers disables auto-retry on failure + const explicitProviders = + options?.contexts != null || + (options?.providerIds != null && options.providerIds.length > 0) || + (options?.dataSetIds != null && options.dataSetIds.length > 0) - // Get the context to use const contexts = options?.contexts ?? - (options?.context - ? 
[options.context] - : await this.createContexts({ - withCDN: options?.withCDN, - count: 1, // single context by default for now - this will be changed in a future version - uploadBatchSize: options?.uploadBatchSize, - forceCreateDataSets: options?.forceCreateDataSet, - metadata: options?.metadata, - excludeProviderIds: options?.excludeProviderIds, - providerIds: options?.providerId ? [options.providerId] : undefined, - dataSetIds: options?.dataSetId ? [options.dataSetId] : undefined, - callbacks: options?.callbacks, - })) - - // Multi-context upload handling - if (contexts.length > 1) { - if (data instanceof ReadableStream) { - throw createError('StorageManager', 'upload', 'Streaming uploads are not supported for multiple contexts') - } - // Upload to all contexts with the same pieceCid - return Promise.all( - contexts.map((context) => - context.upload(data, { - ...options?.callbacks, // TODO: callbacks should be able to differentiate by provider - metadata: options?.metadata, - signal: options?.signal, + (await this.createContexts({ + withCDN: options?.withCDN, + count: options?.count ?? DEFAULT_COPY_COUNT, + metadata: options?.metadata, + excludeProviderIds: options?.excludeProviderIds, + providerIds: options?.providerIds, + dataSetIds: options?.dataSetIds, + callbacks: options?.callbacks, + })) + + return { contexts, explicitProviders } + } + + /** + * Pull pieces from primary to secondaries with retry logic. + * + * For each secondary: attempt pull, and if failed with non-explicit providers, + * try a replacement provider up to MAX_SECONDARY_ATTEMPTS times. 
+   */
+  private async _pullToSecondariesWithRetry(
+    primary: StorageContext,
+    secondaries: StorageContext[],
+    pieceCids: PieceCID[],
+    options: {
+      explicitProviders: boolean
+      signal?: AbortSignal
+      withCDN?: boolean
+      metadata?: Record<string, string>
+      pieceMetadata?: Record<string, string>
+      callbacks?: StorageManagerUploadOptions['callbacks'] // NOTE(review): type args were garbled in source; confirm exact callbacks type
+      onProgress?: (providerId: bigint, pieceCid: PieceCID, status: PullStatus) => void
+      onSuccess?: (providerId: bigint, pieceCid: PieceCID) => void
+      onFailure?: (providerId: bigint, pieceCid: PieceCID, error: Error) => void
+      pieceInputs?: Array<{ pieceCid: PieceCID; pieceMetadata?: Record<string, string> }>
+    }
+  ): Promise<{ successful: StorageContext[]; failures: FailedCopy[]; extraDataMap: Map<StorageContext, Hex> }> {
+    const usedProviderIds = new Set([primary.provider.id, ...secondaries.map((s) => s.provider.id)])
+    const successful: StorageContext[] = []
+    const failures: FailedCopy[] = []
+    const extraDataMap = new Map<StorageContext, Hex>()
+
+    for (let i = 0; i < secondaries.length; i++) {
+      let currentSecondary = secondaries[i]
+      let attempts = 0
+      let succeeded = false
+
+      while (!succeeded && attempts < MAX_SECONDARY_ATTEMPTS) {
+        try {
+          // Pre-sign extraData so the same blob is reused for commit
+          let extraData: Hex | undefined
+          if (options.pieceInputs) {
+            extraData = await currentSecondary.presignForCommit(options.pieceInputs)
+          }
+
+          const providerId = currentSecondary.provider.id
+          const pullResult = await currentSecondary.pull({
+            pieces: pieceCids,
+            from: primary,
+            signal: options.signal,
+            extraData,
+            onProgress: options.onProgress
+              ?
(cid, status) => safeInvoke(options.onProgress, providerId, cid, status) + : undefined, }) - ) - ).then((results) => results[0]) // all results should be the same - } else { - // Single context upload - supports all data types - const context = contexts[0] - // Upload to single context - return context.upload(data, { - ...options?.callbacks, - metadata: options?.metadata, - signal: options?.signal, - }) + if (pullResult.status === 'complete') { + succeeded = true + successful.push(currentSecondary) + if (extraData) { + extraDataMap.set(currentSecondary, extraData) + } + + for (const pieceCid of pieceCids) { + safeInvoke(options.onSuccess, providerId, pieceCid) + } + } else { + const failedPieces = pullResult.pieces.filter((p) => p.status !== 'complete') + const errorMsg = + failedPieces.length > 0 + ? `Pull failed for ${failedPieces.length} piece(s): ${failedPieces.map((p) => p.pieceCid).join(', ')}` + : 'Pull failed' + failures.push({ + providerId, + role: 'secondary', + error: errorMsg, + explicit: options.explicitProviders, + }) + const err = new Error(errorMsg) + for (const pieceCid of pieceCids) { + safeInvoke(options.onFailure, providerId, pieceCid, err) + } + } + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error) + failures.push({ + providerId: currentSecondary.provider.id, + role: 'secondary', + error: errorMsg, + explicit: options.explicitProviders, + }) + const err = error instanceof Error ? 
error : new Error(errorMsg) + for (const pieceCid of pieceCids) { + safeInvoke(options.onFailure, currentSecondary.provider.id, pieceCid, err) + } + } + + attempts++ + + // If failed and not explicit, try to get a replacement provider + if (!succeeded && !options.explicitProviders && attempts < MAX_SECONDARY_ATTEMPTS) { + try { + const [newContext] = await this.createContexts({ + withCDN: options.withCDN, + count: 1, + metadata: options.metadata, + callbacks: options.callbacks, + excludeProviderIds: [...usedProviderIds], + }) + currentSecondary = newContext + usedProviderIds.add(newContext.provider.id) + } catch { + // No more providers available + break + } + } else if (!succeeded && options.explicitProviders) { + break + } + } } + + return { successful, failures, extraDataMap } } /** @@ -312,31 +613,25 @@ export class StorageManager { * * Contexts are selected by priority: * 1. Specified datasets (`dataSetIds`) - uses their existing providers - * 2. Specified providers (`providerIds` or `providerAddresses`) - finds or creates matching datasets + * 2. Specified providers (`providerIds`) - finds or creates matching datasets * 3. Automatically selected from remaining approved providers * - * For automatic selection, existing datasets matching the `metadata` are reused unless - * `forceCreateDataSets` is true. Providers are randomly chosen to distribute across the network. + * For automatic selection, existing datasets matching the `metadata` are reused. + * Providers are randomly chosen to distribute across the network. 
* * @param options - Configuration options {@link CreateContextsOptions} * @param options.count - Maximum number of contexts to create (default: 2) * @param options.dataSetIds - Specific dataset IDs to include * @param options.providerIds - Specific provider IDs to use * @param options.metadata - Metadata to match when finding/creating datasets - * @param options.forceCreateDataSets - Always create new datasets instead of reusing existing ones * @param options.excludeProviderIds - Provider IDs to skip during selection * @returns Promise resolving to array of storage contexts */ async createContexts(options?: CreateContextsOptions): Promise { const withCDN = options?.withCDN ?? this._withCDN - const canUseDefault = - options == null || - (options.providerIds == null && - options.dataSetIds == null && - options.forceCreateDataSets !== true && - options.uploadBatchSize == null) + const canUseDefault = options == null || (options.providerIds == null && options.dataSetIds == null) if (this._defaultContexts != null) { - const expectedSize = options?.count ?? 2 + const expectedSize = options?.count ?? DEFAULT_COPY_COUNT if ( this._defaultContexts.length === expectedSize && this._defaultContexts.every((context) => options?.excludeProviderIds?.includes(context.provider.id) !== true) @@ -388,7 +683,7 @@ export class StorageManager { } /** - * Create a new storage context with specified options + * Create a single storage context with specified options */ async createContext(options?: StorageServiceOptions): Promise { // Determine the effective withCDN setting @@ -398,13 +693,7 @@ export class StorageManager { // We can use the default if: // 1. No options provided, OR // 2. 
Only withCDN, metadata and/or callbacks are provided (callbacks can fire for cached context) - const canUseDefault = - options == null || - (options.providerId == null && - options.providerAddress == null && - options.dataSetId == null && - options.forceCreateDataSet !== true && - options.uploadBatchSize == null) + const canUseDefault = options == null || (options.providerId == null && options.dataSetId == null) if (canUseDefault && this._defaultContexts != null) { // Check if we have a default context with compatible metadata diff --git a/packages/synapse-sdk/src/test/storage-upload.test.ts b/packages/synapse-sdk/src/test/storage-upload.test.ts index 3395a120..5dc2631a 100644 --- a/packages/synapse-sdk/src/test/storage-upload.test.ts +++ b/packages/synapse-sdk/src/test/storage-upload.test.ts @@ -9,10 +9,10 @@ import * as Mocks from '@filoz/synapse-core/mocks' import { assert } from 'chai' import { setup } from 'iso-web/msw' import { HttpResponse, http } from 'msw' -import { type Account, type Client, createWalletClient, type Hex, type Transport, http as viemHttp } from 'viem' +import { type Account, type Client, createWalletClient, type Transport, http as viemHttp } from 'viem' import { privateKeyToAccount } from 'viem/accounts' import { Synapse } from '../synapse.ts' -import type { PieceCID, PieceRecord } from '../types.ts' +import type { PieceCID } from '../types.ts' import { SIZE_CONSTANTS } from '../utils/constants.ts' // mock server for testing @@ -37,7 +37,7 @@ describe('Storage Upload', () => { }) it('should support parallel uploads', async () => { - const txHash = '0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef123456' + let nextPieceId = 0 let uploadCompleteCount = 0 server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, debug: false }), @@ -45,6 +45,8 @@ describe('Storage Upload', () => { ...Mocks.pdp.streamingUploadHandlers(), Mocks.pdp.findAnyPieceHandler(true), http.post<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces`, async 
({ params }) => { + const txHash = `0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef12345${nextPieceId}` + nextPieceId++ return new HttpResponse(null, { status: 201, headers: { @@ -52,19 +54,24 @@ describe('Storage Upload', () => { }, }) }), - http.get<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, ({ params }) => { - const response = { - addMessageOk: true, - confirmedPieceIds: [0, 1, 2], - dataSetId: parseInt(params.id, 10), - pieceCount: 3, - piecesAdded: true, - txHash, - txStatus: 'confirmed', - } + http.get<{ id: string; txHash: string }>( + `https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, + ({ params }) => { + // Extract the piece ID from the last character of the txHash + const pieceId = Number.parseInt(params.txHash.slice(-1), 10) + const response = { + addMessageOk: true, + confirmedPieceIds: [pieceId], + dataSetId: parseInt(params.id, 10), + pieceCount: 1, + piecesAdded: true, + txHash: params.txHash, + txStatus: 'confirmed', + } - return HttpResponse.json(response, { status: 200 }) - }) + return HttpResponse.json(response, { status: 200 }) + } + ) ) const synapse = new Synapse({ client }) const context = await synapse.storage.createContext({ @@ -82,13 +89,13 @@ describe('Storage Upload', () => { // Start all uploads concurrently with callbacks const uploads = [ context.upload(firstData, { - onUploadComplete: () => uploadCompleteCount++, + onPiecesConfirmed: () => uploadCompleteCount++, }), context.upload(secondData, { - onUploadComplete: () => uploadCompleteCount++, + onPiecesConfirmed: () => uploadCompleteCount++, }), context.upload(thirdData, { - onUploadComplete: () => uploadCompleteCount++, + onPiecesConfirmed: () => uploadCompleteCount++, }), ] @@ -96,22 +103,28 @@ describe('Storage Upload', () => { assert.lengthOf(results, 3, 'All three uploads should complete successfully') const resultSizes = results.map((r) => r.size) - const resultPieceIds = results.map((r) => r.pieceId) + const 
resultPieceIds = results.map((r) => r.copies[0].pieceId) assert.deepEqual(resultSizes, [127, 128, 129], 'Should have one result for each data size') - assert.deepEqual(resultPieceIds, [0n, 1n, 2n], 'The set of assigned piece IDs should be {0, 1, 2}') + assert.deepEqual( + [...resultPieceIds].sort((a, b) => Number(a - b)), + [0n, 1n, 2n], + 'The set of assigned piece IDs should be {0, 1, 2}' + ) assert.strictEqual(uploadCompleteCount, 3, 'uploadComplete should be called 3 times') }) - it('should respect batch size configuration', async () => { + it('should commit each upload independently', async () => { let addPiecesCalls = 0 - const txHash = '0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef123456' + let nextPieceId = 0 server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, debug: false }), Mocks.PING(), ...Mocks.pdp.streamingUploadHandlers(), Mocks.pdp.findAnyPieceHandler(true), http.post<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces`, async ({ params }) => { + const txHash = `0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef12345${nextPieceId}` + nextPieceId++ return new HttpResponse(null, { status: 201, headers: { @@ -119,204 +132,53 @@ describe('Storage Upload', () => { }, }) }), - http.get<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, ({ params }) => { - addPiecesCalls++ - - if (addPiecesCalls === 2) { - return HttpResponse.json( - { - addMessageOk: true, - confirmedPieceIds: [2], - dataSetId: parseInt(params.id, 10), - pieceCount: 1, - piecesAdded: true, - txHash, - txStatus: 'confirmed', - }, - { status: 200 } - ) - } - - return HttpResponse.json({ - addMessageOk: true, - confirmedPieceIds: [0, 1], - dataSetId: parseInt(params.id, 10), - pieceCount: 2, - piecesAdded: true, - txHash, - txStatus: 'confirmed', - }) - }) - ) - const synapse = new Synapse({ client }) - const context = await synapse.storage.createContext({ - withCDN: true, - uploadBatchSize: 2, - metadata: { - environment: 'test', 
- }, - }) - - // Create distinct data for each upload - const firstData = new Uint8Array(127).fill(1) // 127 bytes - const secondData = new Uint8Array(128).fill(2) // 66 bytes - const thirdData = new Uint8Array(129).fill(3) // 67 bytes - - // Start all uploads concurrently with callbacks - const uploads = [context.upload(firstData), context.upload(secondData), context.upload(thirdData)] - - const results = await Promise.all(uploads) - - assert.lengthOf(results, 3, 'All three uploads should complete successfully') - - assert.strictEqual(addPiecesCalls, 2, 'addPieces should be called 2 times') - }) - - it('should handle batch size of 1', async () => { - let addPiecesCalls = 0 - const txHash = '0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef123456' - const pdpOptions = { - baseUrl: 'https://pdp.example.com', - } - server.use( - Mocks.JSONRPC({ ...Mocks.presets.basic, debug: false }), - Mocks.PING(), - ...Mocks.pdp.streamingUploadHandlers(pdpOptions), - Mocks.pdp.findAnyPieceHandler(true, pdpOptions), - http.post<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces`, async ({ params }) => { - return new HttpResponse(null, { - status: 201, - headers: { - Location: `/pdp/data-sets/${params.id}/pieces/added/${txHash}`, - }, - }) - }), - http.get<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, ({ params }) => { - addPiecesCalls++ - - if (addPiecesCalls === 2) { - return HttpResponse.json( - { - addMessageOk: true, - confirmedPieceIds: [1], - dataSetId: parseInt(params.id, 10), - pieceCount: 1, - piecesAdded: true, - txHash, - txStatus: 'confirmed', - }, - { status: 200 } - ) - } - if (addPiecesCalls === 3) { + http.get<{ id: string; txHash: string }>( + `https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, + ({ params }) => { + addPiecesCalls++ + const pieceId = Number.parseInt(params.txHash.slice(-1), 10) return HttpResponse.json( { addMessageOk: true, - confirmedPieceIds: [2], + confirmedPieceIds: 
[pieceId], dataSetId: parseInt(params.id, 10), pieceCount: 1, piecesAdded: true, - txHash, + txHash: params.txHash, txStatus: 'confirmed', }, { status: 200 } ) } - - return HttpResponse.json( - { - addMessageOk: true, - confirmedPieceIds: [0], - dataSetId: parseInt(params.id, 10), - pieceCount: 1, - piecesAdded: true, - txHash, - txStatus: 'confirmed', - }, - { status: 200 } - ) - }) + ) ) const synapse = new Synapse({ client }) const context = await synapse.storage.createContext({ withCDN: true, - uploadBatchSize: 1, metadata: { environment: 'test', }, }) - // Create distinct data for each upload - const firstData = new Uint8Array(127).fill(1) // 127 bytes - const secondData = new Uint8Array(128).fill(2) // 66 bytes - const thirdData = new Uint8Array(129).fill(3) // 67 bytes + const firstData = new Uint8Array(127).fill(1) + const secondData = new Uint8Array(128).fill(2) + const thirdData = new Uint8Array(129).fill(3) - // Start all uploads concurrently with callbacks const uploads = [context.upload(firstData), context.upload(secondData), context.upload(thirdData)] - const results = await Promise.all(uploads) assert.lengthOf(results, 3, 'All three uploads should complete successfully') + assert.strictEqual(addPiecesCalls, 3, 'Each upload should commit independently') const resultSizes = results.map((r) => r.size) - const resultPieceIds = results.map((r) => r.pieceId) + const resultPieceIds = results.map((r) => r.copies[0].pieceId) assert.deepEqual(resultSizes, [127, 128, 129], 'Should have one result for each data size') - assert.deepEqual(resultPieceIds, [0n, 1n, 2n], 'The set of assigned piece IDs should be {0, 1, 2}') - assert.strictEqual(addPiecesCalls, 3, 'addPieces should be called 2 times') - }) - - it('should debounce uploads for better batching', async () => { - let addPiecesCalls = 0 - const txHash = '0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef123456' - const pdpOptions = { - baseUrl: 'https://pdp.example.com', - } - server.use( - 
Mocks.JSONRPC({ ...Mocks.presets.basic, debug: false }), - Mocks.PING(), - ...Mocks.pdp.streamingUploadHandlers(pdpOptions), - Mocks.pdp.findAnyPieceHandler(true, pdpOptions), - http.post<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces`, async ({ params }) => { - return new HttpResponse(null, { - status: 201, - headers: { - Location: `/pdp/data-sets/${params.id}/pieces/added/${txHash}`, - }, - }) - }), - http.get<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, ({ params }) => { - addPiecesCalls++ - - return HttpResponse.json( - { - addMessageOk: true, - confirmedPieceIds: [0, 1, 2, 3, 4], - dataSetId: parseInt(params.id, 10), - pieceCount: 5, - piecesAdded: true, - txHash, - txStatus: 'confirmed', - }, - { status: 200 } - ) - }) + assert.deepEqual( + [...resultPieceIds].sort((a, b) => Number(a - b)), + [0n, 1n, 2n], + 'The set of assigned piece IDs should be {0, 1, 2}' ) - const synapse = new Synapse({ client }) - const context = await synapse.storage.createContext({ - withCDN: true, - metadata: { - environment: 'test', - }, - }) - - const uploads = [] - for (let i = 0; i < 5; i++) { - uploads.push(context.upload(new Uint8Array(127).fill(i))) - } - - await Promise.all(uploads) - assert.strictEqual(addPiecesCalls, 1, 'addPieces should be called 1 time') }) it('should accept exactly 127 bytes', async () => { @@ -366,7 +228,7 @@ describe('Storage Upload', () => { const expectedSize = 127 const upload = await context.upload(new Uint8Array(expectedSize)) assert.strictEqual(addPiecesCalls, 1, 'addPieces should be called 1 time') - assert.strictEqual(upload.pieceId, 0n, 'pieceId should be 0') + assert.strictEqual(upload.copies[0].pieceId, 0n, 'pieceId should be 0') assert.strictEqual(upload.size, expectedSize, 'size should be 127') }) @@ -418,15 +280,17 @@ describe('Storage Upload', () => { const upload = await context.upload(new Uint8Array(expectedSize).fill(1)) assert.strictEqual(addPiecesCalls, 1, 'addPieces should be 
called 1 time') - assert.strictEqual(upload.pieceId, 0n, 'pieceId should be 0') + assert.strictEqual(upload.copies[0].pieceId, 0n, 'pieceId should be 0') assert.strictEqual(upload.size, expectedSize, 'size should be 200 MiB') }) it('should handle new server with transaction tracking', async () => { - let piecesAddedArgs: { transaction?: Hex; pieces?: Array<{ pieceCid: PieceCID }> } | null = null - let piecesConfirmedArgs: { dataSetId?: bigint; pieces?: PieceRecord[] } | null = null - let uploadCompleteCallbackFired = false - let resolvedDataSetId: number | undefined + let piecesAddedArgs: { transaction?: string; providerId?: bigint; pieces?: { pieceCid: PieceCID }[] } | null = null + let piecesConfirmedArgs: { + dataSetId?: bigint + providerId?: bigint + pieces?: { pieceId: bigint; pieceCid: PieceCID }[] + } | null = null const txHash = '0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef123456' const pdpOptions = { baseUrl: 'https://pdp.example.com', @@ -445,12 +309,11 @@ describe('Storage Upload', () => { }) }), http.get<{ id: string }>(`https://pdp.example.com/pdp/data-sets/:id/pieces/added/:txHash`, ({ params }) => { - resolvedDataSetId = parseInt(params.id, 10) return HttpResponse.json( { addMessageOk: true, confirmedPieceIds: [0], - dataSetId: resolvedDataSetId, + dataSetId: parseInt(params.id, 10), pieceCount: 1, piecesAdded: true, txHash, @@ -470,38 +333,36 @@ describe('Storage Upload', () => { const expectedSize = SIZE_CONSTANTS.MIN_UPLOAD_SIZE const uploadResult = await context.upload(new Uint8Array(expectedSize).fill(1), { - onPiecesAdded(transaction: Hex | undefined, pieces: Array<{ pieceCid: PieceCID }> | undefined) { - piecesAddedArgs = { transaction, pieces } + onPiecesAdded(transaction, providerId, pieces) { + piecesAddedArgs = { transaction, providerId, pieces } }, - onPiecesConfirmed(dataSetId: bigint, pieces: PieceRecord[]) { - piecesConfirmedArgs = { dataSetId, pieces } - }, - onUploadComplete() { - uploadCompleteCallbackFired = true + 
onPiecesConfirmed(dataSetId, providerId, pieces) { + piecesConfirmedArgs = { dataSetId, providerId, pieces } }, }) - assert.isTrue(uploadCompleteCallbackFired, 'uploadCompleteCallback should have been called') assert.isNotNull(piecesAddedArgs, 'onPiecesAdded args should be captured') assert.isNotNull(piecesConfirmedArgs, 'onPiecesConfirmed args should be captured') if (piecesAddedArgs == null || piecesConfirmedArgs == null) { throw new Error('Callbacks should have been called') } - const addedArgs: { transaction?: Hex; pieces?: Array<{ pieceCid: PieceCID }> } = piecesAddedArgs - const confirmedArgs: { dataSetId?: bigint; pieces?: PieceRecord[] } = piecesConfirmedArgs - assert.strictEqual(addedArgs.transaction, txHash, 'onPiecesAdded should receive transaction hash') + const addedArgs: { transaction: string; pieces: { pieceCid: PieceCID }[] } = piecesAddedArgs as any + const confirmedArgs: { dataSetId: bigint; pieces: { pieceId: bigint; pieceCid: PieceCID }[] } = + piecesConfirmedArgs as any + assert.isString(addedArgs.transaction, 'onPiecesAdded should provide transaction hash') + assert.lengthOf(addedArgs.pieces, 1, 'onPiecesAdded should have 1 piece') assert.strictEqual( - addedArgs.pieces?.[0].pieceCid.toString(), + addedArgs.pieces[0].pieceCid.toString(), uploadResult.pieceCid.toString(), 'onPiecesAdded should provide matching pieceCid' ) - assert.isDefined(resolvedDataSetId, 'resolvedDataSetId should be defined') assert.strictEqual( - confirmedArgs.dataSetId, - BigInt(resolvedDataSetId), - 'onPiecesConfirmed should provide the dataset id' + confirmedArgs.pieces[0].pieceCid.toString(), + uploadResult.pieceCid.toString(), + 'onPiecesConfirmed should provide matching pieceCid' ) - assert.strictEqual(confirmedArgs.pieces?.[0].pieceId, 0n, 'onPiecesConfirmed should include piece IDs') + assert.strictEqual(confirmedArgs.pieces[0].pieceId, 0n, 'onPiecesConfirmed should include piece ID') + assert.isAbove(Number(confirmedArgs.dataSetId), 0, 'onPiecesConfirmed should 
include dataSetId') }) it('should handle ArrayBuffer input', async () => { @@ -547,7 +408,7 @@ describe('Storage Upload', () => { const buffer = new Uint8Array(1024) const upload = await context.upload(buffer) - assert.strictEqual(upload.pieceId, 0n, 'pieceId should be 0') + assert.strictEqual(upload.copies[0].pieceId, 0n, 'pieceId should be 0') assert.strictEqual(upload.size, 1024, 'size should be 1024') }) }) diff --git a/packages/synapse-sdk/src/test/storage.test.ts b/packages/synapse-sdk/src/test/storage.test.ts index 194abcad..cf6bfc7e 100644 --- a/packages/synapse-sdk/src/test/storage.test.ts +++ b/packages/synapse-sdk/src/test/storage.test.ts @@ -100,15 +100,13 @@ describe('StorageService', () => { assert.equal(service.serviceProvider, Mocks.PROVIDERS.provider1.providerInfo.serviceProvider) }) - it('should skip existing datasets and return -1 with providerId when forceCreateDataSet is true', async () => { - let fetchedDataSets = false + it('should select provider and find existing data set with providerId', async () => { server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, warmStorageView: { ...Mocks.presets.basic.warmStorageView, getAllDataSetMetadata() { - fetchedDataSets = true return [[], []] }, }, @@ -127,7 +125,6 @@ describe('StorageService', () => { synapse, warmStorageService, providerId: Mocks.PROVIDERS.provider1.providerId, - forceCreateDataSet: true, }) assert.equal( @@ -135,11 +132,9 @@ describe('StorageService', () => { Mocks.PROVIDERS.provider1.providerInfo.serviceProvider, 'Should select the requested provider' ) - assert.equal(context.dataSetId, undefined, 'Should not have a data set id when forceCreateDataSet is true') - assert.isFalse(fetchedDataSets, 'Should not have fetched existing data sets when forceCreateDataSet is true') }) - it('should skip existing datasets and return -1 with providerAddress when forceCreateDataSet is true', async () => { + it('should select provider by providerId and reuse existing data set', async () => { 
server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, @@ -160,7 +155,7 @@ describe('StorageService', () => { const context = await StorageContext.create({ synapse, warmStorageService, - providerAddress: Mocks.PROVIDERS.provider1.providerInfo.serviceProvider, + providerId: Mocks.PROVIDERS.provider1.providerId, }) assert.equal( @@ -168,10 +163,9 @@ describe('StorageService', () => { Mocks.PROVIDERS.provider1.providerInfo.serviceProvider, 'Should select the requested provider' ) - assert.equal(context.dataSetId, undefined, 'Should not have a data set id when forceCreateDataSet is true') }) - it('should reuse existing data set with providerId when forceCreateDataSet is not set', async () => { + it('should reuse existing data set with providerId', async () => { server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, @@ -199,7 +193,7 @@ describe('StorageService', () => { }) // Should have reused existing data set (not created new one) assert.equal(context.serviceProvider, Mocks.PROVIDERS.provider1.providerInfo.serviceProvider) - assert.equal(context.dataSetId, 1n, 'Should not have a data set id when forceCreateDataSet is true') + assert.equal(context.dataSetId, 1n, 'Should reuse the existing data set') }) it('should throw when no approved providers available', async () => { @@ -448,7 +442,7 @@ describe('StorageService', () => { assert.equal(service.serviceProvider, Mocks.PROVIDERS.provider1.providerInfo.serviceProvider) }) - it('should select by providerAddress', async () => { + it('should select by providerId', async () => { server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, @@ -473,7 +467,7 @@ describe('StorageService', () => { const service = await StorageContext.create({ synapse, warmStorageService, - providerAddress: Mocks.PROVIDERS.provider2.providerInfo.serviceProvider, + providerId: Mocks.PROVIDERS.provider2.providerId, }) assert.equal(service.serviceProvider, Mocks.PROVIDERS.provider2.providerInfo.serviceProvider) @@ -533,7 +527,7 @@ describe('StorageService', () => 
{ } }) - it('should throw when providerAddress not approved', async () => { + it('should throw when providerId not approved', async () => { server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, @@ -546,7 +540,7 @@ describe('StorageService', () => { await StorageContext.create({ synapse, warmStorageService, - providerAddress: '0x6666666666666666666666666666666666666666', + providerId: 999n, }) assert.fail('Should have thrown error') } catch (error: any) { @@ -725,7 +719,7 @@ describe('StorageService', () => { } }) - it('should handle conflict between dataSetId and providerAddress', async () => { + it('should handle conflict between dataSetId and providerId', async () => { server.use( Mocks.JSONRPC({ ...Mocks.presets.basic, @@ -762,7 +756,7 @@ describe('StorageService', () => { synapse, warmStorageService, dataSetId: 1n, - providerAddress: '0x9999888877776666555544443333222211110000', + providerId: 999n, }) assert.fail('Should have thrown error') } catch (error: any) { @@ -1051,15 +1045,13 @@ describe('StorageService', () => { assert.equal(results[1].status, 'rejected') if (results[0].status === 'rejected' && results[1].status === 'rejected') { - assert.include(results[0].reason.message, 'Failed to upload piece to service provider') - assert.include(results[1].reason.message, 'Failed to upload piece to service provider') - // They should have the same error message (same batch) - assert.equal(results[0].reason.message, results[1].reason.message) + assert.include(results[0].reason.message, 'Failed to store piece on service provider') + assert.include(results[1].reason.message, 'Failed to store piece on service provider') } - // Third upload might succeed or fail depending on timing + // Third upload should also fail since the mock always errors if (results[2].status === 'rejected') { - assert.include(results[2].reason.message, 'Failed to upload piece to service provider') + assert.include(results[2].reason.message, 'Failed to store piece on service provider') } }) @@ 
-1183,7 +1175,7 @@ describe('StorageService', () => { } catch (error: any) { // The error is wrapped twice - first by the specific throw, then by the outer catch assert.include(error.message, 'StorageContext addPieces failed:') - assert.include(error.message, 'Failed to add piece to data set') + assert.include(error.message, 'Failed to commit pieces on-chain') } }) @@ -1275,7 +1267,7 @@ describe('StorageService', () => { await service.upload(testData) assert.fail('Should have thrown upload error') } catch (error: any) { - assert.include(error.message, 'Failed to upload piece to service provider') + assert.include(error.message, 'Failed to store piece on service provider') } }) @@ -1304,7 +1296,7 @@ describe('StorageService', () => { await service.upload(testData) assert.fail('Should have thrown add pieces error') } catch (error: any) { - assert.include(error.message, 'Failed to add piece to data set') + assert.include(error.message, 'Failed to commit pieces on-chain') } }) }) @@ -1353,8 +1345,8 @@ describe('StorageService', () => { await StorageContext.create({ synapse, warmStorageService }) assert.fail('Should have thrown error') } catch (error: any) { - assert.include(error.message, 'StorageContext selectProviderWithPing failed') - assert.include(error.message, 'All 2 providers failed health check') + assert.include(error.message, 'StorageContext smartSelectProvider failed') + assert.include(error.message, 'All 2 approved provider(s) failed health check') } }) }) @@ -1400,7 +1392,7 @@ describe('StorageService', () => { }) }) - describe('getDataSetPieces', () => { + describe('getPieces', () => { it('should successfully fetch data set pieces', async () => { const mockDataSetData = { id: 1, diff --git a/packages/synapse-sdk/src/test/synapse.test.ts b/packages/synapse-sdk/src/test/synapse.test.ts index a8cab876..b8e1679c 100644 --- a/packages/synapse-sdk/src/test/synapse.test.ts +++ b/packages/synapse-sdk/src/test/synapse.test.ts @@ -466,7 +466,7 @@ 
describe('Synapse', () => { assert.equal((contexts[0] as any)._dataSetId, 1n) }) - it('force creates new data set specified by providerId even when metadata matches', async () => { + it('creates new data set when metadata does not fully match existing data set', async () => { const metadata = { withCDN: '', } @@ -474,11 +474,10 @@ describe('Synapse', () => { providerIds: [Mocks.PROVIDERS.provider1.providerId], metadata, count: 1, - forceCreateDataSets: true, }) assert.equal(contexts.length, 1) assert.equal(BigInt(contexts[0].provider.id), Mocks.PROVIDERS.provider1.providerId) - // should create new data set + // Existing data set has { environment: 'test', withCDN: '' } which differs from { withCDN: '' } assert.equal((contexts[0] as any)._dataSetId, undefined) }) @@ -559,20 +558,22 @@ describe('Synapse', () => { assert.notEqual((contexts[0] as any)._dataSetId, (contexts[1] as any)._dataSetId) }) - it('does not create multiple contexts for a specified data set when providerId also provided', async () => { + it('throws when both dataSetIds and providerIds are specified', async () => { const metadata = { environment: 'test', withCDN: '', } - const contexts = await synapse.storage.createContexts({ - count: 2, - dataSetIds: [1n, 1n], - providerIds: [Mocks.PROVIDERS.provider1.providerId, Mocks.PROVIDERS.provider1.providerId], - metadata, - }) - assert.equal(contexts.length, 2) - assert.equal((contexts[0] as any)._dataSetId, 1) - assert.notEqual((contexts[0] as any)._dataSetId, (contexts[1] as any)._dataSetId) + try { + await synapse.storage.createContexts({ + count: 2, + dataSetIds: [1n], + providerIds: [Mocks.PROVIDERS.provider1.providerId], + metadata, + }) + assert.fail('Expected createContexts to throw') + } catch (error: any) { + assert.include(error.message, "Cannot specify both 'dataSetIds' and 'providerIds'") + } }) it('selects existing data set by default when metadata matches', async () => { @@ -603,20 +604,6 @@ describe('Synapse', () => { 
assert.notEqual(contexts[0].provider.id, 1n) }) - it('creates new data set context when forced even when metadata matches', async () => { - const metadata = { - environment: 'test', - withCDN: '', - } - const contexts = await synapse.storage.createContexts({ - count: 1, - metadata, - forceCreateDataSets: true, - }) - assert.equal(contexts.length, 1) - assert.equal((contexts[0] as any)._dataSetId, undefined) - }) - it('can select new data sets from different providers using default params', async () => { const contexts = await synapse.storage.createContexts() assert.equal(contexts.length, 2) @@ -635,22 +622,34 @@ describe('Synapse', () => { endorsedProviderIds.push(BigInt(endorsedProviderId)) }) - it('falls back to other provider when endorsed provider fails ping', async () => { + it('throws when endorsed provider is excluded', async () => { + try { + await synapse.storage.createContexts({ + count: 1, + excludeProviderIds: [BigInt(endorsedProviderId)], + }) + assert.fail('Expected createContexts to throw') + } catch (error: any) { + assert.include(error.message, 'No endorsed provider available') + } + }) + + it('throws when endorsed provider fails ping (no fallback to non-endorsed)', async () => { // mock ping to fail for endorsed provider const endorsedProvider = providers[index] server.use( http.get(`${endorsedProvider.products[0].offering.serviceURL}/pdp/ping`, () => HttpResponse.error()) ) - const contexts = await synapse.storage.createContexts({ - count: 1, - forceCreateDataSets: true, - }) - assert.equal(contexts.length, 1) - assert.equal((contexts[0] as any)._dataSetId, undefined) - - const otherProviderId = providerIds[providers.length - index - 1] - assert.equal(contexts[0].provider.id, otherProviderId) + try { + await synapse.storage.createContexts({ + count: 1, + }) + assert.fail('Expected createContexts to throw when no endorsed provider available') + } catch (error: any) { + assert.include(error.message, 'No endorsed provider available') + 
assert.include(error.message, 'failed health check') + } }) for (const count of [1, 2]) { @@ -662,7 +661,6 @@ describe('Synapse', () => { for (let i = 0; i < 5; i++) { const contexts = await synapse.storage.createContexts({ count, - forceCreateDataSets: true, // This prevents the defaultContexts caching }) assert.equal(contexts.length, count) assert.equal((contexts[0] as any)._dataSetId, undefined) @@ -753,49 +751,22 @@ describe('Synapse', () => { assert.equal(result.size, 1024) }) - it('fails when one storage provider returns wrong pieceCid', async () => { + it('fails when primary store fails', async () => { const data = new Uint8Array(1024) - const pieceCid = Piece.calculate(data) - const mockUUID = '12345678-90ab-cdef-1234-567890abcdef' - const found = true - const wrongCid = 'wrongCid' - for (const provider of [Mocks.PROVIDERS.provider1, Mocks.PROVIDERS.provider2]) { - const pdpOptions = { - baseUrl: provider.products[0].offering.serviceURL, - } - server.use(Mocks.pdp.postPieceUploadsHandler(mockUUID, pdpOptions)) - server.use(Mocks.pdp.uploadPieceStreamingHandler(mockUUID, pdpOptions)) - server.use( - Mocks.pdp.finalizePieceUploadHandler( - mockUUID, - provider === Mocks.PROVIDERS.provider1 ? 
pieceCid.toString() : wrongCid, - pdpOptions - ) - ) - server.use(Mocks.pdp.findPieceHandler(pieceCid.toString(), found, pdpOptions)) - server.use(Mocks.pdp.createAndAddPiecesHandler(FAKE_TX_HASH, pdpOptions)) - server.use( - Mocks.pdp.pieceAdditionStatusHandler( - DATA_SET_ID, - FAKE_TX_HASH, - { - txHash: FAKE_TX_HASH, - txStatus: 'pending', - dataSetId: DATA_SET_ID, - pieceCount: 1, - addMessageOk: true, - piecesAdded: true, - confirmedPieceIds: [0], - }, - pdpOptions - ) - ) + const pdpOptions = { + baseUrl: Mocks.PROVIDERS.provider1.products[0].offering.serviceURL, } + // Primary SP rejects upload + server.use( + http.post(`${pdpOptions.baseUrl}/pdp/piece/uploads`, async () => { + return HttpResponse.error() + }) + ) try { await synapse.storage.upload(data, { contexts }) - assert.fail('Expected upload to fail when one provider returns wrong pieceCid') + assert.fail('Expected upload to fail when primary store fails') } catch (error: any) { - assert.include(error.message, 'Failed to create upload session') + assert.include(error.message, 'Failed to store piece on service provider') } }) }) diff --git a/packages/synapse-sdk/src/types.ts b/packages/synapse-sdk/src/types.ts index bd8eb09c..27b89f90 100644 --- a/packages/synapse-sdk/src/types.ts +++ b/packages/synapse-sdk/src/types.ts @@ -8,14 +8,15 @@ import type { Chain } from '@filoz/synapse-core/chains' import type { PieceCID } from '@filoz/synapse-core/piece' import type { SessionKey, SessionKeyAccount } from '@filoz/synapse-core/session-key' +import type { PullStatus } from '@filoz/synapse-core/sp' import type { PDPProvider } from '@filoz/synapse-core/sp-registry' import type { MetadataObject } from '@filoz/synapse-core/utils' import type { Account, Address, Client, Hex, Transport } from 'viem' import type { Synapse } from './synapse.ts' import type { WarmStorageService } from './warm-storage/service.ts' -// Re-export PieceCID and PDPProvider types -export type { PieceCID, PDPProvider } +// Re-export PieceCID, 
PDPProvider, and PullStatus types +export type { PieceCID, PDPProvider, PullStatus } export type PrivateKey = string export type TokenAmount = bigint export type DataSetId = bigint @@ -207,12 +208,22 @@ export interface SettlementResult { // ============================================================================ // Storage Context Creation Types // ============================================================================ -// These types are used when creating or selecting storage contexts -// (provider + data set pairs) +// +// BaseContextOptions contains shared fields: withCDN, metadata, callbacks. +// +// StorageServiceOptions extends BaseContextOptions with singular fields +// (providerId, dataSetId) for single-context creation via createContext(). +// +// CreateContextsOptions extends BaseContextOptions with plural fields +// (providerIds, dataSetIds, count, excludeProviderIds) for createContexts(). +// +// StorageManagerUploadOptions (in manager.ts) extends CreateContextsOptions +// with upload-specific fields (contexts, pieceCid, pieceMetadata, signal). +// // ============================================================================ /** - * Callbacks for storage service creation process + * Callbacks for storage context creation process * * These callbacks provide visibility into the context creation process, * including provider and data set selection. @@ -231,32 +242,45 @@ export interface StorageContextCallbacks { onDataSetResolved?: (info: { isExisting: boolean; dataSetId: bigint; provider: PDPProvider }) => void } -export interface CreateContextsOptions { +/** + * Base options shared by all context creation methods + * + * Contains fields common to both single and multi-context creation: + * CDN enablement, metadata matching, and creation callbacks. + */ +export interface BaseContextOptions { + /** Whether to enable CDN services */ + withCDN?: boolean + + /** + * Custom metadata for data sets (key-value pairs). 
+ * Used to match existing data sets during provider selection. + */ + metadata?: Record + + /** Callbacks for creation process */ + callbacks?: StorageContextCallbacks +} + +/** + * Options for creating multiple storage contexts via createContexts() + * + * Extends BaseContextOptions with plural provider/dataset selection + * and count for multi-provider redundancy. + */ +export interface CreateContextsOptions extends BaseContextOptions { /** Number of contexts to create (optional, defaults to 2) */ count?: number /** - * Specific data set IDs to use + * Specific data set IDs to use (mutually exclusive with providerIds) */ dataSetIds?: bigint[] /** - * Specific provider IDs to use + * Specific provider IDs to use (mutually exclusive with dataSetIds) */ providerIds?: bigint[] /** Do not select any of these providers */ excludeProviderIds?: bigint[] - /** Whether to enable CDN services */ - withCDN?: boolean - /** - * Custom metadata for the data sets (key-value pairs) - * When smart-selecting data sets, this metadata will be used to match. - */ - metadata?: Record - /** Create new data sets, even if candidates exist */ - forceCreateDataSets?: boolean - /** Callbacks for creation process (will need to change to handle multiples) */ - callbacks?: StorageContextCallbacks - /** Maximum number of uploads to process in a single batch (default: 32, minimum: 1) */ - uploadBatchSize?: number } export interface ContextCreateContextsOptions extends CreateContextsOptions { @@ -267,34 +291,17 @@ export interface ContextCreateContextsOptions extends CreateContextsOptions { } /** - * Options for creating or selecting a storage context + * Options for creating or selecting a single storage context via createContext() * - * Used by StorageManager.createContext() and indirectly by StorageManager.upload() - * when auto-creating contexts. 
Allows specification of: - * - Provider selection (by ID or address) - * - Data set selection or creation - * - CDN enablement and metadata - * - Creation process callbacks - */ -export interface StorageServiceOptions { + * Extends BaseContextOptions with singular provider/dataset selection. + */ +export interface StorageServiceOptions extends BaseContextOptions { /** Specific provider ID to use (optional) */ providerId?: bigint /** Do not select any of these providers */ excludeProviderIds?: bigint[] - /** Specific provider address to use (optional) */ - providerAddress?: Address /** Specific data set ID to use (optional) */ dataSetId?: bigint - /** Whether to enable CDN services */ - withCDN?: boolean - /** Force creation of a new data set, even if a candidate exists */ - forceCreateDataSet?: boolean - /** Maximum number of uploads to process in a single batch (default: 32, minimum: 1) */ - uploadBatchSize?: number - /** Callbacks for creation process */ - callbacks?: StorageContextCallbacks - /** Custom metadata for the data set (key-value pairs) */ - metadata?: Record } export interface StorageContextCreateOptions extends StorageServiceOptions { @@ -339,12 +346,18 @@ export interface PreflightInfo { export interface UploadCallbacks { /** Called periodically during upload with bytes uploaded so far */ onProgress?: (bytesUploaded: number) => void - /** Called when upload to service provider completes */ - onUploadComplete?: (pieceCid: PieceCID) => void - /** Called when the service provider has added the piece(s) and submitted the transaction to the chain */ - onPiecesAdded?: (transaction: Hex, pieces?: { pieceCid: PieceCID }[]) => void - /** Called when the service provider agrees that the piece addition(s) are confirmed on-chain */ - onPiecesConfirmed?: (dataSetId: bigint, pieces: PieceRecord[]) => void + /** Called when piece data has been stored on a provider (before on-chain commit) */ + onStored?: (providerId: bigint, pieceCid: PieceCID) => void + /** 
Called when the addPieces transaction has been submitted for a provider */ + onPiecesAdded?: (transaction: Hex, providerId: bigint, pieces: { pieceCid: PieceCID }[]) => void + /** Called when the addPieces transaction is confirmed on-chain for a provider */ + onPiecesConfirmed?: (dataSetId: bigint, providerId: bigint, pieces: PieceRecord[]) => void + /** Called when a secondary copy completes successfully */ + onCopyComplete?: (providerId: bigint, pieceCid: PieceCID) => void + /** Called when a secondary copy fails */ + onCopyFailed?: (providerId: bigint, pieceCid: PieceCID, error: Error) => void + /** Called with pull status updates during SP-to-SP transfer */ + onPullProgress?: (providerId: bigint, pieceCid: PieceCID, status: PullStatus) => void } /** @@ -364,13 +377,41 @@ export interface PieceRecord { * Used by StorageContext.upload() for uploading data to a specific provider * and data set that has already been created/selected. */ -export interface UploadOptions extends UploadCallbacks { +export interface UploadOptions extends StoreOptions, UploadCallbacks { /** Custom metadata for this specific piece (key-value pairs) */ - metadata?: MetadataObject - /** Optional pre-calculated PieceCID to skip CommP calculation (BYO PieceCID) */ - pieceCid?: PieceCID - /** Optional AbortSignal to cancel the upload */ - signal?: AbortSignal + pieceMetadata?: MetadataObject +} + +/** + * Result for a single successful copy of data on a provider + */ +export interface CopyResult { + /** Provider ID that holds this copy */ + providerId: bigint + /** Data set ID on this provider */ + dataSetId: bigint + /** Piece ID within the data set */ + pieceId: bigint + /** Whether this is the primary (store) or secondary (pull) copy */ + role: 'primary' | 'secondary' + /** URL where this copy can be retrieved */ + retrievalUrl: string + /** Whether a new data set was created for this copy */ + isNewDataSet: boolean +} + +/** + * Record of a failed copy attempt + */ +export interface 
FailedCopy { + /** Provider ID that failed */ + providerId: bigint + /** Role of the failed copy */ + role: 'primary' | 'secondary' + /** Error description */ + error: string + /** Whether the provider was explicitly specified (no auto-retry for explicit) */ + explicit: boolean } /** @@ -381,8 +422,96 @@ export interface UploadResult { pieceCid: PieceCID /** Size of the original data */ size: number - /** Piece ID in the data set */ - pieceId?: bigint + /** Successful copies across providers */ + copies: CopyResult[] + /** Failed copy attempts (individual failures don't throw; check copies.length) */ + failures: FailedCopy[] +} + +// ============================================================================ +// Split Operation Types +// ============================================================================ +// The upload flow can be decomposed into: store → pull → commit +// These types support that split flow for advanced use cases. +// ============================================================================ + +/** + * Options for storing data on a provider without on-chain commit + */ +export interface StoreOptions { + /** Optional pre-calculated PieceCID to skip CommP calculation */ + pieceCid?: PieceCID + /** Optional AbortSignal to cancel the store */ + signal?: AbortSignal + /** Progress callback for upload bytes */ + onProgress?: (bytesUploaded: number) => void +} + +/** + * Result of a store operation + */ +export interface StoreResult { + /** PieceCID of the stored data */ + pieceCid: PieceCID + /** Size of the original data in bytes */ + size: number +} + +/** + * Source for pulling pieces from another provider + */ +export type PullSource = string | { getPieceUrl: (pieceCid: PieceCID) => string } + +/** + * Options for pulling pieces from a source provider + */ +export interface PullOptions { + /** Pieces to pull */ + pieces: PieceCID[] + /** Source provider to pull from (URL or context with getPieceUrl) */ + from: PullSource + /** Optional 
AbortSignal */ + signal?: AbortSignal + /** Pull progress callback */ + onProgress?: (pieceCid: PieceCID, status: PullStatus) => void + /** Pre-built signed extraData (avoids double wallet prompts) */ + extraData?: Hex +} + +/** + * Result of a pull operation + */ +export interface PullResult { + /** Overall status */ + status: 'complete' | 'failed' + /** Per-piece status */ + pieces: Array<{ pieceCid: PieceCID; status: 'complete' | 'failed' }> +} + +/** + * Options for committing pieces on-chain + */ +export interface CommitOptions { + /** Pieces to commit with optional per-piece metadata */ + pieces: Array<{ pieceCid: PieceCID; pieceMetadata?: MetadataObject }> + /** Pre-built signed extraData (avoids re-signing) */ + extraData?: Hex + /** Called when the commit transaction is submitted (before on-chain confirmation) */ + onSubmitted?: (txHash: Hex) => void +} + +/** + * Result of a commit operation + */ +export interface CommitResult { + /** Transaction hash */ + txHash: Hex + /** Piece IDs assigned by the contract */ + pieceIds: bigint[] + /** Data set ID (may be newly created) */ + dataSetId: bigint + /** Whether a new data set was created */ + isNewDataSet: boolean } /** diff --git a/utils/example-storage-e2e.js b/utils/example-storage-e2e.js index f42c37f7..79b8a934 100644 --- a/utils/example-storage-e2e.js +++ b/utils/example-storage-e2e.js @@ -6,6 +6,11 @@ * Demonstrates uploading files to Filecoin storage via the Synapse SDK and * downloading them back to verify the round-trip. * + * Two upload paths are shown: + * - Single file: upload() with streaming - handles everything automatically + * - Multiple files: split operations (store -> pull -> commit) - batches on-chain + * transactions for efficiency (1 tx per provider instead of N) + * * Usage: * PRIVATE_KEY=0x... node utils/example-storage-e2e.js [file-path2] ... * NETWORK=devnet node utils/example-storage-e2e.js ... 
@@ -14,7 +19,9 @@ * See resolveConfig() at the bottom of this file for all environment variables. */ +import fs from 'fs' import fsPromises from 'fs/promises' +import { Readable } from 'stream' import { http as viemHttp } from 'viem' import { privateKeyToAccount } from 'viem/accounts' import { calibration, mainnet } from '../packages/synapse-core/src/chains.ts' @@ -25,13 +32,12 @@ async function main() { console.log('=== Synapse SDK Storage E2E Example ===\n') - // Read files into memory + // Validate files and collect metadata console.log(`Reading file${filePaths.length !== 1 ? 's' : ''}...`) const files = [] let totalSize = 0 for (const filePath of filePaths) { - console.log(` Reading file: ${filePath}`) const stat = await fsPromises.stat(filePath) if (!stat.isFile()) { throw new Error(`Path is not a file: ${filePath}`) @@ -41,7 +47,8 @@ async function main() { `File exceeds maximum size of ${formatBytes(SIZE_CONSTANTS.MAX_UPLOAD_SIZE)}: ${filePath} (${formatBytes(stat.size)})` ) } - files.push({ path: filePath, handle: await fsPromises.open(filePath, 'r'), length: stat.size }) + console.log(` ${filePath} (${formatBytes(stat.size)})`) + files.push({ path: filePath, length: stat.size }) totalSize += stat.size } @@ -68,49 +75,9 @@ async function main() { console.log(`FIL balance: ${Number(filBalance) / 1e18} FIL`) console.log(`USDFC balance: ${formatUSDFC(usdfcBalance)}`) - // Create storage context (auto-selects provider and data set) - console.log('\n--- Setting Up Storage Context ---') - const contexts = await synapse.storage.createContexts({ - count: 1, - withCDN: false, - callbacks: { - onProviderSelected: (provider) => { - console.log(`Selected service provider: ${provider.serviceProvider}`) - }, - onDataSetResolved: (info) => { - if (info.isExisting) { - console.log(`Using existing data set: ${info.dataSetId}`) - } else { - console.log(`Created new data set: ${info.dataSetId}`) - } - }, - }, - }) - - for (const [index, storageContext] of contexts.entries()) 
{ - const providerLabel = contexts.length > 1 ? ` #${index + 1}` : '' - if (storageContext.dataSetId === undefined) { - console.log('Data set not yet created') - } else { - console.log(`Data set ID: ${storageContext.dataSetId}`) - } - const pieceCids = await storageContext.getDataSetPieces() - console.log(`Data set contains ${pieceCids.length} piece CIDs`) - - console.log(`\n--- Service Provider${providerLabel} Details ---`) - const providerInfo = await storageContext.getProviderInfo() - console.log(`Provider ID: ${providerInfo.id}`) - console.log(`Provider Address: ${providerInfo.serviceProvider}`) - console.log(`Provider Name: ${providerInfo.name}`) - console.log(`Active: ${providerInfo.isActive}`) - if (providerInfo.pdp?.serviceURL) { - console.log(`PDP Service URL: ${providerInfo.pdp.serviceURL}`) - } - } - // Preflight checks console.log('\n--- Preflight Upload Check ---') - const preflight = await synapse.storage.preflightUpload(totalSize) + const preflight = await synapse.storage.preflightUpload({ size: totalSize }) console.log('Estimated costs:') console.log(` Per epoch (30s): ${formatUSDFC(preflight.estimatedCost.perEpoch)}`) @@ -128,152 +95,216 @@ async function main() { console.log('Sufficient allowances available') - // Upload + // Upload files - single vs multi-file paths console.log('\n--- Uploading ---') - const providerText = contexts.length > 1 ? 
`${contexts.length} service providers` : 'service provider' - if (files.length > 1) { - console.log(`Uploading files to ${providerText} in parallel...\n`) - } else { - console.log(`Uploading file to ${providerText}...\n`) - } - const uploadPromises = files.map(async (file, index) => { - let pfx = '' - if (files.length > 1) { - pfx = `[File ${index + 1}/${files.length}] ` - } + // uploadResults is built by either the single-file or multi-file path + const uploadResults = [] + + if (files.length === 1) { + // ----------------------------------------------------------------- + // Single file: upload() handles everything - provider selection, + // data transfer, SP-to-SP replication, and on-chain commitment. + // Uses streaming to avoid buffering the entire file in memory. + // ----------------------------------------------------------------- + const file = files[0] + const fileStream = Readable.toWeb(fs.createReadStream(file.path)) + console.log(`Uploading ${file.path} (${formatBytes(file.length)}) via stream...\n`) const PROGRESS_CHUNK_SIZE = 10 * 1024 * 1024 // 10 MiB let lastReportedBytes = 0 - const data = contexts.length !== 1 ? await file.handle.readFile() : file.handle.readableWebStream() - return synapse.storage.upload(data, { - contexts, + const result = await synapse.storage.upload(fileStream, { callbacks: { + onProviderSelected: (provider) => { + console.log(` Selected provider: ${provider.serviceProvider}`) + }, + onDataSetResolved: (info) => { + const verb = info.isExisting ? 
'Using existing' : 'Created new' + console.log(` ${verb} data set: ${info.dataSetId}`) + }, onProgress: (bytesUploaded) => { if (bytesUploaded - lastReportedBytes >= PROGRESS_CHUNK_SIZE || bytesUploaded === file.length) { - let progressMsg = '' - if (file.length !== -1) { - const percent = ((bytesUploaded / file.length) * 100).toFixed(1) - progressMsg = `${formatBytes(bytesUploaded)} / ${formatBytes(file.length)} (${percent}%)` - } else { - progressMsg = `${formatBytes(bytesUploaded)}` - } - console.log(` ${pfx}Upload progress: ${progressMsg}`) + const pct = file.length > 0 ? ` (${((bytesUploaded / file.length) * 100).toFixed(1)}%)` : '' + console.log(` Upload progress: ${formatBytes(bytesUploaded)}${pct}`) lastReportedBytes = bytesUploaded } }, - onUploadComplete: (pieceCid) => { - console.log(`${pfx}Upload complete! PieceCID: ${pieceCid}`) + onStored: (providerId, pieceCid) => { + console.log(` Stored on provider ${providerId}: ${pieceCid}`) }, - onPieceAdded: (transactionHash) => { - console.log(`${pfx}Piece addition transaction: ${transactionHash}`) + onPiecesAdded: (transaction, providerId, pieces) => { + console.log(` Pieces added for provider ${providerId}, tx: ${transaction}`) + for (const { pieceCid } of pieces) { + console.log(` ${pieceCid}`) + } }, - onPieceConfirmed: (pieceIds) => { - console.log(`${pfx}Piece addition confirmed! ID(s): ${pieceIds.join(', ')}`) + onPiecesConfirmed: (dataSetId, providerId, pieces) => { + console.log(` Data set ${dataSetId} confirmed on provider ${providerId}`) + for (const { pieceCid, pieceId } of pieces) { + console.log(` ${pieceCid} -> pieceId ${pieceId}`) + } }, }, }) - }) - const uploadResults = await Promise.all(uploadPromises) - await Promise.all(files.map((file) => file.handle.close())) + uploadResults.push({ file, result }) + } else { + // ----------------------------------------------------------------- + // Multiple files: orchestrate store -> pull -> commit manually. 
+ // + // More efficient than calling upload() per file because all pieces + // are committed in a single on-chain transaction per provider, + // rather than one transaction per file. + // ----------------------------------------------------------------- + console.log(`Uploading ${files.length} files using split operations...\n`) + + // Create storage contexts - primary (endorsed) + secondaries + const contexts = await synapse.storage.createContexts({ + callbacks: { + onProviderSelected: (provider) => { + console.log(` Selected provider: ${provider.serviceProvider}`) + }, + onDataSetResolved: (info) => { + const verb = info.isExisting ? 'Using existing' : 'Created new' + console.log(` ${verb} data set: ${info.dataSetId}`) + }, + }, + }) - console.log('\n--- Upload Summary ---') - uploadResults.forEach((fileResult, fileIndex) => { - console.log(`File ${fileIndex + 1}: ${files[fileIndex].path}`) - console.log(` PieceCID: ${fileResult.pieceCid}`) - console.log(` Size: ${formatBytes(fileResult.size)}`) - console.log(` Piece ID: ${fileResult.pieceId}`) - }) + const [primary, ...secondaries] = contexts + console.log(`Primary: SP ${primary.provider.id}`) + for (const sec of secondaries) { + console.log(`Secondary: SP ${sec.provider.id}`) + } - // Download and verify - console.log('\n--- Downloading Files ---') - console.log(`Downloading file${files.length !== 1 ? 
's in parallel' : ''}...\n`) + // Store all files on the primary provider in parallel using streaming + const stored = await Promise.all( + files.map(async (file) => { + const fileStream = Readable.toWeb(fs.createReadStream(file.path)) + console.log(`\nStoring ${file.path} (${formatBytes(file.length)}) on SP ${primary.provider.id}...`) + const storeResult = await primary.store(fileStream) + console.log(` Stored: ${storeResult.pieceCid}`) + return { file, pieceCid: storeResult.pieceCid, size: storeResult.size } + }) + ) - const downloadPromises = uploadResults.map((fileResult, index) => { - console.log(` Downloading file ${index + 1}: ${fileResult.pieceCid}`) - return synapse.storage.download(fileResult.pieceCid) - }) + // Pull all pieces to each secondary via SP-to-SP transfer. + // Pre-sign extraData per secondary so the same signature covers both + // the pull (estimateGas validation) and commit (on-chain submission). + const pieceCids = stored.map((s) => s.pieceCid) + const pieceInputs = stored.map((s) => ({ pieceCid: s.pieceCid })) + const successfulSecondaries = [] + + for (const secondary of secondaries) { + console.log(`\nPulling ${pieceCids.length} piece(s) to SP ${secondary.provider.id}...`) + try { + const extraData = await secondary.presignForCommit(pieceInputs) + const pullResult = await secondary.pull({ pieces: pieceCids, from: primary, extraData }) + + if (pullResult.status === 'complete') { + console.log(' Pull complete') + successfulSecondaries.push({ context: secondary, extraData }) + } else { + const failedPieces = pullResult.pieces.filter((p) => p.status === 'failed') + console.log(` Pull failed for ${failedPieces.length} piece(s)`) + } + } catch (error) { + console.log(` Pull failed: ${error.message}`) + } + } + + // Commit all pieces on each provider in a single transaction. + // Primary commits without extraData (signs internally); secondaries + // reuse the extraData signed during the pull step. 
+ console.log(`\nCommitting ${stored.length} piece(s) on ${1 + successfulSecondaries.length} provider(s)...`) + + const primaryCommit = await primary.commit({ pieces: pieceInputs }) + console.log(` Committed on SP ${primary.provider.id} (tx: ${primaryCommit.txHash.slice(0, 18)}...)`) + + const secondaryCommits = [] + for (const { context, extraData } of successfulSecondaries) { + try { + const result = await context.commit({ pieces: pieceInputs, extraData }) + console.log(` Committed on SP ${context.provider.id} (tx: ${result.txHash.slice(0, 18)}...)`) + secondaryCommits.push({ context, result }) + } catch (error) { + console.log(` Commit failed on SP ${context.provider.id}: ${error.message}`) + } + } + + // Build upload results (same shape as upload() returns) + for (const { file, pieceCid, size } of stored) { + const i = stored.findIndex((s) => s.pieceCid === pieceCid) + const copies = [ + { + providerId: primary.provider.id, + dataSetId: primaryCommit.dataSetId, + pieceId: primaryCommit.pieceIds[i], + role: 'primary', + }, + ...secondaryCommits.map(({ context, result }) => ({ + providerId: context.provider.id, + dataSetId: result.dataSetId, + pieceId: result.pieceIds[i], + role: 'secondary', + })), + ] + uploadResults.push({ file, result: { pieceCid, size, copies, failures: [] } }) + } + } + + // Upload summary + console.log('\n--- Upload Summary ---') + for (const { file, result } of uploadResults) { + console.log(`\nFile: ${file.path}`) + console.log(` PieceCID: ${result.pieceCid}`) + console.log(` Size: ${formatBytes(result.size)}`) + + for (const copy of result.copies) { + const roleLabel = copy.role === 'primary' ? 
'[Primary] ' : '[Secondary]' + console.log(` ${roleLabel} Provider ${copy.providerId} - pieceId: ${copy.pieceId}, dataSetId: ${copy.dataSetId}`) + } + + if (result.failures.length > 0) { + for (const failure of result.failures) { + console.log(` Failed: provider ${failure.providerId} - ${failure.error.message}`) + } + } + } - const downloadedFiles = await Promise.all(downloadPromises) - console.log(`\nDownloaded ${downloadedFiles.length} file${files.length !== 1 ? 's' : ''} successfully`) + // Download and verify + console.log('\n--- Downloading and Verifying ---') - console.log('\n--- Verifying Data ---') let allMatch = true + for (const { file, result } of uploadResults) { + console.log(`\nDownloading ${result.pieceCid}...`) + const downloadedData = await synapse.storage.download({ pieceCid: result.pieceCid }) - for (let i = 0; i < files.length; i++) { - const downloadedData = downloadedFiles[i] if (downloadedData == null) { - console.warn(`Skipped File ${i + 1} (${files[i].path})`) + console.error(' FAILED: Could not download') + allMatch = false continue } - const originalData = await fsPromises.readFile(files[i].path) + const originalData = await fsPromises.readFile(file.path) const matches = Buffer.from(originalData).equals(Buffer.from(downloadedData)) - console.log( - `File ${i + 1} (${files[i].path}): ${matches ? 
'MATCH' : 'MISMATCH'} (${formatBytes(downloadedData.length)})` - ) - - if (!matches) { + if (matches) { + console.log(` VERIFIED: ${formatBytes(downloadedData.length)} matches original`) + } else { + console.error(' MISMATCH: Downloaded data does not match original!') allMatch = false } } if (!allMatch) { - console.error('\nERROR: One or more downloaded files do not match originals!') + console.error('\nERROR: One or more files failed verification!') process.exit(1) } - console.log('\nSUCCESS: All downloaded files match originals!') - - // Piece status and storage info - console.log('\n--- Piece Status ---') - - for (const fileResult of uploadResults) { - const pieceCid = fileResult.pieceCid - - for (let spIndex = 0; spIndex < contexts.length; spIndex++) { - const storageContext = contexts[spIndex] - const providerLabel = contexts.length > 1 ? ` #${spIndex + 1}` : '' - const firstPieceStatus = await storageContext.pieceStatus(pieceCid) - console.log(`Data set exists on provider: ${firstPieceStatus.exists}`) - if (firstPieceStatus.dataSetLastProven) { - console.log(`Data set last proven: ${firstPieceStatus.dataSetLastProven.toLocaleString()}`) - } - if (firstPieceStatus.dataSetNextProofDue) { - console.log(`Data set next proof due: ${firstPieceStatus.dataSetNextProofDue.toLocaleString()}`) - } - if (firstPieceStatus.inChallengeWindow) { - console.log('Currently in challenge window - proof must be submitted soon') - } else if (firstPieceStatus.hoursUntilChallengeWindow && firstPieceStatus.hoursUntilChallengeWindow > 0) { - console.log(`Hours until challenge window: ${firstPieceStatus.hoursUntilChallengeWindow.toFixed(1)}`) - } - - const providerInfo = storageContext.provider - console.log(`\n--- Storage Information${providerLabel} ---`) - const fileText = files.length !== 1 ? 
'files are' : 'file is' - console.log(`Your ${uploadResults.length} ${fileText} now stored on the Filecoin network:`) - console.log(`- Data set ID: ${storageContext.dataSetId}`) - console.log(`- Service provider: ${storageContext.provider.serviceProvider}`) - - console.log('\nUploaded pieces:') - uploadResults.forEach((fileResult, fileIndex) => { - console.log(`\n File ${fileIndex + 1}: ${files[fileIndex].path}`) - console.log(` PieceCID: ${fileResult.pieceCid}`) - console.log(` Piece ID: ${fileResult.pieceId}`) - console.log(` Size: ${formatBytes(fileResult.size)}`) - if (providerInfo.pdp?.serviceURL) { - console.log( - ` Retrieval URL: ${providerInfo.pdp.serviceURL.replace(/\/$/, '')}/piece/${fileResult.pieceCid}` - ) - } - }) - } - } - - console.log('\nThe service provider(s) will periodically prove they still have your data.') + console.log('\n=== SUCCESS: All files uploaded, replicated, and verified ===') + console.log('The service provider(s) will periodically prove they still have your data.') console.log('You are being charged based on the storage size and duration.') } @@ -304,8 +335,8 @@ function formatUSDFC(amount) { * * Devnet mode (NETWORK=devnet): * Loads chain config from foc-devnet's devnet-info.json. PRIVATE_KEY is - * optional — defaults to the first devnet user. - * - DEVNET_INFO_PATH: Path to devnet-info.json + * optional - defaults to the first devnet user. 
+ * - DEVNET: Path to devnet-info.json * (default: ~/.foc-devnet/state/latest/devnet-info.json) * - DEVNET_USER_INDEX: Which user from devnet info (default: 0) * @@ -332,7 +363,7 @@ async function resolveConfig() { const { validateDevnetInfo, toChain } = await import('../packages/synapse-core/src/devnet/index.ts') const devnetInfoPath = - process.env.DEVNET_INFO_PATH || join(homedir(), '.foc-devnet', 'state', 'latest', 'devnet-info.json') + process.env.DEVNET || join(homedir(), '.foc-devnet', 'state', 'latest', 'devnet-info.json') const userIndex = Number(process.env.DEVNET_USER_INDEX || '0') console.log(`Loading devnet info from: ${devnetInfoPath}`) From 31a254e90d4d78100350c29785cf1a92e6112152 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Mon, 16 Feb 2026 21:39:50 +1100 Subject: [PATCH 2/2] feat(storage): extract provider selection to synapse-core and update docs for multi-copy Move provider selection logic (selectProviders, fetchProviderSelectionInput, findMatchingDataSets) from SDK internals to synapse-core as public API for DIY users. Simplify selection from 4-tier fallback to 2-tier preference (existing dataset -> new dataset) since endorsedIds already controls the eligible pool. Clean up createContexts() to three explicit paths (dataSetIds, providerIds, smartSelect) with count validation and duplicate-provider guard. Update storage docs to reflect multi-copy as the default upload path. 
--- .../docs/developer-guides/components.mdx | 10 +- .../storage/storage-context.mdx | 641 +++++++++--------- .../storage/storage-operations.mdx | 281 ++++++-- .../content/docs/developer-guides/synapse.md | 17 +- .../content/docs/getting-started/index.mdx | 28 +- package.json | 1 + .../fetch-provider-selection-input.ts | 46 ++ .../warm-storage/find-matching-data-sets.ts | 63 ++ .../src/warm-storage/get-pdp-data-set.ts | 8 +- .../synapse-core/src/warm-storage/index.ts | 4 + .../src/warm-storage/location-types.ts | 79 +++ .../src/warm-storage/select-providers.ts | 91 +++ .../synapse-core/src/warm-storage/types.ts | 2 + .../fetch-provider-selection-input.test.ts | 96 +++ .../test/find-matching-data-sets.test.ts | 151 +++++ .../test/select-providers.test.ts | 444 ++++++++++++ packages/synapse-sdk/src/storage/context.ts | 375 ++++------ packages/synapse-sdk/src/storage/manager.ts | 21 +- .../src/test/metadata-selection.test.ts | 113 --- .../synapse-sdk/src/test/session-keys.test.ts | 4 +- packages/synapse-sdk/src/test/storage.test.ts | 74 +- packages/synapse-sdk/src/test/synapse.test.ts | 93 ++- packages/synapse-sdk/src/types.ts | 19 +- packages/synapse-sdk/src/utils/index.ts | 2 +- packages/synapse-sdk/src/utils/metadata.ts | 34 - utils/example-storage-e2e.js | 27 +- 26 files changed, 1855 insertions(+), 869 deletions(-) create mode 100644 packages/synapse-core/src/warm-storage/fetch-provider-selection-input.ts create mode 100644 packages/synapse-core/src/warm-storage/find-matching-data-sets.ts create mode 100644 packages/synapse-core/src/warm-storage/location-types.ts create mode 100644 packages/synapse-core/src/warm-storage/select-providers.ts create mode 100644 packages/synapse-core/test/fetch-provider-selection-input.test.ts create mode 100644 packages/synapse-core/test/find-matching-data-sets.test.ts create mode 100644 packages/synapse-core/test/select-providers.test.ts diff --git a/docs/src/content/docs/developer-guides/components.mdx 
b/docs/src/content/docs/developer-guides/components.mdx index d7fa7a3d..425f7a92 100644 --- a/docs/src/content/docs/developer-guides/components.mdx +++ b/docs/src/content/docs/developer-guides/components.mdx @@ -81,7 +81,7 @@ Check out the [Payment Operations](/developer-guides/payments/payment-operations ### StorageManager -**Purpose**: High-level, auto-managed storage operations - upload and download data to and from the Filecoin Onchain Cloud. +**Purpose**: High-level storage operations with multi-copy durability. `upload()` stores data on multiple providers automatically. Also handles provider-agnostic downloads. **API Reference**: [StorageManager API Reference](/reference/filoz/synapse-sdk/storage/classes/storagemanager/) @@ -89,11 +89,11 @@ Check out the [Storage Operations](/developer-guides/storage/storage-operations/ ### StorageContext -**Purpose**: Provider-specific storage operations - upload and download data to and from the Filecoin Onchain Cloud. +**Purpose**: Provider-specific split operations (`store` → `pull` → `commit`). Used for batch uploads, custom error handling, and manual orchestration of multi-copy flows. **API Reference**: [StorageContext API Reference](/reference/filoz/synapse-sdk/storage/classes/storagecontext/) -Check out the [Storage Context](/developer-guides/storage/storage-context/) guide for more details. +Check out the [Split Operations](/developer-guides/storage/storage-context/) guide for more details. 
### WarmStorageService @@ -162,8 +162,8 @@ Choose your learning path based on your immediate needs: Jump straight to code with the [**Getting Started Guide →**](/getting-started/) -- [**Storage Operations →**](/developer-guides/storage/storage-operations/) - Upload and download your first file -- [**Storage Context →**](/developer-guides/storage/storage-context/) - Advanced storage operations and batch uploads +- [**Storage Operations →**](/developer-guides/storage/storage-operations/) - Multi-copy uploads and downloads +- [**Split Operations →**](/developer-guides/storage/storage-context/) - Manual control over store, pull, and commit - [**Payment Operations →**](/developer-guides/payments/payment-operations/) - Fund your account and manage payments - [**Rails & Settlement →**](/developer-guides/payments/rails-settlement/) - Payment mechanics and settlement strategies diff --git a/docs/src/content/docs/developer-guides/storage/storage-context.mdx b/docs/src/content/docs/developer-guides/storage/storage-context.mdx index c5ff7597..b317d08b 100644 --- a/docs/src/content/docs/developer-guides/storage/storage-context.mdx +++ b/docs/src/content/docs/developer-guides/storage/storage-context.mdx @@ -1,108 +1,93 @@ --- -title: Storage Context -description: Guides for using the Storage Context API. +title: Split Operations +description: Manual control over store, pull, and commit phases for advanced upload workflows. sidebar: order: 3 + label: Split Operations --- :::tip[Advanced Guide] -This guide is for developers who need fine-grained control over storage operations. -You'll learn about explicit provider selection, batch uploads, lifecycle management, and download strategies. +This guide is for developers who need manual control over each phase of the upload pipeline. +For most use cases, `synapse.storage.upload()` handles everything automatically — see [Storage Operations](/developer-guides/storage/storage-operations/). 
**Audience**: Experienced developers building production applications **Prerequisites**: Complete the [Storage Operations Guide](/developer-guides/storage/storage-operations/) first -**When to use this**: Batch operations, custom callbacks, specific provider requirements, advanced error handling +**When to use this**: Batch uploads, custom error handling at each phase, pre-signing for wallet UX, explicit provider/dataset targeting ::: -## Storage Context Overview - -A Storage Context represents a connection to a specific storage provider and data set. Unlike the auto-managed approach in the [Storage Operations Guide](/developer-guides/storage/storage-operations/), contexts give you explicit control over these key capabilities: - -- **Provider Selection**: Choose specific providers for your data -- **Data Set Management**: Create, reuse, and manage data sets explicitly -- **Batch Operations**: Upload multiple pieces efficiently with progress tracking -- **Lifecycle Control**: Terminate data sets and delete pieces when needed -- **Download Strategies**: Choose between SP-agnostic and SP-specific retrieval - -This guide assumes you've already completed the [Storage Operations Guide](/developer-guides/storage/storage-operations/) and understand the basics of uploading and downloading data. 
- -### Creating a Storage Context - -#### Creation Options - -```ts twoslash -// @lib: esnext,dom -import { PDPProvider } from "@filoz/synapse-sdk"; -type StorageContextCallbacks = { - onProviderSelected?: (provider: PDPProvider) => void; - onDataSetResolved?: (info: { - isExisting: boolean; - dataSetId: bigint; - provider: PDPProvider; - }) => void; -}; -// ---cut--- -interface StorageServiceOptions { - providerId?: number; // Specific provider ID to use (optional) - excludeProviderIds?: number[]; // Do not select any of these providers (optional) - providerAddress?: string; // Specific provider address to use (optional) - dataSetId?: number; // Specific data set ID to use (optional) - withCDN?: boolean; // Enable CDN services (optional) - forceCreateDataSet?: boolean; // Force creation of a new data set, even if a candidate exists (optional) - callbacks?: StorageContextCallbacks; // Progress callbacks (optional) - metadata?: Record; // Metadata requirements for data set selection/creation - uploadBatchSize?: number; // Max uploads per batch (default: 32, min: 1) -} +## When You Need This + +The high-level `upload()` handles single-piece multi-copy uploads end-to-end. 
Use split operations when you need: + +- **Batch uploading** many files to specific providers without repeated context creation +- **Custom error handling** at each phase — retry store failures, skip failed secondaries, recover from commit failures +- **Signing control** to avoid multiple wallet signature prompts during multi-copy uploads +- **Explicit provider/dataset targeting** for uploading to known providers + +## The Upload Pipeline + +Every upload goes through three phases: + +``` +store ──► pull ──► commit + │ │ │ + │ │ └─ On-chain: create dataset, add piece, start payments + │ └─ SP-to-SP: secondary provider fetches from primary + └─ Upload: bytes sent to one provider (no on-chain state yet) +``` ``` -Monitor the creation process with detailed callbacks to track progress: - -```ts twoslash -// @lib: esnext,dom -import { Synapse, StorageServiceOptions } from "@filoz/synapse-sdk"; -import { privateKeyToAccount } from 'viem/accounts' -const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }); -// ---cut--- -const storageContext = await synapse.storage.createContext({ - providerAddress: "0x...", // Optional: use specific provider address - withCDN: true, // Optional: enable CDN for faster downloads - metadata: { - Application: "Filecoin Storage DApp", - Version: "1.0.0", - Category: "AI", - }, - callbacks: { - onDataSetResolved: (info) => { - if (info.isExisting) { - console.log( - `Data set with id ${info.dataSetId}`, - `matches your context criteria and will be reused` - ); - } else { - console.log( - `No matching data set found`, - `A new data set will be created in the next file upload`, - `In a single transaction!` - ); - } - }, - onProviderSelected: (provider) => { - console.log( - `Selected Provider with`, - ` id: ${provider.id}`, - ` name: ${provider.name}`, - ` description: ${provider.description}`, - ` address: ${provider.serviceProvider}` - ); - }, - }, -}); +- **store**: Upload bytes to a single SP. 
Returns `{ pieceCid, size }`. The piece is "parked" on the SP but not yet on-chain and subject to garbage collection if not committed. +- **pull**: SP-to-SP transfer. The destination SP fetches the piece from a source SP. No client bandwidth used. +- **commit**: Submit an on-chain transaction to add the piece to a data set. Creates the data set and payment rail if needed. + +## Creating Contexts + +A `StorageContext` represents a connection to a specific provider and data set. Create one for single-provider work, or multiple for multi-copy: + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) + +// Single context — auto-selects provider +const ctx = await synapse.storage.createContext({ + metadata: { source: "my-app" }, +}) + +// Multiple contexts for multi-copy +const contexts = await synapse.storage.createContexts({ + count: 2, + metadata: { source: "my-app" }, +}) +const [primary, secondary] = contexts +``` + +Context creation options: + +```ts +// Single context (createContext) +await synapse.storage.createContext({ + providerId: 1n, // specific provider (optional) + dataSetId: 42n, // specific data set (optional) + metadata: { ... }, // data set metadata for matching/creation + withCDN: true, // enable fast-retrieval (paid, optional) + excludeProviderIds: [3n], // skip specific providers (optional) +}) + +// Multiple contexts (createContexts) +await synapse.storage.createContexts({ + count: 3, // number of contexts (default: 2) + providerIds: [1n, 2n, 3n], // specific providers (mutually exclusive with dataSetIds) + dataSetIds: [10n, 20n, 30n], // specific data sets (mutually exclusive with providerIds) + metadata: { ... }, +}) ``` ### Data Set Selection and Matching :::tip[Metadata Matching for Cost Efficiency] -**The SDK reuses existing data sets when metadata matches exactly**, avoiding floor pricing. 
To maximize reuse: +**The SDK reuses existing data sets when metadata matches exactly**, avoiding duplicate payment rails. To maximize reuse: - Use consistent metadata keys and values across uploads - Avoid changing metadata unnecessarily @@ -116,7 +101,7 @@ The SDK intelligently manages data sets to minimize on-chain transactions. The s **Selection Scenarios**: 1. **Explicit data set ID**: If you specify `dataSetId`, that exact data set is used (must exist and be accessible) -2. **Specific provider**: If you specify `providerId` or `providerAddress`, the SDK searches for matching data sets only within that provider's existing data sets +2. **Specific provider**: If you specify `providerId`, the SDK searches for matching data sets only within that provider's existing data sets 3. **Automatic selection**: Without specific parameters, the SDK searches across all your data sets with any approved provider **Exact Metadata Matching**: In scenarios 2 and 3, the SDK will reuse an existing data set only if it has **exactly** the same metadata keys and values as requested. This ensures data sets remain organized according to your specific requirements. @@ -128,174 +113,163 @@ The SDK intelligently manages data sets to minimize on-chain transactions. The s **Provider Selection** (when no matching data sets exist): -- If you specify a provider (via `providerId` or `providerAddress`), that provider is used -- Otherwise, the SDK currently uses random selection from all approved providers +- If you specify a provider (via `providerId`), that provider is used +- Otherwise, the SDK selects from endorsed providers for the primary copy and any approved provider for secondaries - Before finalizing selection, the SDK verifies the provider is reachable via a ping test - If a provider fails the ping test, the SDK tries the next candidate -- After the provider is selected, the SDK will automatically create a new data set in the next file upload in a single transaction. 
- -**API Design**: - -```ts twoslash -// @lib: esnext,dom -import { - PieceCID, - PieceRecord, - UploadResult, - PDPProvider, - PreflightInfo, - PieceStatus, -} from "@filoz/synapse-sdk"; -import { Hash } from 'viem' -type Transaction = Promise; -type Hex = `0x${string}`; -export interface UploadCallbacks { - /** Called periodically during upload with bytes uploaded so far */ - onProgress?: (bytesUploaded: number) => void; - /** Called when upload to service provider completes */ - onUploadComplete?: (pieceCid: PieceCID) => void; - /** Called when the service provider has added the piece(s) and submitted the transaction to the chain */ - onPiecesAdded?: (transaction?: Hex, pieces?: { pieceCid: PieceCID }[]) => void; - /** Called when the service provider agrees that the piece addition(s) are confirmed on-chain */ - onPiecesConfirmed?: (dataSetId: number, pieces: PieceRecord[]) => void; -} +- A new data set will be created automatically during the first commit -/** - * Options for uploading individual pieces to an existing storage context - * @param metadata - Custom metadata for this specific piece (key-value pairs) - * @param onUploadComplete - Called when upload to service provider completes - * @param onPiecesAdded - Called when the service provider has added the piece(s) and submitted the transaction to the chain - * @param onPiecesConfirmed - Called when the service provider agrees that the piece addition(s) are confirmed on-chain and provides the dataSetId - */ -type UploadOptions = { - metadata?: Record; - onUploadComplete?: (pieceCid: PieceCID) => void; - onPiecesAdded?: (transaction?: Hex, pieces?: { pieceCid: PieceCID }[]) => void; - onPiecesConfirmed?: (dataSetId: number, pieces: PieceRecord[]) => void; -}; -// ---cut--- -interface StorageContextAPI { - // Properties - readonly provider: PDPProvider; - readonly serviceProvider: string; - readonly withCDN: boolean; - readonly dataSetId: number | undefined; - readonly dataSetMetadata: Record; - - // Upload 
& Download - upload( - data: File, - options?: UploadOptions - ): Promise; - download(pieceCid: string | PieceCID): Promise; - - // Piece Queries - hasPiece(pieceCid: string | PieceCID): Promise; - pieceStatus(pieceCid: string | PieceCID): Promise; - getDataSetPieces(): Promise; - - // Piece Management - deletePiece(piece: string | PieceCID | number): Promise; - - // Info & Preflight - getProviderInfo(): Promise; - preflightUpload(size: number): Promise; - - // Lifecycle - terminate(): Transaction; -} -``` +## Store Phase -### Storage Context Methods - -```ts twoslash -// @lib: esnext,dom -import { Synapse } from "@filoz/synapse-sdk"; -import { privateKeyToAccount } from 'viem/accounts' -const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }); -// ---cut--- -const storageContext = await synapse.storage.createContext({ - providerAddress: "0x...", // Optional: use specific provider address - withCDN: true, // Optional: enable CDN for faster downloads - metadata: { - Application: "Filecoin Storage DApp", - Version: "1.0.0", - Category: "AI", +Upload data to a provider without committing on-chain: + +```ts +const { pieceCid, size } = await ctx.store(data, { + pieceCid: preCalculatedCid, // skip expensive PieceCID (hash digest) calculation (optional) + signal: abortController.signal, // cancellation (optional) + onProgress: (bytes) => { // progress callback (optional) + console.log(`Uploaded ${bytes} bytes`) }, -}); +}) -const llmModel = "sonnnet-4.5"; -const conversationId = "1234567890"; +console.log(`Stored: ${pieceCid}, ${size} bytes`) +``` -const data = new TextEncoder().encode("Deep research on decentralization...") +`store()` accepts `Uint8Array` or `ReadableStream`. Use streaming for large files to minimize memory. 
-const preflight = await storageContext.preflightUpload({ size: data.length }); +After store completes, the piece is parked on the SP and can be: -console.log("Estimated costs:", preflight.estimatedCost); -console.log("Allowance sufficient:", preflight.allowanceCheck.sufficient); +- Retrieved via the context's `getPieceUrl(pieceCid)` +- Pulled to other providers via `pull()` +- Committed on-chain via `commit()` -const { pieceCid, size, pieceId } = await storageContext.upload(data, { - metadata: { llmModel, conversationId }, - onUploadComplete: (piece) => { - console.log( - `Uploaded PieceCID: ${piece.toV1().toString()} to storage provider!` - ); - }, - onPiecesAdded: (hash, pieces) => { - console.log( - `🔄 Waiting for transaction to be confirmed on chain (txHash: ${hash})` - ); - console.log( - `Batch includes PieceCIDs: ${ - pieces?.map(({ pieceCid }) => pieceCid.toString()).join(", ") ?? "" - }` - ); - }, - onPiecesConfirmed: (dataSetId, pieces) => { - console.log(`Data set ${dataSetId} confirmed with provider`); - console.log( - `Piece ID mapping: ${pieces - .map(({ pieceId, pieceCid }) => `${pieceId}:${pieceCid}`) - .join(", ")}` - ); +## Pull Phase (SP-to-SP Transfer) + +Request a secondary provider to fetch pieces from the primary: + +```ts +// Pre-sign to avoid double wallet prompts during pull + commit +const extraData = await secondary.presignForCommit([{ pieceCid }]) + +const pullResult = await secondary.pull({ + pieces: [pieceCid], + from: (cid) => primary.getPieceUrl(cid), // source URL builder (or URL string) + extraData, // pre-signed auth (optional, reused for commit) + signal: abortController.signal, // cancellation (optional) + onProgress: (cid, status) => { // status callback (optional) + console.log(`${cid}: ${status}`) }, -}); +}) + +if (pullResult.status !== "complete") { + for (const piece of pullResult.pieces) { + if (piece.status === "failed") { + console.error(`Failed to pull ${piece.pieceCid}`) + } + } +} +``` + +The `from` parameter 
accepts a source `StorageContext` (as in the example above), a URL string (base service URL), or a function that returns a piece URL for a given PieceCID. + +**Pre-signing**: `presignForCommit()` creates an EIP-712 signature that can be reused for both `pull()` and `commit()`. This avoids prompting the wallet twice. Pass the same `extraData` to both calls. -const receivedData = await storageContext.download({ pieceCid }); +## Commit Phase -console.log(`Received data: ${new TextDecoder().decode(receivedData)}`); +Add pieces to an on-chain data set. Creates the data set and payment rail if one doesn't exist: -// Get the list of piece CIDs in the current data set by querying the provider -const pieceCids = await Array.fromAsync(storageContext.getPieces()); -console.log(`Piece CIDs: ${pieceCids.map((cid) => cid.toString()).join(", ")}`); +```ts +const commitResult = await ctx.commit({ + pieces: [{ pieceCid, pieceMetadata: { filename: "doc.pdf" } }], + extraData, // pre-signed auth from presignForCommit() (optional) + onSubmitted: (txHash) => { + console.log(`Transaction submitted: ${txHash}`) + }, +}) -// Check the status of a piece on the service provider -const status = await storageContext.pieceStatus({ pieceCid }); -console.log(`Piece exists: ${status.exists}`); -console.log(`Data set last proven: ${status.dataSetLastProven}`); -console.log(`Data set next proof due: ${status.dataSetNextProofDue}`); +console.log(`Committed: dataSet=${commitResult.dataSetId}, piece=${commitResult.pieceIds[0]}`) +console.log(`New data set: ${commitResult.isNewDataSet}`) ``` -#### Efficient Batch Uploads +The result: + +- **`txHash`** — transaction hash +- **`pieceIds`** — assigned piece IDs (one per input piece) +- **`dataSetId`** — data set ID (may be newly created) +- **`isNewDataSet`** — whether a new data set was created + +## Multi-File Batch Example + +Upload multiple files to 2 providers with full error handling: -When uploading multiple files, the SDK automatically batches operations for efficiency. 
Due to blockchain transaction ordering requirements, uploads are processed sequentially. To maximize efficiency: -The SDK batches up to 32 uploads by default (configurable via `uploadBatchSize`). If you have more than 32 files, they'll be processed in multiple batches automatically. +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" -:::tip[Batch Upload Performance] -**For best performance, start all uploads without awaiting** and let the SDK batch them automatically. This can significantly reduce total upload time for multiple files. +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) -```typescript -// ✅ Efficient: Batched automatically -const uploads = dataArray.map((data) => context.upload(data)); -const results = await Promise.all(uploads); +const files = [ + new TextEncoder().encode("File 1 content..."), + new TextEncoder().encode("File 2 content..."), + new TextEncoder().encode("File 3 content..."), +] -// ❌ Slow: Forces sequential processing -for (const data of dataArray) { - await context.upload(data); +// Create contexts for 2 providers +const [primary, secondary] = await synapse.storage.createContexts({ + count: 2, + metadata: { source: "batch-upload" }, +}) + +// Store all files on primary (note: these could be done in parallel w/ Promise.all) +const stored = [] +for (const file of files) { + const result = await primary.store(file) + stored.push(result) + console.log(`Stored ${result.pieceCid}`) +} + +// Pre-sign for all pieces on secondary +const pieceCids = stored.map(s => s.pieceCid) +const extraData = await secondary.presignForCommit( + pieceCids.map(cid => ({ pieceCid: cid })) +) + +// Pull all pieces to secondary +const pullResult = await secondary.pull({ + pieces: pieceCids, + from: primary, + extraData, +}) + +// Commit on both providers +const [primaryCommit, secondaryCommit] = await Promise.allSettled([ + primary.commit({ pieces: pieceCids.map(cid => ({ pieceCid: 
cid })) }), + pullResult.status === "complete" + ? secondary.commit({ pieces: pieceCids.map(cid => ({ pieceCid: cid })), extraData }) + : Promise.reject(new Error("Pull failed, skipping secondary commit")), // not advised! +]) + +if (primaryCommit.status === "fulfilled") { + console.log(`Primary: dataSet=${primaryCommit.value.dataSetId}`) +} +if (secondaryCommit.status === "fulfilled") { + console.log(`Secondary: dataSet=${secondaryCommit.value.dataSetId}`) } ``` -::: +## Error Handling Patterns + +Each phase's errors are independent. Failures don't cascade — you can retry at any level: + +| Phase | Failure | Data state | Recovery | +|-------|---------|------------|----------| +| **store** | Upload/network error | No data on SP | Retry `store()` with same or different context | +| **pull** | SP-to-SP transfer failed | Data on primary only | Retry `pull()`, try different secondary, or skip | +| **commit** | On-chain transaction failed | Data on SP but not on-chain | Retry `commit()` (no re-upload needed) | + +The key advantage of split operations: if commit fails, data is already stored on the SP. You can retry `commit()` without re-uploading the data. With the high-level `upload()`, a `CommitError` would require re-uploading. + +## Lifecycle Management ### Terminating a Data Set @@ -313,54 +287,41 @@ Only terminate data sets when you're certain you no longer need the data. To delete an entire data set and discontinue payments for the service, call `context.terminate()`. This method submits an on-chain transaction to initiate the termination process. Following a defined termination period, payments will cease, and the service provider will be able to delete the data set. -You can also terminate a data set using `synapse.storage.terminateDataSet(dataSetId)`, in a case that creation of the context is not possible or `dataSetId` is known and creation of the context is not necessary. 
- -```ts twoslash -// @lib: esnext,dom -import { Synapse } from "@filoz/synapse-sdk"; -import { privateKeyToAccount } from 'viem/accounts' -const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }); -// ---cut--- -const storageContext = await synapse.storage.createContext({ - providerAddress: "0x...", // Optional: use specific provider address - withCDN: true, // Optional: enable CDN for faster downloads -}); -const hash = await storageContext.terminate(); -console.log(`Dataset termination transaction: ${hash}`); - -await synapse.client.waitForTransactionReceipt({ hash }); -console.log("Dataset terminated successfully"); +You can also terminate a data set using `synapse.storage.terminateDataSet({ dataSetId })`, when the data set ID is known and creating a context is not necessary. + +```ts +// Via context +const hash = await ctx.terminate() +await synapse.client.waitForTransactionReceipt({ hash }) +console.log("Dataset terminated successfully") + +// Or directly by data set ID +const hash2 = await synapse.storage.terminateDataSet({ dataSetId: 42n }) +await synapse.client.waitForTransactionReceipt({ hash: hash2 }) ``` ### Deleting a Piece -To delete an individual piece from the data set, call `context.deletePiece(pieceCid)`. +To delete an individual piece from the data set, call `context.deletePiece()`. This method submits an on-chain transaction to initiate the deletion process. **Important:** Piece deletion is irreversible and cannot be canceled once initiated. 
-```ts twoslash -// @lib: esnext,dom -import { Synapse } from "@filoz/synapse-sdk"; -import { privateKeyToAccount } from 'viem/accounts' -const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }); -// ---cut--- -const storageContext = await synapse.storage.createContext({ - providerAddress: "0x...", // Optional: use specific provider address - withCDN: true, // Optional: enable CDN for faster downloads -}); - -// Collect all pieces at once -const pieces = []; -for await (const piece of storageContext.getPieces()) { - pieces.push(piece); +```ts +// List all pieces in the data set +const pieces = [] +for await (const piece of ctx.getPieces()) { + pieces.push(piece) } -// Delete the first piece -await storageContext.deletePiece({ piece: pieces[0].pieceId }); +// Delete by piece ID +await ctx.deletePiece({ piece: pieces[0].pieceId }) console.log( `Piece ${pieces[0].pieceCid} (ID: ${pieces[0].pieceId}) deleted successfully` -); +) + +// Delete by PieceCID +await ctx.deletePiece({ piece: "bafkzcib..." 
}) ``` ### Download Options @@ -371,70 +332,144 @@ The SDK provides flexible download options with clear semantics: Download pieces from any available provider using the StorageManager: -```typescript +```ts // Download from any provider that has the piece -const data = await synapse.storage.download(pieceCid); +const data = await synapse.storage.download({ pieceCid }) // Download with CDN optimization (if available) -const dataWithCDN = await synapse.storage.download(pieceCid, { withCDN: true }); - -// Prefer a specific provider (falls back to others if unavailable) -const dataFromProvider = await synapse.storage.download(pieceCid, { - providerAddress: "0x...", -}); +const dataWithCDN = await synapse.storage.download({ pieceCid, withCDN: true }) ``` #### Context-Specific Download (from this provider) When using a StorageContext, downloads are automatically restricted to that specific provider: -```typescript +```ts // Downloads from the provider associated with this context -const context = await synapse.storage.createContext({ - providerAddress: "0x...", -}); -const data = await context.download(pieceCid); - -// The context passes its withCDN setting to the download -const contextWithCDN = await synapse.storage.createContext({ withCDN: true }); -const dataWithCDN = await contextWithCDN.download(pieceCid); // Uses CDN if available +const data = await ctx.download({ pieceCid }) ``` #### CDN Option Inheritance -The `withCDN` option (which is an alias for `metadata: { withCDN: '' }`) follows a clear inheritance hierarchy: +The `withCDN` option follows a clear inheritance hierarchy: 1. **Synapse level**: Default setting for all operations 2. **StorageContext level**: Can override Synapse's default 3. 
**Method level**: Can override instance settings -```typescript +```ts // Example of inheritance -const synapse = await Synapse.create({ withCDN: true }); // Global default: CDN enabled -const context = await synapse.storage.createContext({ withCDN: false }); // Context override: CDN disabled -await synapse.storage.download(pieceCid); // Uses Synapse's withCDN: true -await context.download(pieceCid); // Uses context's withCDN: false -await synapse.storage.download(pieceCid, { withCDN: false }); // Method override: CDN disabled +const synapse = Synapse.create({ account, withCDN: true }) // Global default: CDN enabled +const ctx = await synapse.storage.createContext({ withCDN: false }) // Context override: CDN disabled +await synapse.storage.download({ pieceCid }) // Uses Synapse's withCDN: true +await ctx.download({ pieceCid }) // Uses context's withCDN: false +await synapse.storage.download({ pieceCid, withCDN: false }) // Method override: CDN disabled ``` Note: When `withCDN: true` is set, it adds `{ withCDN: '' }` to the data set's metadata, ensuring CDN-enabled and non-CDN data sets remain separate. +## Using synapse-core Directly + +For maximum control, use the core library functions without the SDK wrapper classes. This is useful for building custom upload pipelines, integrating into existing frameworks, or server-side applications that don't need the SDK's orchestration. 
+ +### Provider Selection + +```ts +import { fetchProviderSelectionInput, selectProviders } from "@filoz/synapse-core/warm-storage" + +// Fetch all chain data needed for selection +const input = await fetchProviderSelectionInput(client, { + address: walletAddress, +}) + +// Primary: pass endorsedIds to restrict pool to endorsed providers only +const [primary] = selectProviders({ + ...input, + count: 1, + metadata: { source: "my-app" }, +}) + +// Secondary: pass empty set to allow any approved provider +const [secondary] = selectProviders({ + ...input, + endorsedIds: new Set(), + count: 1, + excludeProviderIds: new Set([primary.provider.id]), + metadata: { source: "my-app" }, +}) +``` + +`fetchProviderSelectionInput()` makes a single multicall to gather providers, endorsements, and existing data sets. `selectProviders()` is a pure function — no network calls — that applies a 2-tier preference within the eligible pool: + +1. Existing data set with matching metadata +2. New data set (no matching data set found) + +The `endorsedIds` parameter controls which providers are eligible. When non-empty, **only** endorsed providers can be selected — there is no fallback to non-endorsed. When empty, all approved providers are eligible. The SDK's `smartSelect()` uses this to enforce endorsed-for-primary (hard constraint) while allowing any approved provider for secondaries. 
+ +### Upload and Commit + +```ts +import * as SP from "@filoz/synapse-core/sp" +import { signAddPieces, signCreateDataSetAndAddPieces } from "@filoz/synapse-core/typed-data" + +// Upload piece to SP +const { pieceCid, size } = await SP.uploadPieceStreaming({ + serviceURL: primary.provider.pdp.serviceURL, + data: myStream, +}) + +// Confirm piece is parked +await SP.findPiece({ + serviceURL: primary.provider.pdp.serviceURL, + pieceCid, + retry: true, +}) + +// Sign and commit (new data set) +const result = await SP.createDataSetAndAddPieces(client, { + cdn: false, + payee: primary.provider.serviceProvider, + payer: client.account.address, + recordKeeper: chain.contracts.fwss.address, + pieces: [{ pieceCid }], + metadata: primary.dataSetMetadata, + serviceURL: primary.provider.pdp.serviceURL, +}) + +const confirmation = await SP.waitForCreateDataSetAddPieces(result) +console.log(`DataSet: ${confirmation.dataSetId}`) +``` + +### SP-to-SP Pull + +```ts +import * as SP from "@filoz/synapse-core/sp" + +const response = await SP.waitForPullStatus(client, { + serviceURL: secondary.provider.pdp.serviceURL, + pieces: [{ + pieceCid, + sourceUrl: `${primary.provider.pdp.serviceURL}/pdp/piece/${pieceCid}`, + }], + payee: secondary.provider.serviceProvider, + payer: client.account.address, + cdn: false, + metadata: secondary.dataSetMetadata, +}) +``` + +This path requires manual EIP-712 signing. The `signAddPieces` and `signCreateDataSetAndAddPieces` functions from `@filoz/synapse-core/typed-data` handle the signature creation. + ## Next Steps -Now that you understand Storage Context and advanced operations: +- **[Storage Operations](/developer-guides/storage/storage-operations/)** — The high-level multi-copy upload API for most use cases. + _Start here if you haven't used `synapse.storage.upload()` yet._ -- **[Calculate Storage Costs →](/developer-guides/storage/storage-costs/)** - Plan your budget and fund your storage account. 
+- **[Calculate Storage Costs](/developer-guides/storage/storage-costs/)** — Plan your budget and fund your storage account. _Use the quick calculator to estimate monthly costs._ -- **[Storage Operations Basics →](/developer-guides/storage/storage-operations/)** - Review fundamental storage concepts and auto-managed operations. - _Good for a refresher on the simpler approach._ - -- **[Component Architecture →](/developer-guides/components/)** - Understand how StorageContext fits into the SDK design. +- **[Component Architecture](/developer-guides/components/)** — Understand how StorageContext fits into the SDK design. _Deep dive into the component architecture._ -- **[Payment Management →](/developer-guides/payments/payment-operations/)** - Manage deposits, approvals, and payment rails. +- **[Payment Management](/developer-guides/payments/payment-operations/)** — Manage deposits, approvals, and payment rails. _Required before your first upload._ diff --git a/docs/src/content/docs/developer-guides/storage/storage-operations.mdx b/docs/src/content/docs/developer-guides/storage/storage-operations.mdx index abadd1c5..c5fb7f08 100644 --- a/docs/src/content/docs/developer-guides/storage/storage-operations.mdx +++ b/docs/src/content/docs/developer-guides/storage/storage-operations.mdx @@ -7,14 +7,14 @@ sidebar: :::tip[Purpose] This guide shows you how to get started with storage operations in Synapse SDK. -You'll learn the fundamentals of uploading data, managing data sets, and retrieving stored content. +You'll learn how to upload data with multi-copy durability, handle results, and retrieve stored content. **Audience**: Developers new to Synapse SDK storage capabilities **Time to complete**: 10-15 minutes **Prerequisites**: Basic TypeScript knowledge, Synapse SDK installed ::: -This guide explains the core storage concepts and provides examples of how to use the Synapse SDK to store, retrieve, and manage data on Filecoin On-Chain Cloud. 
+This guide covers the primary storage API — `synapse.storage.upload()` — which stores your data with multiple providers for redundancy. For manual control over each upload phase, see [Split Operations](/developer-guides/storage/storage-context/). ## Key Concepts @@ -27,70 +27,238 @@ This guide explains the core storage concepts and provides examples of how to us - **Data Set Metadata**: Max 10 keys (e.g., `project`, `environment`) - **Piece Metadata**: Max 5 keys per piece (e.g., `filename`, `contentType`) -**Storage Manager**: The main entry point for storage operations. Handles provider selection, data set management, and provides downloads from any provider (provider-agnostic) using the StorageContext. +**Copies and Durability**: By default, `upload()` stores your data with 2 independent providers. Each provider maintains its own data set with separate PDP proofs and payment rails. If one provider goes down, your data is still available from the other. -**Storage Context**: A connection to a specific storage provider and data set. Created explicitly for fine-grained control or automatically by StorageManager. Enables uploads and downloads with the specific storage provider. +**Storage Manager**: The main entry point for storage operations (`synapse.storage`). Handles provider selection, multi-copy orchestration, data set management, and provider-agnostic downloads. 
-## Storage Approaches +## Quick Start -The SDK offers two ways to work with storage operations: +Upload data with a single call — the SDK selects providers and handles multi-copy replication automatically: -| Approach | Who It's For | What SDK Handles | When to Use | -| -------------------- | --------------- | ------------------------------------------------- | ------------------------------------------------------- | -| **Auto-Managed** | Most developers | Provider selection, data set creation, management | Getting started, simple apps, quick prototypes | -| **Explicit Control** | Advanced users | Nothing - you control everything | Batch operations, specific providers, cost optimization | +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" -**Recommendation**: Start with auto-managed, then explore explicit control only if needed. +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) -### Example 1: Quick Start (Auto-Managed) +const data = new Uint8Array([1, 2, 3, 4, 5]) -Upload and download data with zero configuration - SDK automatically selects a provider and manages the data set: +const { pieceCid, size, copies, failures } = await synapse.storage.upload(data) -```ts twoslash -// @lib: esnext,dom -import { Synapse } from "@filoz/synapse-sdk"; -import { privateKeyToAccount } from 'viem/accounts' +console.log("PieceCID:", pieceCid.toString()) +console.log("Size:", size, "bytes") +console.log("Stored on", copies.length, "providers") -const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }); -// ---cut--- -const data = new Uint8Array([1, 2, 3, 4, 5]); - -const { pieceCid } = await synapse.storage.upload(data); -const downloaded = await synapse.storage.download({ pieceCid }); +for (const copy of copies) { + console.log(` Provider ${copy.providerId}: role=${copy.role}, dataSet=${copy.dataSetId}`) +} -console.log("Uploaded:", pieceCid); -console.log("Downloaded:", 
downloaded.length, "bytes"); +if (failures.length > 0) { + console.warn("Some copies failed:", failures) +} ``` -### Example 2: With Metadata (Auto-Managed) +:::caution[Always check the result] +`upload()` returns a result as long as **at least one** copy commits on-chain. It only throws when zero copies succeed. You **must** inspect `copies` and `failures` to know whether all requested copies were stored — a successful return does not guarantee all copies succeeded. +::: -Add metadata to organize uploads and enable faster data set reuse - SDK will reuse any existing data set matching the metadata: +The result contains: -```ts twoslash -// @lib: esnext,dom -import { Synapse } from "@filoz/synapse-sdk"; -import { privateKeyToAccount } from 'viem/accounts' +- **`pieceCid`** — content address of your data, used for downloads +- **`size`** — size of the uploaded data in bytes +- **`copies`** — array of successful copies, each with `providerId`, `dataSetId`, `pieceId`, `role` (`'primary'` or `'secondary'`), `retrievalUrl`, and `isNewDataSet` +- **`failures`** — array of failed copy attempts (partial failures are returned, not thrown), each with `providerId`, `role`, `error`, and `explicit` -const data = new Uint8Array([1, 2, 3, 4, 5]); -const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }); -// ---cut--- -const context = await synapse.storage.createContext({ +## Upload with Metadata + +Attach metadata to organize uploads. 
The SDK reuses existing data sets when metadata matches, avoiding duplicate payment rails: + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) + +const data = new TextEncoder().encode("Hello, Filecoin!") + +const result = await synapse.storage.upload(data, { metadata: { Application: "My DApp", Version: "1.0.0", Category: "Documents", }, -}); + pieceMetadata: { + filename: "hello.txt", + contentType: "text/plain", + }, +}) -const result = await synapse.storage.upload(data, { context }); -console.log("Uploaded:", result.pieceCid); +console.log("Uploaded:", result.pieceCid.toString()) ``` -:::tip[Need More Control?] -The examples above use auto-managed storage where the SDK handles provider selection and data set creation. +Subsequent uploads with the same `metadata` reuse the same data sets and payment rails. + +## Upload with Callbacks + +Track the lifecycle of a multi-copy upload with callbacks: + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) + +const data = new Uint8Array(1024) // 1KB of data + +const result = await synapse.storage.upload(data, { + callbacks: { + onStored: (providerId, pieceCid) => { + console.log(`Data stored on provider ${providerId}`) + }, + onCopyComplete: (providerId, pieceCid) => { + console.log(`Secondary copy complete on provider ${providerId}`) + }, + onCopyFailed: (providerId, pieceCid, error) => { + console.warn(`Copy failed on provider ${providerId}:`, error.message) + }, + onPullProgress: (providerId, pieceCid, status) => { + console.log(`Pull to provider ${providerId}: ${status}`) + }, + onPiecesAdded: (txHash, providerId, pieces) => { + console.log(`On-chain commit submitted: ${txHash}`) + }, + onPiecesConfirmed: (dataSetId, providerId, pieces) => { + 
console.log(`Confirmed on-chain: dataSet=${dataSetId}, provider=${providerId}`) + }, + onProgress: (bytesUploaded) => { + console.log(`Uploaded ${bytesUploaded} bytes`) + }, + }, +}) +``` -**For advanced use cases** like batch uploads, custom callbacks, or explicit provider selection, see the [Storage Context Guide](/developer-guides/storage/storage-context/). -::: +Callback lifecycle: + +1. **`onProgress`** — fires during upload to primary provider +2. **`onStored`** — primary upload complete, piece parked on SP +3. **`onPullProgress`** — SP-to-SP transfer status for secondaries +4. **`onCopyComplete`** / **`onCopyFailed`** — secondary pull result +5. **`onPiecesAdded`** — commit transaction submitted +6. **`onPiecesConfirmed`** — commit confirmed on-chain + +## Controlling Copy Count + +Adjust the number of copies for your durability requirements: + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) + +const data = new Uint8Array(256) + +// Store 3 copies for higher redundancy +const result3 = await synapse.storage.upload(data, { count: 3 }) +console.log("3 copies:", result3.copies.length) + +// Store a single copy when redundancy isn't needed +const result1 = await synapse.storage.upload(data, { count: 1 }) +console.log("1 copy:", result1.copies.length) +``` + +The default is 2 copies. The first copy is stored on an **endorsed** provider (high trust, curated), and secondary copies are pulled via SP-to-SP transfer from approved providers. + +## Understanding the Result + +`upload()` is designed around **partial success over atomicity**: it commits whatever succeeded rather than throwing away successful work. This means the return value is the primary interface for understanding what happened — not just whether it threw. 
+ +### When `upload()` throws + +`upload()` only throws in these cases: + +| Error | What happened | What to do | +|-------|---------------|------------| +| **`StoreError`** | Primary upload failed — no data committed anywhere | Retry the upload | +| **`CommitError`** | Data is stored on providers but **all** on-chain commits failed | Use [split operations](/developer-guides/storage/storage-context/) to retry `commit()` without re-uploading | +| Selection error | No endorsed provider available or reachable | Check provider health / network | + +### When `upload()` returns + +If `upload()` returns (no throw), **at least one copy** is committed on-chain. But the result may contain fewer copies than requested. Every copy in `copies[]` represents a committed on-chain data set that the user is now paying for. + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) + +const data = new Uint8Array(256) + +const result = await synapse.storage.upload(data, { count: 2 }) + +// Check: did we get all requested copies? +if (result.copies.length < 2) { + console.warn(`Only ${result.copies.length}/2 copies succeeded`) + for (const failure of result.failures) { + console.warn(` Provider ${failure.providerId} (${failure.role}): ${failure.error}`) + } +} + +// Check: did the endorsed primary succeed? 
+const primaryFailed = result.failures.find(f => f.role === "primary") +if (primaryFailed) { + console.warn(`Endorsed provider failed: ${primaryFailed.error}`) + // Data is only on non-endorsed secondaries +} + +// Every copy is committed and being paid for +for (const copy of result.copies) { + console.log(`Provider ${copy.providerId}, dataset ${copy.dataSetId}, piece ${copy.pieceId}`) +} +``` + +### Auto-retry behavior + +For auto-selected providers (no explicit `providerIds` or `dataSetIds`), the SDK automatically retries failed secondaries with alternate providers up to 5 times. If you explicitly specify providers, the SDK respects your choice and does not retry. + +## Download + +Download from any provider that has the piece — the SDK resolves the provider automatically: + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +const synapse = Synapse.create({ account: privateKeyToAccount("0x...") }) + +// Download using PieceCID from a previous upload +const pieceCid = "bafkzcib..." // from upload result +const bytes = await synapse.storage.download({ pieceCid }) +const text = new TextDecoder().decode(bytes) +console.log("Downloaded:", text) +``` + +For CDN-accelerated downloads: + +```ts +import { Synapse } from "@filoz/synapse-sdk" +import { privateKeyToAccount } from "viem/accounts" + +// Enable CDN globally +const synapse = Synapse.create({ + account: privateKeyToAccount("0x..."), + withCDN: true, +}) + +const bytes = await synapse.storage.download({ pieceCid: "bafkzcib..." }) + +// Or per-download: +const bytes2 = await synapse.storage.download({ + pieceCid: "bafkzcib...", + withCDN: true, +}) +``` ## Data Set Management @@ -98,15 +266,6 @@ The examples above use auto-managed storage where the SDK handles provider selec These APIs are useful when you want to inspect existing data sets, query stored pieces, or retrieve metadata. For basic upload/download, you don't need these. 
::: -Data sets are automatically created during your first upload to a provider. For explicit management of data sets, use these operations: - -**When You Need Explicit Data Sets:** - -- Uploading many files to same provider -- Want consistent provider for your application -- Need to track costs per data set -- Building batch upload workflows - ### Getting all data sets Retrieve all data sets owned by your account to inspect piece counts, CDN status, and metadata: @@ -132,7 +291,7 @@ for (const ds of dataSets) { ### Getting data set pieces -List all pieces stored in a specific data set by iterating through the context: +List all pieces stored in a specific data set by iterating through a context: ```ts twoslash // @lib: esnext,dom @@ -151,9 +310,9 @@ for await (const piece of context.getPieces()) { console.log(`Found ${pieces.length} pieces`); ``` -### Getting a data set piece metadata +### Getting piece metadata -Access custom metadata attached to individual pieces for organization and filtering: +Access custom metadata attached to individual pieces: ```ts twoslash // @lib: esnext,dom @@ -204,16 +363,8 @@ console.log("PDP URL:", providerInfo.pdp.serviceURL); ## Next Steps -**Ready to explore more?** Here's your learning path: - -- **[Advanced Operations →](/developer-guides/storage/storage-context/)** - Learn about batch uploads, lifecycle management, and download strategies. - _For developers building production applications with specific provider requirements._ +- **[Split Operations](/developer-guides/storage/storage-context/)** — Manual control over store, pull, and commit phases for batch uploads, custom error handling, and direct core library usage. -- **[Plan Storage Costs →](/developer-guides/storage/storage-costs/)** - Calculate your monthly costs and understand funding requirements. 
- _Use the quick calculator to estimate costs in under 5 minutes._ +- **[Plan Storage Costs](/developer-guides/storage/storage-costs/)** — Calculate your monthly costs and understand funding requirements. -- **[Payment Management →](/developer-guides/payments/payment-operations/)** - Manage deposits, approvals, and payment rails. - _Required before your first upload._ +- **[Payment Management](/developer-guides/payments/payment-operations/)** — Manage deposits, approvals, and payment rails. diff --git a/docs/src/content/docs/developer-guides/synapse.md b/docs/src/content/docs/developer-guides/synapse.md index 192fb73e..dab195e7 100644 --- a/docs/src/content/docs/developer-guides/synapse.md +++ b/docs/src/content/docs/developer-guides/synapse.md @@ -75,23 +75,22 @@ Fund your account and manage payments for Filecoin storage services. ### Storage Operations -The SDK provides comprehensive storage capabilities through two main approaches: +The SDK provides storage with multi-copy durability by default. Three tiers of increasing control: -- **Auto-managed storage**: Quick and simple - the SDK handles provider selection and data set creation. -- **Explicit control**: Full control over providers, data sets, and batch operations. - -To understand these storage approaches, you'll need to be familiar with several key concepts: +- **`synapse.storage.upload(data)`** — Multi-copy upload (default: 2 providers). Handles provider selection, SP-to-SP replication, and on-chain commit automatically. Returns `{ pieceCid, size, copies, failures }`. +- **Split operations** — Manual control over each upload phase (`store` → `pull` → `commit`) via `StorageContext`. For batch uploads, custom error handling, and pre-signing. +- **Core library** — Direct chain queries and SP HTTP calls via `@filoz/synapse-core` for custom orchestration. #### Core Concepts -- **Storage Contexts**: Manage storage lifecycle and provider connections. 
-- **Data Sets**: Organize related data pieces with shared payment rails. +- **Data Sets**: Organize related data pieces with shared payment rails per provider. - **PieceCIDs**: Unique content-addressed identifiers for stored data. +- **Copies**: Each upload is stored on multiple independent providers for redundancy. - **Service Providers**: Infrastructure for decentralized storage with cryptographic proofs. -[View Storage Operations Guide →](/developer-guides/storage/storage-operations/) - _Learn the basics in less than 10 minutes_ +[View Storage Operations Guide →](/developer-guides/storage/storage-operations/) - _Multi-copy uploads in less than 10 minutes_ -[View Storage Context Guide →](/developer-guides/storage/storage-context/) - _Learn the advanced storage concepts_ +[View Split Operations Guide →](/developer-guides/storage/storage-context/) - _Manual control over store, pull, and commit_ [View Storage Costs Guide →](/developer-guides/storage/storage-costs/) - _Learn how to calculate your storage costs_ diff --git a/docs/src/content/docs/getting-started/index.mdx b/docs/src/content/docs/getting-started/index.mdx index a3f682fb..e18c0590 100644 --- a/docs/src/content/docs/getting-started/index.mdx +++ b/docs/src/content/docs/getting-started/index.mdx @@ -92,17 +92,19 @@ async function main() { await synapse.client.waitForTransactionReceipt({ hash }) console.log(`✅ USDFC deposit and Warm Storage service approval successful!`); - // 3) Upload + // 3) Upload — stores 2 copies across independent providers for durability const file = new TextEncoder().encode( - `🚀 Welcome to decentralized storage on Filecoin Onchain Cloud! - Your data is safe here. - 🌍 You need to make sure to meet the minimum size + `🚀 Welcome to decentralized storage on Filecoin Onchain Cloud! + Your data is safe here. 
+ 🌍 You need to make sure to meet the minimum size requirement of 127 bytes per upload.` ); - const { pieceCid, size } = await synapse.storage.upload(file) + const { pieceCid, size, copies, failures } = await synapse.storage.upload(file) console.log(`✅ Upload complete!`); console.log(`PieceCID: ${pieceCid}`); console.log(`Size: ${size} bytes`); + console.log(`Stored on ${copies.length} providers`); + if (failures.length > 0) console.warn(`${failures.length} copy attempt(s) failed`); // 4) Download const bytes = await synapse.storage.download({ pieceCid }) @@ -126,8 +128,8 @@ What you just did: - ✅ Initialized synapse SDK for Filecoin Calibration - ✅ Deposited USDFC as payment tokens - ✅ Authorized the storage service to use the token -- ✅ Uploaded data to decentralized storage -- ✅ Retrieved it using only the content address +- ✅ Uploaded data with 2 copies across independent providers for durability +- ✅ Retrieved it using only the content address (from any provider that has it) Now let's break down each step... @@ -180,14 +182,16 @@ import { Synapse } from "@filoz/synapse-sdk"; import { privateKeyToAccount } from 'viem/accounts' const synapse = Synapse.create({ account: privateKeyToAccount('0x...') }) // ---cut--- -// Upload data - SDK automatically selects provider and creates data set if needed +// Upload data — stores 2 copies by default for durability const file = new TextEncoder().encode( - `🚀 Welcome to decentralized storage on Filecoin Onchain Cloud! - Your data is safe here. - 🌍 You need to make sure to meet the minimum size + `🚀 Welcome to decentralized storage on Filecoin Onchain Cloud! + Your data is safe here. 
+ 🌍 You need to make sure to meet the minimum size requirement of 127 bytes per upload.` ); -const { pieceCid } = await synapse.storage.upload(file); +const { pieceCid, copies, failures } = await synapse.storage.upload(file); +console.log(`Stored on ${copies.length} providers`); +if (failures.length > 0) console.warn(`${failures.length} copy attempt(s) failed`); // Download data from any provider that has it const downloadedData = await synapse.storage.download({ pieceCid }); diff --git a/package.json b/package.json index 2bd8fa0a..e6e87f2e 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "knip": "^5.71.0", "markdownlint-cli2": "^0.21.0", "typescript": "catalog:", + "viem": "catalog:", "wireit": "^0.14.12" }, "simple-git-hooks": { diff --git a/packages/synapse-core/src/warm-storage/fetch-provider-selection-input.ts b/packages/synapse-core/src/warm-storage/fetch-provider-selection-input.ts new file mode 100644 index 00000000..7c46bc5b --- /dev/null +++ b/packages/synapse-core/src/warm-storage/fetch-provider-selection-input.ts @@ -0,0 +1,46 @@ +import type { Address, Chain, Client, Transport } from 'viem' +import { getProviderIds } from '../endorsements/get-provider-ids.ts' +import { getApprovedPDPProviders } from '../sp-registry/get-pdp-providers.ts' +import { getPdpDataSets } from './get-pdp-data-sets.ts' +import type { ProviderSelectionInput } from './location-types.ts' + +export namespace fetchProviderSelectionInput { + export type OptionsType = { + /** Client wallet address (for dataset lookup) */ + address: Address + } +} + +/** + * Fetch all chain data needed for provider selection. + * + * Executes parallel queries for: + * - Approved PDP providers (via spRegistry) + * - Endorsed provider IDs (via endorsements) + * - Client's existing datasets with enrichment (via getPdpDataSets) + * + * Returns a ProviderSelectionInput ready to pass to selectProviders(). 
+ *
+ * For users who need custom caching or only need a subset of this data,
+ * assemble ProviderSelectionInput manually instead.
+ *
+ * @param client - Viem public client configured for the target chain
+ * @param options - Client address for dataset lookup
+ * @returns ProviderSelectionInput (caller provides metadata via selectProviders options)
+ */
+export async function fetchProviderSelectionInput(
+  client: Client<Transport, Chain>,
+  options: fetchProviderSelectionInput.OptionsType
+): Promise<ProviderSelectionInput> {
+  const [providers, endorsedIds, pdpDataSets] = await Promise.all([
+    getApprovedPDPProviders(client),
+    getProviderIds(client),
+    getPdpDataSets(client, { address: options.address }),
+  ])
+
+  return {
+    providers,
+    endorsedIds,
+    clientDataSets: pdpDataSets,
+  }
+}
diff --git a/packages/synapse-core/src/warm-storage/find-matching-data-sets.ts b/packages/synapse-core/src/warm-storage/find-matching-data-sets.ts
new file mode 100644
index 00000000..729e1aee
--- /dev/null
+++ b/packages/synapse-core/src/warm-storage/find-matching-data-sets.ts
@@ -0,0 +1,63 @@
+import type { MetadataObject } from '../utils/metadata.ts'
+import type { SelectionDataSet } from './location-types.ts'
+
+/**
+ * Check if a dataset's metadata exactly matches the requested metadata.
+ *
+ * Both sets must have identical keys and values. Order does not matter.
+ * An empty requested metadata matches only datasets with empty metadata.
+ * + * @param dataSetMetadata - The metadata from the dataset + * @param requestedMetadata - The metadata to match against + * @returns true if metadata sets are exactly equal + */ +export function metadataMatches(dataSetMetadata: MetadataObject, requestedMetadata: MetadataObject): boolean { + const dataSetKeys = Object.keys(dataSetMetadata) + const requestedKeys = Object.keys(requestedMetadata) + + if (dataSetKeys.length !== requestedKeys.length) { + return false + } + + if (requestedKeys.length === 0) { + return true + } + + for (const key of requestedKeys) { + if (dataSetMetadata[key] !== requestedMetadata[key]) { + return false + } + } + + return true +} + +/** + * Find datasets matching the given metadata, sorted by preference. + * + * Matching is exact: a dataset matches only if its metadata keys and + * values are identical to the requested metadata. + * + * Only active datasets are considered (live, managed, pdpEndEpoch === 0n). + * + * Sort order: + * 1. Datasets with pieces (activePieceCount > 0) before empty datasets + * 2. 
Within each group, older datasets (lower ID) first + * + * @param dataSets - Datasets to search (typically filtered to a single provider) + * @param metadata - Desired metadata keys and values + * @returns Matching datasets in preference order + */ +export function findMatchingDataSets(dataSets: SelectionDataSet[], metadata: MetadataObject): SelectionDataSet[] { + const matching = dataSets.filter( + (ds) => ds.live && ds.managed && ds.pdpEndEpoch === 0n && metadataMatches(ds.metadata, metadata) + ) + + return matching.sort((a, b) => { + // Datasets with pieces sort before empty ones + if (a.activePieceCount > 0n && b.activePieceCount === 0n) return -1 + if (b.activePieceCount > 0n && a.activePieceCount === 0n) return 1 + // Within same group, oldest dataset first (lower ID) + return Number(a.dataSetId - b.dataSetId) + }) +} diff --git a/packages/synapse-core/src/warm-storage/get-pdp-data-set.ts b/packages/synapse-core/src/warm-storage/get-pdp-data-set.ts index cb93a9ad..d6cd2d00 100644 --- a/packages/synapse-core/src/warm-storage/get-pdp-data-set.ts +++ b/packages/synapse-core/src/warm-storage/get-pdp-data-set.ts @@ -2,6 +2,7 @@ import { type Address, type Chain, type Client, isAddressEqual, type ReadContrac import { multicall } from 'viem/actions' import { asChain } from '../chains.ts' import { dataSetLiveCall } from '../pdp-verifier/data-set-live.ts' +import { getActivePieceCountCall } from '../pdp-verifier/get-active-piece-count.ts' import { getDataSetListenerCall } from '../pdp-verifier/get-data-set-listener.ts' import { getPDPProviderCall, parsePDPProvider } from '../sp-registry/get-pdp-provider.ts' import { getAllDataSetMetadataCall, parseAllDataSetMetadata } from './get-all-data-set-metadata.ts' @@ -87,7 +88,7 @@ export async function readPdpDataSetInfo( } ): Promise { const chain = asChain(client.chain) - const [live, listener, _metadata, _pdpProvider] = await multicall(client, { + const [live, listener, _metadata, _pdpProvider, activePieceCount] = await 
multicall(client, { allowFailure: false, contracts: [ dataSetLiveCall({ @@ -106,6 +107,10 @@ export async function readPdpDataSetInfo( chain: client.chain, providerId: options.providerId, }), + getActivePieceCountCall({ + chain: client.chain, + dataSetId: options.dataSetInfo.dataSetId, + }), ], }) @@ -118,5 +123,6 @@ export async function readPdpDataSetInfo( cdn: options.dataSetInfo.cdnRailId > 0n && 'withCDN' in metadata, metadata, provider: pdpProvider, + activePieceCount, } } diff --git a/packages/synapse-core/src/warm-storage/index.ts b/packages/synapse-core/src/warm-storage/index.ts index 686c4adb..c9871649 100644 --- a/packages/synapse-core/src/warm-storage/index.ts +++ b/packages/synapse-core/src/warm-storage/index.ts @@ -10,6 +10,8 @@ */ export * from './add-approved-provider.ts' +export * from './fetch-provider-selection-input.ts' +export * from './find-matching-data-sets.ts' export * from './get-all-data-set-metadata.ts' export * from './get-all-piece-metadata.ts' export * from './get-approved-providers.ts' @@ -18,7 +20,9 @@ export * from './get-data-set.ts' export * from './get-pdp-data-set.ts' export * from './get-pdp-data-sets.ts' export * from './get-service-price.ts' +export * from './location-types.ts' export * from './read-addresses.ts' export * from './remove-approved-provider.ts' +export * from './select-providers.ts' export * from './terminate-service.ts' export * from './types.ts' diff --git a/packages/synapse-core/src/warm-storage/location-types.ts b/packages/synapse-core/src/warm-storage/location-types.ts new file mode 100644 index 00000000..80421d56 --- /dev/null +++ b/packages/synapse-core/src/warm-storage/location-types.ts @@ -0,0 +1,79 @@ +import type { PDPProvider } from '../sp-registry/types.ts' +import type { MetadataObject } from '../utils/metadata.ts' + +/** + * Dataset with piece count, for provider selection. 
+ *
+ * Picks the fields that selectProviders() and findMatchingDataSets()
+ * need, plus activePieceCount which is fetched separately via multicall.
+ *
+ * Core callers can spread a PdpDataSet directly: `{ ...ds, activePieceCount }`.
+ * SDK callers map from EnhancedDataSetInfo (different field names).
+ */
+export interface SelectionDataSet {
+  /** PDP Verifier data set ID */
+  dataSetId: bigint
+  /** Provider that owns this data set */
+  providerId: bigint
+  /** Data set metadata (key-value pairs) */
+  metadata: MetadataObject
+  /** Number of active pieces in the dataset (0 = empty) */
+  activePieceCount: bigint
+  /** End epoch for PDP service (0 = active) */
+  pdpEndEpoch: bigint
+  /** Whether the data set is live in the PDP Verifier */
+  live: boolean
+  /** Whether the data set is managed by the current Warm Storage contract */
+  managed: boolean
+}
+
+/**
+ * Pre-fetched data for provider selection.
+ *
+ * The caller gathers this from chain queries (or cached results)
+ * and passes it to selectProviders(). Separating data fetching
+ * from selection keeps selectProviders() pure and testable.
+ *
+ * The `endorsedIds` set controls pool restriction:
+ * - Non-empty: only providers in this set are considered (primary selection)
+ * - Empty: all providers in the `providers` list are considered (secondary selection)
+ */
+export interface ProviderSelectionInput {
+  /** Available PDP providers (typically from getApprovedPDPProviders) */
+  providers: PDPProvider[]
+  /** Set of endorsed provider IDs (from endorsements.getProviderIds).
+   * Non-empty = restrict to endorsed only. Empty = use all providers. */
+  endorsedIds: Set<bigint>
+  /** Client's existing datasets with metadata and piece counts */
+  clientDataSets: SelectionDataSet[]
+}
+
+/**
+ * Options for selectProviders(). Combines pre-fetched chain data
+ * with selection parameters in a single argument.
+ */
+export interface ProviderSelectionOptions extends ProviderSelectionInput {
+  /** Number of providers to select (default: 1) */
+  count?: number
+  /** Provider IDs to exclude (for retry after ping failure or multi-copy exclusion) */
+  excludeProviderIds?: Set<bigint>
+  /** Desired metadata for dataset matching (empty object matches only empty-metadata datasets) */
+  metadata?: MetadataObject
+}
+
+/**
+ * A resolved provider+dataset pair ready for upload.
+ *
+ * The currency between selection and upload. selectProviders() returns
+ * an array of these; the caller passes them to upload/pull/commit functions.
+ */
+export interface ResolvedLocation {
+  /** The selected provider */
+  provider: PDPProvider
+  /** Matched dataset ID, or null if a new dataset should be created */
+  dataSetId: bigint | null
+  /** Whether this provider is endorsed */
+  endorsed: boolean
+  /** Dataset metadata (matched from existing dataset, or the requested metadata for new datasets) */
+  dataSetMetadata: MetadataObject
+}
diff --git a/packages/synapse-core/src/warm-storage/select-providers.ts b/packages/synapse-core/src/warm-storage/select-providers.ts
new file mode 100644
index 00000000..61350b88
--- /dev/null
+++ b/packages/synapse-core/src/warm-storage/select-providers.ts
@@ -0,0 +1,91 @@
+import { findMatchingDataSets } from './find-matching-data-sets.ts'
+import type { ProviderSelectionOptions, ResolvedLocation } from './location-types.ts'
+
+/**
+ * Select providers for storage from an eligible pool.
+ *
+ * The eligible provider pool is determined by `endorsedIds`:
+ * - Non-empty: only providers in this set are considered (use for primary selection)
+ * - Empty: all providers in the `providers` list are considered (use for secondary selection)
+ *
+ * Within the eligible pool, providers with an existing metadata-matching
+ * dataset are preferred (reuses payment rail). Otherwise a provider
+ * without a matching dataset is selected (new dataset created on commit).
+ * + * Within matching datasets, those with existing pieces sort before empty + * ones, and older datasets (lower ID) sort before newer ones. + * + * This function does NOT perform health checks — the caller should + * validate reachability via SP.ping() and call again with + * excludeProviderIds if a provider fails. + * + * @param options - Pre-fetched chain data and selection parameters + * @returns Ranked array of locations up to `count` length. + * May be shorter if fewer providers are available. + * Empty if no providers match constraints. + */ +export function selectProviders(options: ProviderSelectionOptions): ResolvedLocation[] { + const count = options.count ?? 1 + const excludeProviderIds = options.excludeProviderIds ?? new Set() + const metadata = options.metadata ?? {} + + // Determine the eligible pool: restricted to endorsed if endorsedIds is non-empty + const isPoolRestricted = options.endorsedIds.size > 0 + const eligibleProviders = options.providers.filter( + (p) => !excludeProviderIds.has(p.id) && (!isPoolRestricted || options.endorsedIds.has(p.id)) + ) + + if (eligibleProviders.length === 0) { + return [] + } + + const providerMap = new Map(eligibleProviders.map((p) => [p.id, p])) + + // Find metadata-matching datasets from eligible providers + const eligibleDataSets = options.clientDataSets.filter((ds) => providerMap.has(ds.providerId)) + const matchingDataSets = findMatchingDataSets(eligibleDataSets, metadata) + + const results: ResolvedLocation[] = [] + const selectedProviderIds = new Set() + + for (let i = 0; i < count; i++) { + let found = false + + // Prefer a provider with an existing matching dataset (reuses payment rail) + for (const ds of matchingDataSets) { + if (selectedProviderIds.has(ds.providerId)) continue + const provider = providerMap.get(ds.providerId) + if (provider == null) continue + + results.push({ + provider, + dataSetId: ds.dataSetId, + endorsed: options.endorsedIds.has(ds.providerId), + dataSetMetadata: ds.metadata, + 
}) + selectedProviderIds.add(ds.providerId) + found = true + break + } + + // Otherwise pick any eligible provider (new dataset created on commit) + if (!found) { + for (const provider of eligibleProviders) { + if (selectedProviderIds.has(provider.id)) continue + results.push({ + provider, + dataSetId: null, + endorsed: options.endorsedIds.has(provider.id), + dataSetMetadata: metadata, + }) + selectedProviderIds.add(provider.id) + found = true + break + } + } + + if (!found) break + } + + return results +} diff --git a/packages/synapse-core/src/warm-storage/types.ts b/packages/synapse-core/src/warm-storage/types.ts index e1cbf1b0..e265d211 100644 --- a/packages/synapse-core/src/warm-storage/types.ts +++ b/packages/synapse-core/src/warm-storage/types.ts @@ -44,6 +44,8 @@ export type PdpDataSetInfo = { metadata: MetadataObject /** PDP provider associated with the data set. */ provider: PDPProvider + /** Number of active (non-zero) pieces in the data set. */ + activePieceCount: bigint } export interface PdpDataSet extends DataSetInfo, PdpDataSetInfo {} diff --git a/packages/synapse-core/test/fetch-provider-selection-input.test.ts b/packages/synapse-core/test/fetch-provider-selection-input.test.ts new file mode 100644 index 00000000..1c765bf4 --- /dev/null +++ b/packages/synapse-core/test/fetch-provider-selection-input.test.ts @@ -0,0 +1,96 @@ +import assert from 'assert' +import { setup } from 'iso-web/msw' +import { createPublicClient, http } from 'viem' +import { calibration } from '../src/chains.ts' +import { ADDRESSES, JSONRPC, presets } from '../src/mocks/jsonrpc/index.ts' +import { fetchProviderSelectionInput } from '../src/warm-storage/fetch-provider-selection-input.ts' + +describe('fetchProviderSelectionInput', () => { + const server = setup() + + before(async () => { + await server.start() + }) + + after(() => { + server.stop() + }) + + beforeEach(() => { + server.resetHandlers() + }) + + it('assembles ProviderSelectionInput from chain data', async () => { + 
server.use(JSONRPC(presets.basic)) + + const client = createPublicClient({ + chain: calibration, + transport: http(), + }) + + const result = await fetchProviderSelectionInput(client, { + address: ADDRESSES.client1, + }) + + // Providers fetched from approved PDP providers + assert.ok(result.providers.length > 0) + assert.ok(result.providers.every((p) => p.pdp != null)) + + // Endorsed IDs from endorsements contract (empty in basic preset) + assert.ok(result.endorsedIds instanceof Set) + + // Client datasets populated with piece counts + assert.ok(Array.isArray(result.clientDataSets)) + }) + + it('returns empty clientDataSets when client has no datasets', async () => { + server.use( + JSONRPC({ + ...presets.basic, + warmStorageView: { + ...presets.basic.warmStorageView, + getClientDataSets: () => [[]], + }, + }) + ) + + const client = createPublicClient({ + chain: calibration, + transport: http(), + }) + + const result = await fetchProviderSelectionInput(client, { + address: ADDRESSES.client1, + }) + + assert.equal(result.clientDataSets.length, 0) + // Providers and endorsedIds still populated + assert.ok(result.providers.length > 0) + assert.ok(result.endorsedIds instanceof Set) + }) + + it('populates endorsedIds from endorsements contract', async () => { + server.use( + JSONRPC({ + ...presets.basic, + endorsements: { + ...presets.basic.endorsements, + getProviderIds: () => [[1n, 2n]], + }, + }) + ) + + const client = createPublicClient({ + chain: calibration, + transport: http(), + }) + + const result = await fetchProviderSelectionInput(client, { + address: ADDRESSES.client1, + }) + + assert.ok(result.endorsedIds.has(1n)) + assert.ok(result.endorsedIds.has(2n)) + assert.equal(result.endorsedIds.size, 2) + }) +}) diff --git a/packages/synapse-core/test/find-matching-data-sets.test.ts b/packages/synapse-core/test/find-matching-data-sets.test.ts new file mode 100644 index 00000000..fab4a879 --- /dev/null +++ 
b/packages/synapse-core/test/find-matching-data-sets.test.ts
@@ -0,0 +1,151 @@
+import assert from 'assert'
+import { findMatchingDataSets, metadataMatches } from '../src/warm-storage/find-matching-data-sets.ts'
+import type { SelectionDataSet } from '../src/warm-storage/location-types.ts'
+
+/** Helper to create a SelectionDataSet with sensible defaults */
+function makeDataSet(
+  overrides: Partial<SelectionDataSet> & { dataSetId: bigint; providerId: bigint }
+): SelectionDataSet {
+  return {
+    metadata: {},
+    activePieceCount: 0n,
+    pdpEndEpoch: 0n,
+    live: true,
+    managed: true,
+    ...overrides,
+  }
+}
+
+describe('metadataMatches', () => {
+  it('matches identical metadata', () => {
+    assert.equal(metadataMatches({ source: 'app', env: 'prod' }, { source: 'app', env: 'prod' }), true)
+  })
+
+  it('matches empty metadata', () => {
+    assert.equal(metadataMatches({}, {}), true)
+  })
+
+  it('rejects different values', () => {
+    assert.equal(metadataMatches({ source: 'app' }, { source: 'other' }), false)
+  })
+
+  it('rejects extra keys in dataset', () => {
+    assert.equal(metadataMatches({ source: 'app', extra: 'val' }, { source: 'app' }), false)
+  })
+
+  it('rejects missing keys in dataset', () => {
+    assert.equal(metadataMatches({ source: 'app' }, { source: 'app', extra: 'val' }), false)
+  })
+
+  it('rejects when dataset has keys but requested is empty', () => {
+    assert.equal(metadataMatches({ source: 'app' }, {}), false)
+  })
+
+  it('rejects when requested has keys but dataset is empty', () => {
+    assert.equal(metadataMatches({}, { source: 'app' }), false)
+  })
+
+  it('is order-independent', () => {
+    assert.equal(metadataMatches({ b: '2', a: '1', c: '3' }, { c: '3', a: '1', b: '2' }), true)
+  })
+})
+
+describe('findMatchingDataSets', () => {
+  it('returns empty array when no datasets match metadata', () => {
+    const dataSets = [makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: { source: 'other' } })]
+    const result = findMatchingDataSets(dataSets, { source: 'app' })
assert.equal(result.length, 0) + }) + + it('returns matching datasets', () => { + const dataSets = [ + makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: { source: 'app' } }), + makeDataSet({ dataSetId: 2n, providerId: 2n, metadata: { source: 'other' } }), + makeDataSet({ dataSetId: 3n, providerId: 3n, metadata: { source: 'app' } }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + assert.equal(result.length, 2) + assert.equal(result[0].dataSetId, 1n) + assert.equal(result[1].dataSetId, 3n) + }) + + it('sorts datasets with pieces before empty ones', () => { + const dataSets = [ + makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: { source: 'app' }, activePieceCount: 0n }), + makeDataSet({ dataSetId: 2n, providerId: 2n, metadata: { source: 'app' }, activePieceCount: 5n }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + assert.equal(result[0].dataSetId, 2n) + assert.equal(result[1].dataSetId, 1n) + }) + + it('sorts by ID ascending within same piece group', () => { + const dataSets = [ + makeDataSet({ dataSetId: 10n, providerId: 1n, metadata: { source: 'app' }, activePieceCount: 3n }), + makeDataSet({ dataSetId: 5n, providerId: 2n, metadata: { source: 'app' }, activePieceCount: 3n }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + assert.equal(result[0].dataSetId, 5n) + assert.equal(result[1].dataSetId, 10n) + }) + + it('excludes terminated datasets (pdpEndEpoch > 0)', () => { + const dataSets = [ + makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: { source: 'app' }, pdpEndEpoch: 100n }), + makeDataSet({ dataSetId: 2n, providerId: 2n, metadata: { source: 'app' }, pdpEndEpoch: 0n }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + assert.equal(result.length, 1) + assert.equal(result[0].dataSetId, 2n) + }) + + it('excludes non-live datasets', () => { + const dataSets = [ + makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: { source: 'app' }, live: false 
}), + makeDataSet({ dataSetId: 2n, providerId: 2n, metadata: { source: 'app' }, live: true }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + assert.equal(result.length, 1) + assert.equal(result[0].dataSetId, 2n) + }) + + it('excludes non-managed datasets', () => { + const dataSets = [ + makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: { source: 'app' }, managed: false }), + makeDataSet({ dataSetId: 2n, providerId: 2n, metadata: { source: 'app' }, managed: true }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + assert.equal(result.length, 1) + assert.equal(result[0].dataSetId, 2n) + }) + + it('matches empty metadata against datasets with empty metadata', () => { + const dataSets = [ + makeDataSet({ dataSetId: 1n, providerId: 1n, metadata: {} }), + makeDataSet({ dataSetId: 2n, providerId: 2n, metadata: { source: 'app' } }), + ] + const result = findMatchingDataSets(dataSets, {}) + assert.equal(result.length, 1) + assert.equal(result[0].dataSetId, 1n) + }) + + it('returns empty array when input is empty', () => { + const result = findMatchingDataSets([], { source: 'app' }) + assert.equal(result.length, 0) + }) + + it('full sorting: pieces first, then by ID within groups', () => { + const dataSets = [ + makeDataSet({ dataSetId: 10n, providerId: 1n, metadata: { source: 'app' }, activePieceCount: 0n }), + makeDataSet({ dataSetId: 5n, providerId: 2n, metadata: { source: 'app' }, activePieceCount: 3n }), + makeDataSet({ dataSetId: 3n, providerId: 3n, metadata: { source: 'app' }, activePieceCount: 0n }), + makeDataSet({ dataSetId: 8n, providerId: 4n, metadata: { source: 'app' }, activePieceCount: 7n }), + ] + const result = findMatchingDataSets(dataSets, { source: 'app' }) + // Pieces first (5n, 8n by ID ascending), then empty (3n, 10n by ID ascending) + assert.deepEqual( + result.map((ds) => ds.dataSetId), + [5n, 8n, 3n, 10n] + ) + }) +}) diff --git a/packages/synapse-core/test/select-providers.test.ts 
b/packages/synapse-core/test/select-providers.test.ts
new file mode 100644
index 00000000..15a9dfe8
--- /dev/null
+++ b/packages/synapse-core/test/select-providers.test.ts
@@ -0,0 +1,444 @@
+import assert from 'assert'
+import type { Hex } from 'viem'
+import type { PDPProvider } from '../src/sp-registry/types.ts'
+import type { SelectionDataSet } from '../src/warm-storage/location-types.ts'
+import { selectProviders } from '../src/warm-storage/select-providers.ts'
+
+/** Create a minimal PDPProvider fixture */
+function makeProvider(id: bigint, serviceURL = `https://sp${id}.example.com`): PDPProvider {
+  return {
+    id,
+    serviceProvider: `0x000000000000000000000000000000000000000${id}` as `0x${string}`,
+    payee: '0x1000000000000000000000000000000000000001' as `0x${string}`,
+    name: `Provider ${id}`,
+    description: `Test provider ${id}`,
+    isActive: true,
+    pdp: {
+      serviceURL,
+      minPieceSizeInBytes: 1024n,
+      maxPieceSizeInBytes: 32n * 1024n * 1024n * 1024n,
+      storagePricePerTibPerDay: 1000000n,
+      minProvingPeriodInEpochs: 30n,
+      location: 'us-east',
+      paymentTokenAddress: '0x0000000000000000000000000000000000000000' as Hex,
+      ipniPiece: false,
+      ipniIpfs: false,
+    },
+  }
+}
+
+/** Create a SelectionDataSet fixture */
+function makeDataSet(
+  overrides: Partial<SelectionDataSet> & { dataSetId: bigint; providerId: bigint }
+): SelectionDataSet {
+  return {
+    metadata: {},
+    activePieceCount: 0n,
+    pdpEndEpoch: 0n,
+    live: true,
+    managed: true,
+    ...overrides,
+  }
+}
+
+describe('selectProviders', () => {
+  const provider1 = makeProvider(1n)
+  const provider2 = makeProvider(2n)
+  const provider3 = makeProvider(3n)
+
+  describe('basic selection', () => {
+    it('returns empty when no providers available', () => {
+      const result = selectProviders({
+        providers: [],
+        endorsedIds: new Set(),
+        clientDataSets: [],
+      })
+      assert.equal(result.length, 0)
+    })
+
+    it('returns empty when all providers are excluded', () => {
+      const result = selectProviders({
+        providers: [provider1, provider2],
+ endorsedIds: new Set(), + clientDataSets: [], + excludeProviderIds: new Set([1n, 2n]), + }) + assert.equal(result.length, 0) + }) + + it('selects provider with new dataset when no existing datasets', () => { + const result = selectProviders({ + providers: [provider1], + endorsedIds: new Set(), + clientDataSets: [], + metadata: { source: 'app' }, + }) + assert.equal(result.length, 1) + assert.equal(result[0].provider.id, 1n) + assert.equal(result[0].dataSetId, null) + assert.equal(result[0].endorsed, false) + assert.deepEqual(result[0].dataSetMetadata, { source: 'app' }) + }) + + it('respects count option', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set(), + clientDataSets: [], + count: 2, + }) + assert.equal(result.length, 2) + }) + + it('returns fewer than count if not enough providers', () => { + const result = selectProviders({ + providers: [provider1], + endorsedIds: new Set(), + clientDataSets: [], + count: 3, + }) + assert.equal(result.length, 1) + }) + + it('selects different providers for each result', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set(), + clientDataSets: [], + count: 3, + }) + const ids = result.map((r) => r.provider.id) + assert.equal(new Set(ids).size, 3) + }) + }) + + describe('endorsed pool restriction', () => { + it('restricts to endorsed providers when endorsedIds is non-empty', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set([1n]), + clientDataSets: [], + }) + assert.equal(result.length, 1) + assert.equal(result[0].provider.id, 1n) + assert.equal(result[0].endorsed, true) + }) + + it('prefers existing dataset on endorsed provider', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set([1n]), + clientDataSets: [ + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { source: 'app' 
}, + activePieceCount: 5n, + }), + makeDataSet({ + dataSetId: 20n, + providerId: 2n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + ], + metadata: { source: 'app' }, + }) + assert.equal(result.length, 1) + assert.equal(result[0].provider.id, 1n) + assert.equal(result[0].dataSetId, 10n) + assert.equal(result[0].endorsed, true) + assert.deepEqual(result[0].dataSetMetadata, { source: 'app' }) + }) + + it('creates new dataset when endorsed provider has no matching datasets', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set([1n]), + clientDataSets: [ + makeDataSet({ + dataSetId: 20n, + providerId: 2n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + ], + metadata: { source: 'app' }, + }) + assert.equal(result[0].provider.id, 1n) + assert.equal(result[0].dataSetId, null) + assert.equal(result[0].endorsed, true) + assert.deepEqual(result[0].dataSetMetadata, { source: 'app' }) + }) + + it('defaults metadata to empty object when not provided', () => { + const result = selectProviders({ + providers: [provider1], + endorsedIds: new Set(), + clientDataSets: [], + }) + assert.equal(result.length, 1) + assert.deepEqual(result[0].dataSetMetadata, {}) + }) + + it('ignores non-endorsed providers when endorsedIds is non-empty', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set([1n]), + clientDataSets: [ + makeDataSet({ + dataSetId: 20n, + providerId: 2n, + metadata: { source: 'app' }, + activePieceCount: 10n, + }), + ], + metadata: { source: 'app' }, + count: 3, + }) + assert.equal(result.length, 1) + assert.equal(result[0].provider.id, 1n) + }) + + it('uses all providers when endorsedIds is empty', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 20n, + providerId: 2n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + 
], + metadata: { source: 'app' }, + }) + assert.equal(result[0].provider.id, 2n) + assert.equal(result[0].dataSetId, 20n) + assert.equal(result[0].endorsed, false) + }) + + it('creates new dataset when pool is unrestricted and no matching datasets', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set(), + clientDataSets: [], + metadata: { source: 'app' }, + }) + assert.equal(result.length, 1) + assert.equal(result[0].dataSetId, null) + assert.equal(result[0].endorsed, false) + }) + }) + + describe('multi-provider selection', () => { + it('selects multiple providers from the same pool', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set(), + clientDataSets: [], + metadata: { source: 'app' }, + count: 2, + }) + assert.equal(result.length, 2) + assert.notEqual(result[0].provider.id, result[1].provider.id) + }) + + it('reuses existing datasets across multiple providers', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { source: 'app' }, + activePieceCount: 5n, + }), + makeDataSet({ + dataSetId: 20n, + providerId: 2n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + ], + metadata: { source: 'app' }, + count: 2, + }) + assert.equal(result.length, 2) + assert.equal(result[0].provider.id, 1n) + assert.equal(result[0].dataSetId, 10n) + assert.equal(result[1].provider.id, 2n) + assert.equal(result[1].dataSetId, 20n) + }) + + it('falls through to new dataset when existing datasets are exhausted', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { source: 'app' }, + activePieceCount: 5n, + }), + ], + metadata: { source: 'app' }, + count: 2, + }) + 
assert.equal(result.length, 2) + // First: existing dataset, metadata from dataset + assert.equal(result[0].dataSetId, 10n) + assert.deepEqual(result[0].dataSetMetadata, { source: 'app' }) + // Second: new dataset, metadata from request + assert.equal(result[1].provider.id, 2n) + assert.equal(result[1].dataSetId, null) + assert.deepEqual(result[1].dataSetMetadata, { source: 'app' }) + }) + }) + + describe('dataset preference', () => { + it('prefers dataset with pieces over empty dataset on same provider', () => { + const result = selectProviders({ + providers: [provider1], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 5n, + providerId: 1n, + metadata: { source: 'app' }, + activePieceCount: 0n, + }), + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + ], + metadata: { source: 'app' }, + }) + assert.equal(result[0].dataSetId, 10n) + }) + + it('prefers older dataset when both have pieces', () => { + const result = selectProviders({ + providers: [provider1], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + makeDataSet({ + dataSetId: 5n, + providerId: 1n, + metadata: { source: 'app' }, + activePieceCount: 3n, + }), + ], + metadata: { source: 'app' }, + }) + assert.equal(result[0].dataSetId, 5n) + }) + }) + + describe('metadata filtering', () => { + it('only considers datasets matching requested metadata', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { env: 'prod' }, + activePieceCount: 5n, + }), + makeDataSet({ + dataSetId: 20n, + providerId: 2n, + metadata: { env: 'test' }, + activePieceCount: 5n, + }), + ], + metadata: { env: 'test' }, + }) + assert.equal(result[0].provider.id, 2n) + assert.equal(result[0].dataSetId, 20n) + 
assert.deepEqual(result[0].dataSetMetadata, { env: 'test' }) + }) + + it('creates new dataset when metadata does not match any existing', () => { + const result = selectProviders({ + providers: [provider1], + endorsedIds: new Set(), + clientDataSets: [ + makeDataSet({ + dataSetId: 10n, + providerId: 1n, + metadata: { env: 'prod' }, + }), + ], + metadata: { env: 'test' }, + }) + assert.equal(result[0].provider.id, 1n) + assert.equal(result[0].dataSetId, null) + assert.deepEqual(result[0].dataSetMetadata, { env: 'test' }) + }) + }) + + describe('exclusion', () => { + it('excludes specified provider IDs', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set(), + clientDataSets: [], + excludeProviderIds: new Set([1n]), + }) + assert.equal(result.length, 1) + assert.notEqual(result[0].provider.id, 1n) + }) + + it('excludes endorsed provider when specified', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set([1n]), + clientDataSets: [], + excludeProviderIds: new Set([1n]), + }) + // Provider 1 excluded, and endorsedIds restricts pool to only endorsed + // So no providers are eligible + assert.equal(result.length, 0) + }) + }) + + describe('endorsed flag on results', () => { + it('marks endorsed providers correctly', () => { + const result = selectProviders({ + providers: [provider1, provider2, provider3], + endorsedIds: new Set([2n]), + clientDataSets: [], + count: 3, + }) + // Only provider 2 is endorsed, and endorsedIds restricts the pool + assert.equal(result.length, 1) + assert.equal(result[0].provider.id, 2n) + assert.equal(result[0].endorsed, true) + }) + + it('marks all as non-endorsed when endorsedIds is empty', () => { + const result = selectProviders({ + providers: [provider1, provider2], + endorsedIds: new Set(), + clientDataSets: [], + count: 2, + }) + assert.ok(result.every((r) => !r.endorsed)) + }) + }) +}) diff --git 
a/packages/synapse-sdk/src/storage/context.ts b/packages/synapse-sdk/src/storage/context.ts index aa0118ef..bb8a2f08 100644 --- a/packages/synapse-sdk/src/storage/context.ts +++ b/packages/synapse-sdk/src/storage/context.ts @@ -28,7 +28,6 @@ */ import { asChain, type Chain as FilecoinChain } from '@filoz/synapse-core/chains' -import { getProviderIds as getEndorsedProviderIds } from '@filoz/synapse-core/endorsements' import { InvalidPieceCIDError } from '@filoz/synapse-core/errors' import * as PDPVerifier from '@filoz/synapse-core/pdp-verifier' import * as Piece from '@filoz/synapse-core/piece' @@ -42,10 +41,15 @@ import { epochToDate, type MetadataObject, pieceMetadataObjectToEntry, - randIndex, randU256, timeUntilEpoch, } from '@filoz/synapse-core/utils' +import { + fetchProviderSelectionInput, + metadataMatches, + type ResolvedLocation, + selectProviders, +} from '@filoz/synapse-core/warm-storage' import type { Account, Address, Chain, Client, Hash, Hex, Transport } from 'viem' import { getBlockNumber } from 'viem/actions' import { SPRegistryService } from '../sp-registry/index.ts' @@ -71,7 +75,7 @@ import type { UploadResult, } from '../types.ts' import { createError, SIZE_CONSTANTS } from '../utils/index.ts' -import { combineMetadata, metadataMatches } from '../utils/metadata.ts' +import { combineMetadata } from '../utils/metadata.ts' import type { WarmStorageService } from '../warm-storage/index.ts' const NO_REMAINING_PROVIDERS_ERROR_MESSAGE = 'No approved service providers available' @@ -239,30 +243,38 @@ export class StorageContext { ) } - // Fill remaining slots via smart selection if count exceeds explicit resolutions - const count = options.count ?? (resolutions.length > 0 ? resolutions.length : 2) - if (resolutions.length < count) { - const excludeProviderIds = [...(options.excludeProviderIds ?? 
[]), ...resolutions.map((r) => r.provider.id)] - - for (let i = resolutions.length; i < count; i++) { - try { - const resolution = await StorageContext.smartSelectProvider( - clientAddress, - options.metadata ?? {}, - options.warmStorageService, - spRegistry, - excludeProviderIds, - resolutions.length === 0 ? await getEndorsedProviderIds(options.synapse.client) : new Set() + if (resolutions.length > 0) { + // Explicit path — validate count matches deduped results + const count = options.count ?? resolutions.length + if (resolutions.length !== count) { + throw createError( + 'StorageContext', + 'createContexts', + `Requested ${count} context(s) but ${hasDataSetIds ? 'dataSetIds' : 'providerIds'}` + + ` resolved to ${resolutions.length} after deduplication` + ) + } + // Multiple dataSetIds may resolve to the same provider — each context must target a unique provider + if (hasDataSetIds && resolutions.length > 1) { + const providerIds = resolutions.map((r) => r.provider.id) + if (new Set(providerIds).size !== providerIds.length) { + throw createError( + 'StorageContext', + 'createContexts', + 'dataSetIds resolve to duplicate providers - each context must use a unique provider' ) - excludeProviderIds.push(resolution.provider.id) - resolutions.push(resolution) - } catch (error) { - if (error instanceof Error && error.message.includes(NO_REMAINING_PROVIDERS_ERROR_MESSAGE)) { - break - } - throw error } } + } else { + // Smart selection path (neither dataSetIds nor providerIds provided) + const count = options.count ?? 2 + resolutions = await StorageContext.smartSelect({ + synapse: options.synapse, + metadata: options.metadata ?? {}, + count, + excludeProviderIds: new Set(options.excludeProviderIds ?? 
[]), + requireEndorsedPrimary: true, + }) } return await Promise.all( @@ -313,9 +325,8 @@ export class StorageContext { console.error('Error in onProviderSelected callback:', error) } - if (resolution.dataSetId !== -1n) { + if (resolution.dataSetId != null) { options.callbacks?.onDataSetResolved?.({ - isExisting: resolution.dataSetId !== -1n, dataSetId: resolution.dataSetId, provider: resolution.provider, }) @@ -325,7 +336,7 @@ export class StorageContext { synapse, warmStorageService, provider: resolution.provider, - dataSetId: resolution.dataSetId === -1n ? undefined : resolution.dataSetId, + dataSetId: resolution.dataSetId ?? undefined, options, dataSetMetadata: resolution.dataSetMetadata, }) @@ -374,14 +385,17 @@ export class StorageContext { } // Smart selection when no specific parameters provided - return await StorageContext.smartSelectProvider( - clientAddress, - requestedMetadata, - warmStorageService, - spRegistry, - options.excludeProviderIds ?? [], - new Set() - ) + const results = await StorageContext.smartSelect({ + synapse, + metadata: requestedMetadata, + count: 1, + excludeProviderIds: new Set(options.excludeProviderIds ?? []), + requireEndorsedPrimary: false, + }) + if (results.length === 0) { + throw createError('StorageContext', 'resolveProviderAndDataSet', NO_REMAINING_PROVIDERS_ERROR_MESSAGE) + } + return results[0] } /** @@ -423,20 +437,22 @@ export class StorageContext { return { provider, dataSetId, - isExisting: true, dataSetMetadata, } } /** - * Resolve the best matching DataSet for a Provider using a specific provider ID + * Resolve the best matching DataSet for a Provider using a specific provider ID. * - * Selection Logic: + * Selection logic: * 1. Filters for datasets belonging to this provider * 2. Sorts by dataSetId ascending (oldest first) * 3. Searches in batches for metadata match * 4. Prioritizes datasets with pieces > 0, then falls back to the oldest valid dataset * 5. 
Exits early as soon as a non-empty matching dataset is found + * + * The batched enrichment exists to bound RPC calls for accounts with many + * datasets. Before simplifying, see https://github.com/FilOzone/synapse-sdk/issues/631 */ private static async resolveByProviderId( clientAddress: Address, @@ -470,6 +486,7 @@ export class StorageContext { return Number(a.dataSetId) - Number(b.dataSetId) }) + // Batch enrichment to bound concurrent RPC calls (PR #487) const MIN_BATCH_SIZE = 50 const MAX_BATCH_SIZE = 200 const BATCH_SIZE = Math.min(MAX_BATCH_SIZE, Math.max(MIN_BATCH_SIZE, Math.ceil(sortedDataSets.length / 3), 1)) @@ -527,231 +544,110 @@ export class StorageContext { return { provider, dataSetId: selectedDataSet.dataSetId, - isExisting: true, dataSetMetadata: selectedDataSet.dataSetMetadata, } } return { provider, - dataSetId: -1n, // Marker for new data set - isExisting: false, + dataSetId: null, dataSetMetadata: requestedMetadata, } } /** - * Select a provider and optionally an existing data set for storage. + * Smart provider selection with ping-retry. * - * Selection is 2-tier per role. Tier 1 prefers existing data sets (deterministic, - * sorted by piece count then data set ID). Tier 2 creates a new data set with a - * random provider. All candidates are ping-validated before selection. + * Fetches chain data once, then selects up to `count` providers from the + * cached result, pinging each candidate and excluding failures. When + * requireEndorsedPrimary=true, the first slot requires an endorsed provider; + * remaining slots accept any approved provider. * - * Role is determined by {@link endorsedProviderIds}: non-empty restricts to endorsed - * providers only (primary) and throws if none reachable; empty allows any approved - * provider (secondary). 
- * - * @param clientAddress - Wallet address to look up existing data sets for - * @param requestedMetadata - Dataset metadata filter; only data sets with matching metadata are considered - * @param warmStorageService - Service for data set and provider lookups - * @param spRegistry - Registry for provider details and PDP endpoints - * @param excludeProviderIds - Provider IDs to skip (already used by other contexts) - * @param endorsedProviderIds - Endorsed provider IDs; non-empty = primary (endorsed-only), empty = secondary (any approved) - * @returns Resolved provider, data set ID (-1n if new), and metadata - * @throws When no eligible provider passes health check + * @param options - Selection parameters + * @returns Resolved providers with dataset info, up to `count` length + * @throws When requireEndorsedPrimary and no healthy endorsed provider is available */ - private static async smartSelectProvider( - clientAddress: Address, - requestedMetadata: Record, - warmStorageService: WarmStorageService, - spRegistry: SPRegistryService, - excludeProviderIds: bigint[], - endorsedProviderIds: Set - ): Promise { - const dataSets = await warmStorageService.getClientDataSetsWithDetails({ address: clientAddress }) - - const skipProviderIds = new Set(excludeProviderIds) - const managedDataSets = dataSets.filter( - (ps) => - ps.isLive && - ps.isManaged && - ps.pdpEndEpoch === 0n && - !skipProviderIds.has(ps.providerId) && - metadataMatches(ps.metadata, requestedMetadata) - ) - - type DataSetWithDetails = (typeof managedDataSets)[number] - const sortDataSets = (sets: DataSetWithDetails[]): DataSetWithDetails[] => - [...sets].sort((a, b) => { - if (a.activePieceCount > 0n && b.activePieceCount === 0n) return -1 - if (b.activePieceCount > 0n && a.activePieceCount === 0n) return 1 - return Number(a.pdpVerifierDataSetId - b.pdpVerifierDataSetId) - }) - - const createDataSetProviderGenerator = (sets: DataSetWithDetails[]) => - async function* (): AsyncGenerator { - const 
yieldedProviders = new Set() - for (const dataSet of sets) { - if (yieldedProviders.has(dataSet.providerId)) continue - yieldedProviders.add(dataSet.providerId) - const provider = await spRegistry.getProvider({ providerId: dataSet.providerId }) - if (provider == null) { - console.warn( - `Provider ID ${dataSet.providerId} for data set ${dataSet.pdpVerifierDataSetId} is not currently approved` - ) - continue - } - yield provider - } - } - - const createResultFromDataSet = async ( - provider: PDPProvider, - sets: DataSetWithDetails[] - ): Promise => { - const matchingDataSet = sets.find((ps) => ps.providerId === provider.id) - if (matchingDataSet == null) { - console.warn( - `Could not match selected provider ${provider.serviceProvider} (ID: ${provider.id}) ` + - `to existing data sets. Falling back to new data set.` - ) - return { - provider, - dataSetId: -1n, - isExisting: false, - dataSetMetadata: requestedMetadata, - } - } - const dataSetMetadata = await warmStorageService.getDataSetMetadata({ - dataSetId: matchingDataSet.pdpVerifierDataSetId, - }) - return { - provider, - dataSetId: matchingDataSet.pdpVerifierDataSetId, - isExisting: true, - dataSetMetadata, - } - } + private static async smartSelect(options: { + synapse: Synapse + metadata: Record + count: number + excludeProviderIds: Set + requireEndorsedPrimary: boolean + }): Promise { + const { synapse, metadata, count, requireEndorsedPrimary } = options + const clientAddress = synapse.client.account.address - const createNewDataSetResult = (provider: PDPProvider): ProviderSelectionResult => ({ - provider, - dataSetId: -1n, - isExisting: false, - dataSetMetadata: requestedMetadata, + const input = await fetchProviderSelectionInput(synapse.client, { + address: clientAddress, }) - const isPrimarySelection = endorsedProviderIds.size > 0 - - // Fetch approved providers (needed for both paths) - const approvedIds = await warmStorageService.getApprovedProviderIds() - const approvedProviders = await 
spRegistry.getProviders({ providerIds: approvedIds }) - const allProviders = approvedProviders.filter((p: PDPProvider) => !excludeProviderIds.includes(p.id)) + // Inline ping-retry loop: select a candidate from core, ping it, + // exclude on failure, re-select. One outer iteration per copy needed. + const results: ProviderSelectionResult[] = [] + const excludeProviderIds = new Set(options.excludeProviderIds) + + for (let i = 0; i < count; i++) { + const endorsedSlot = requireEndorsedPrimary && i === 0 + let found = false + let pingFailures = 0 + + // Keep selecting and pinging until a healthy provider is found + // or all candidates are exhausted + for (;;) { + const candidates = selectProviders({ + ...input, + endorsedIds: endorsedSlot ? input.endorsedIds : new Set(), + count: 1, + excludeProviderIds, + metadata, + }) - if (isPrimarySelection) { - // Primary: endorsed providers only, no fallback to non-endorsed - const endorsedDataSets = managedDataSets.filter((ds) => endorsedProviderIds.has(ds.providerId)) + if (candidates.length === 0) break - // Tier 1: Existing data sets with endorsed providers - if (endorsedDataSets.length > 0) { - const sorted = sortDataSets(endorsedDataSets) - const provider = await StorageContext.selectProviderWithPing(createDataSetProviderGenerator(sorted)()) - if (provider != null) { - return await createResultFromDataSet(provider, sorted) - } - } - - // Tier 2: New data set with endorsed provider - const endorsedProviders = allProviders.filter((p: PDPProvider) => endorsedProviderIds.has(p.id)) - if (endorsedProviders.length > 0) { - const provider = await StorageContext.selectRandomProvider(endorsedProviders) - if (provider != null) { - return createNewDataSetResult(provider) + const candidate = candidates[0] + try { + await SP.ping(candidate.provider.pdp.serviceURL) + results.push(StorageContext.toProviderSelectionResult(candidate)) + excludeProviderIds.add(candidate.provider.id) + found = true + break + } catch (error) { + 
console.warn( + `Provider ${candidate.provider.serviceProvider} (ID: ${candidate.provider.id}) failed ping:`, + error instanceof Error ? error.message : String(error) + ) + excludeProviderIds.add(candidate.provider.id) + pingFailures++ } } - // All endorsed providers exhausted, no fall back to non-endorsed, this is a FOC system-level failure for the user - const endorsedCount = [...endorsedProviderIds].filter((id) => !excludeProviderIds.includes(id)).length - throw createError( - 'StorageContext', - 'smartSelectProvider', - endorsedCount > 0 - ? `No endorsed provider available — all ${endorsedCount} endorsed provider(s) failed health check` - : 'No endorsed provider available' - ) - } - - // Secondary: any approved provider - // Tier 1: Existing data sets with any approved provider - if (managedDataSets.length > 0) { - const sorted = sortDataSets(managedDataSets) - const provider = await StorageContext.selectProviderWithPing(createDataSetProviderGenerator(sorted)()) - if (provider != null) { - return await createResultFromDataSet(provider, sorted) - } - } - - // Tier 2: New data set with any approved provider - if (allProviders.length > 0) { - const provider = await StorageContext.selectRandomProvider(allProviders) - if (provider != null) { - return createNewDataSetResult(provider) + if (endorsedSlot && !found) { + throw createError( + 'StorageContext', + 'smartSelect', + pingFailures > 0 + ? `No endorsed provider available — all endorsed provider(s) failed health check` + : 'No endorsed provider available' + ) } - } - if (allProviders.length === 0) { - throw createError('StorageContext', 'smartSelectProvider', NO_REMAINING_PROVIDERS_ERROR_MESSAGE) - } - throw createError( - 'StorageContext', - 'smartSelectProvider', - `All ${allProviders.length} approved provider(s) failed health check` - ) - } - - /** - * Select a random provider from a list with ping validation. 
- * - * @param providers - Array of providers to select from - * @returns Selected provider - */ - private static async selectRandomProvider(providers: PDPProvider[]): Promise { - if (providers.length === 0) { - return null - } - - async function* generateRandomProviders(): AsyncGenerator { - const remaining = [...providers] - while (remaining.length > 0) { - const selected = remaining.splice(randIndex(remaining.length), 1)[0] - yield selected + if (!found) { + break } } - return await StorageContext.selectProviderWithPing(generateRandomProviders()) + return results } /** - * Select a provider from an async iterator with ping validation. - * This is shared logic used by both smart selection and random selection. - * - * @param providers - Async iterable of providers to try - * @returns The first provider that responds - * @throws If all providers fail + * Map core's ResolvedLocation to SDK's ProviderSelectionResult. */ - private static async selectProviderWithPing(providers: AsyncIterable): Promise { - for await (const provider of providers) { - try { - await SP.ping(provider.pdp.serviceURL) - return provider - } catch (error) { - console.warn( - `Provider ${provider.serviceProvider} failed ping test:`, - error instanceof Error ? error.message : String(error) - ) - } + private static toProviderSelectionResult(location: ResolvedLocation): ProviderSelectionResult { + return { + provider: location.provider, + dataSetId: location.dataSetId, + dataSetMetadata: location.dataSetMetadata, } - - return null } /** @@ -870,13 +766,13 @@ export class StorageContext { })) if (this._dataSetId) { - return signAddPieces(this._synapse.client, { + return signAddPieces(this._synapse.sessionClient ?? this._synapse.client, { clientDataSetId: await this.getClientDataSetId(), pieces: signingPieces, }) } - return signCreateDataSetAndAddPieces(this._synapse.client, { + return signCreateDataSetAndAddPieces(this._synapse.sessionClient ?? 
this._synapse.client, { clientDataSetId: randU256(), payee: this._provider.serviceProvider, payer: this._synapse.client.account.address, @@ -903,7 +799,7 @@ export class StorageContext { if (typeof from === 'string') { return createPieceUrlPDP({ cid: pieceCid.toString(), serviceURL: from }) } - return from.getPieceUrl(pieceCid) + return from(pieceCid) } const pullPiecesInput = pieces.map((pieceCid) => ({ @@ -945,7 +841,10 @@ export class StorageContext { metadata: this._dataSetMetadata, } - const response = await SP.waitForPullStatus(this._synapse.client, pullOptions as SP.waitForPullStatus.OptionsType) + const response = await SP.waitForPullStatus( + this._synapse.sessionClient ?? this._synapse.client, + pullOptions as SP.waitForPullStatus.OptionsType + ) const pieceResults = response.pieces.map((piece: { pieceCid: string; status: string }) => { const pieceCid = pieces.find((p) => p.toString() === piece.pieceCid) @@ -995,7 +894,7 @@ export class StorageContext { this.getClientDataSetId(), ]) - const addPiecesResult = await SP.addPieces(this._client, { + const addPiecesResult = await SP.addPieces(this._synapse.sessionClient ?? this._client, { dataSetId: this._dataSetId, clientDataSetId, pieces: pieceInputs, @@ -1016,10 +915,10 @@ export class StorageContext { } // Create new data set and add pieces - const result = await SP.createDataSetAndAddPieces(this._client, { + const result = await SP.createDataSetAndAddPieces(this._synapse.sessionClient ?? 
this._client, { cdn: this._withCDN, payee: this._provider.serviceProvider, - payer: this._client.account.address, + payer: this._synapse.client.account.address, recordKeeper: this._chain.contracts.fwss.address, pieces: pieceInputs, metadata: this._dataSetMetadata, diff --git a/packages/synapse-sdk/src/storage/manager.ts b/packages/synapse-sdk/src/storage/manager.ts index a589c259..a6e18309 100644 --- a/packages/synapse-sdk/src/storage/manager.ts +++ b/packages/synapse-sdk/src/storage/manager.ts @@ -23,7 +23,7 @@ import * as Piece from '@filoz/synapse-core/piece' import type { UploadPieceStreamingData } from '@filoz/synapse-core/sp' import { getPDPProviderByAddress } from '@filoz/synapse-core/sp-registry' -import { randIndex } from '@filoz/synapse-core/utils' +import { metadataMatches } from '@filoz/synapse-core/warm-storage' import { type Address, type Hash, type Hex, zeroAddress } from 'viem' import { CommitError, StoreError } from '../errors/storage.ts' import { SPRegistryService } from '../sp-registry/index.ts' @@ -44,14 +44,7 @@ import type { UploadCallbacks, UploadResult, } from '../types.ts' -import { - combineMetadata, - createError, - METADATA_KEYS, - metadataMatches, - SIZE_CONSTANTS, - TIME_CONSTANTS, -} from '../utils/index.ts' +import { combineMetadata, createError, METADATA_KEYS, SIZE_CONSTANTS, TIME_CONSTANTS } from '../utils/index.ts' import type { WarmStorageService } from '../warm-storage/index.ts' import { StorageContext } from './context.ts' @@ -358,16 +351,16 @@ export class StorageManager { } // Explicit providers disables auto-retry on failure - const explicitProviders = - options?.contexts != null || + const hasExplicitIds = (options?.providerIds != null && options.providerIds.length > 0) || (options?.dataSetIds != null && options.dataSetIds.length > 0) + const explicitProviders = options?.contexts != null || hasExplicitIds const contexts = options?.contexts ?? (await this.createContexts({ withCDN: options?.withCDN, - count: options?.count ?? 
DEFAULT_COPY_COUNT, + count: hasExplicitIds ? options?.count : (options?.count ?? DEFAULT_COPY_COUNT), metadata: options?.metadata, excludeProviderIds: options?.excludeProviderIds, providerIds: options?.providerIds, @@ -422,7 +415,7 @@ export class StorageManager { const providerId = currentSecondary.provider.id const pullResult = await currentSecondary.pull({ pieces: pieceCids, - from: primary, + from: (pieceCid) => primary.getPieceUrl(pieceCid), signal: options.signal, extraData, onProgress: options.onProgress @@ -653,7 +646,6 @@ export class StorageManager { if (defaultContext.dataSetId != null) { try { options.callbacks.onDataSetResolved?.({ - isExisting: true, // Always true for cached context dataSetId: defaultContext.dataSetId, provider: defaultContext.provider, }) @@ -718,7 +710,6 @@ export class StorageManager { if (defaultContext.dataSetId != null) { try { options.callbacks.onDataSetResolved?.({ - isExisting: true, // Always true for cached context dataSetId: defaultContext.dataSetId, provider: defaultContext.provider, }) diff --git a/packages/synapse-sdk/src/test/metadata-selection.test.ts b/packages/synapse-sdk/src/test/metadata-selection.test.ts index 66b7f269..3d923838 100644 --- a/packages/synapse-sdk/src/test/metadata-selection.test.ts +++ b/packages/synapse-sdk/src/test/metadata-selection.test.ts @@ -7,97 +7,9 @@ import { setup } from 'iso-web/msw' import { createWalletClient, http as viemHttp, zeroAddress } from 'viem' import { privateKeyToAccount } from 'viem/accounts' import { METADATA_KEYS } from '../utils/constants.ts' -import { metadataMatches } from '../utils/metadata.ts' import { WarmStorageService } from '../warm-storage/index.ts' describe('Metadata-based Data Set Selection', () => { - describe('Metadata Utilities', () => { - describe('metadataMatches', () => { - it('should not match when data set has extra keys', () => { - const dataSetMetadata: Record = { - environment: 'production', - [METADATA_KEYS.WITH_CDN]: '', - region: 'us-east', - 
} - - const requested: Record = { - [METADATA_KEYS.WITH_CDN]: '', - environment: 'production', - } - - // With exact matching, extra keys in dataSet mean no match - assert.isFalse(metadataMatches(dataSetMetadata, requested)) - }) - - it('should not match when requested value differs', () => { - const dataSetMetadata: Record = { - environment: 'production', - [METADATA_KEYS.WITH_CDN]: '', - } - - const requested: Record = { environment: 'development' } - - assert.isFalse(metadataMatches(dataSetMetadata, requested)) - }) - - it('should not match when requested key is missing', () => { - const dataSetMetadata: Record = { environment: 'production' } - - const requested: Record = { [METADATA_KEYS.WITH_CDN]: '' } - - assert.isFalse(metadataMatches(dataSetMetadata, requested)) - }) - - it('should not match when data set has metadata but empty requested', () => { - const dataSetMetadata: Record = { environment: 'production' } - - const requested: Record = {} - - // With exact matching, non-empty dataSet doesn't match empty request - assert.isFalse(metadataMatches(dataSetMetadata, requested)) - }) - - it('should be order-independent with exact matching', () => { - const dataSetMetadata: Record = { - b: '2', - a: '1', - c: '3', - } - - const requested: Record = { - c: '3', - a: '1', - b: '2', - } - - // Order doesn't matter, but must have exact same keys - assert.isTrue(metadataMatches(dataSetMetadata, requested)) - }) - - it('should match when both have empty metadata', () => { - const dataSetMetadata: Record = {} - const requested: Record = {} - - // Both empty = exact match - assert.isTrue(metadataMatches(dataSetMetadata, requested)) - }) - - it('should match when metadata is exactly the same', () => { - const dataSetMetadata: Record = { - [METADATA_KEYS.WITH_CDN]: '', - environment: 'production', - } - - const requested: Record = { - [METADATA_KEYS.WITH_CDN]: '', - environment: 'production', - } - - assert.isTrue(metadataMatches(dataSetMetadata, requested)) - }) - }) - 
}) - describe('WarmStorageService with Metadata', () => { let server: any let warmStorageService: WarmStorageService @@ -254,30 +166,5 @@ describe('Metadata-based Data Set Selection', () => { assert.isFalse(dataSets[2].withCDN) assert.deepEqual(dataSets[2].metadata, { [METADATA_KEYS.WITH_IPFS_INDEXING]: '' }) }) - - it('should prefer data sets with matching metadata', async () => { - const dataSets = await warmStorageService.getClientDataSetsWithDetails({ address: Mocks.ADDRESSES.client1 }) - - // Filter for data sets with withIPFSIndexing - const withIndexing = dataSets.filter((ds) => - metadataMatches(ds.metadata, { [METADATA_KEYS.WITH_IPFS_INDEXING]: '' }) - ) - - assert.equal(withIndexing.length, 1) - assert.equal(withIndexing[0].pdpVerifierDataSetId, 3n) - - // Filter for data sets with withCDN - const withCDN = dataSets.filter((ds) => metadataMatches(ds.metadata, { [METADATA_KEYS.WITH_CDN]: '' })) - - assert.equal(withCDN.length, 1) - assert.equal(withCDN[0].pdpVerifierDataSetId, 2n) - - // Filter for data sets with no specific metadata (exact empty match) - const noRequirements = dataSets.filter((ds) => metadataMatches(ds.metadata, {})) - - // With exact matching, only data set 1 with empty metadata matches - assert.equal(noRequirements.length, 1) - assert.equal(noRequirements[0].pdpVerifierDataSetId, 1n) - }) }) }) diff --git a/packages/synapse-sdk/src/test/session-keys.test.ts b/packages/synapse-sdk/src/test/session-keys.test.ts index 3ebad322..e3cecd57 100644 --- a/packages/synapse-sdk/src/test/session-keys.test.ts +++ b/packages/synapse-sdk/src/test/session-keys.test.ts @@ -110,8 +110,8 @@ describe('Synapse', () => { const synapse = new Synapse({ client, sessionClient: sessionKey.client }) const firstData = new Uint8Array(127).fill(1) // 127 bytes await synapse.storage.upload(firstData, { - forceCreateDataSet: true, - providerAddress: Mocks.ADDRESSES.serviceProvider1, + count: 1, + providerIds: [1n], }) }) diff --git 
a/packages/synapse-sdk/src/test/storage.test.ts b/packages/synapse-sdk/src/test/storage.test.ts index cf6bfc7e..427044fc 100644 --- a/packages/synapse-sdk/src/test/storage.test.ts +++ b/packages/synapse-sdk/src/test/storage.test.ts @@ -374,7 +374,6 @@ describe('StorageService', () => { providerCallbackFired = true }, onDataSetResolved: (info) => { - assert.isTrue(info.isExisting) assert.equal(info.dataSetId, 1n) dataSetCallbackFired = true }, @@ -554,8 +553,37 @@ describe('StorageService', () => { ...Mocks.presets.basic, warmStorageView: { ...Mocks.presets.basic.warmStorageView, - clientDataSets: () => [[1n, 2n]], - getAllDataSetMetadata: (args) => { + getClientDataSets: () => [ + [ + { + cacheMissRailId: 0n, + cdnRailId: 0n, + clientDataSetId: 0n, + commissionBps: 100n, + dataSetId: 1n, + payee: Mocks.ADDRESSES.serviceProvider1, + payer: Mocks.ADDRESSES.client1, + pdpEndEpoch: 0n, + pdpRailId: 1n, + providerId: 1n, + serviceProvider: Mocks.ADDRESSES.serviceProvider1, + }, + { + cacheMissRailId: 0n, + cdnRailId: 1n, + clientDataSetId: 0n, + commissionBps: 100n, + dataSetId: 2n, + payee: Mocks.ADDRESSES.serviceProvider1, + payer: Mocks.ADDRESSES.client1, + pdpEndEpoch: 0n, + pdpRailId: 2n, + providerId: 1n, + serviceProvider: Mocks.ADDRESSES.serviceProvider1, + }, + ], + ], + getAllDataSetMetadata: (args: any) => { const [dataSetId] = args if (dataSetId === 2n) { return [ @@ -565,42 +593,6 @@ describe('StorageService', () => { } return [[], []] // empty metadata for other data sets }, - getDataSet: (args) => { - const [dataSetId] = args - if (dataSetId === 1n) { - return [ - { - cacheMissRailId: 0n, - cdnRailId: 0n, - clientDataSetId: 0n, - commissionBps: 100n, - dataSetId: 1n, - payee: Mocks.ADDRESSES.serviceProvider1, - payer: Mocks.ADDRESSES.client1, - pdpEndEpoch: 0n, - pdpRailId: 1n, - providerId: 1n, - serviceProvider: Mocks.ADDRESSES.serviceProvider1, - }, - ] - } else { - return [ - { - cacheMissRailId: 0n, - cdnRailId: 1n, - clientDataSetId: 0n, - 
commissionBps: 100n, - dataSetId: 2n, - payee: Mocks.ADDRESSES.serviceProvider1, - payer: Mocks.ADDRESSES.client1, - pdpEndEpoch: 0n, - pdpRailId: 2n, - providerId: 1n, - serviceProvider: Mocks.ADDRESSES.serviceProvider1, - }, - ] - } - }, }, }), Mocks.PING() @@ -1345,8 +1337,8 @@ describe('StorageService', () => { await StorageContext.create({ synapse, warmStorageService }) assert.fail('Should have thrown error') } catch (error: any) { - assert.include(error.message, 'StorageContext smartSelectProvider failed') - assert.include(error.message, 'All 2 approved provider(s) failed health check') + assert.include(error.message, 'StorageContext resolveProviderAndDataSet failed') + assert.include(error.message, 'No approved service providers available') } }) }) diff --git a/packages/synapse-sdk/src/test/synapse.test.ts b/packages/synapse-sdk/src/test/synapse.test.ts index b8e1679c..61e24104 100644 --- a/packages/synapse-sdk/src/test/synapse.test.ts +++ b/packages/synapse-sdk/src/test/synapse.test.ts @@ -526,36 +526,105 @@ describe('Synapse', () => { } }) - it('does not create multiple contexts for the same data set from duplicate dataSetIds', async () => { + it('deduplicates dataSetIds and defaults count to deduped length', async () => { const metadata = { environment: 'test', withCDN: '', } const contexts = await synapse.storage.createContexts({ - count: 2, dataSetIds: [1n, 1n], metadata, }) - assert.equal(contexts.length, 2) - assert.equal((contexts[0] as any)._dataSetId, 1) - assert.notEqual((contexts[0] as any)._dataSetId, (contexts[1] as any)._dataSetId) - // should also use different providers in this case - assert.notEqual(contexts[0].provider.id, contexts[1].provider.id) + assert.equal(contexts.length, 1) + assert.equal((contexts[0] as any)._dataSetId, 1n) + }) + + it('throws when count mismatches deduped dataSetIds', async () => { + const metadata = { + environment: 'test', + withCDN: '', + } + try { + await synapse.storage.createContexts({ + count: 2, + 
dataSetIds: [1n, 1n], + metadata, + }) + assert.fail('Expected createContexts to throw for count mismatch') + } catch (error: any) { + assert.include(error.message, 'Requested 2 context(s)') + assert.include(error.message, 'resolved to 1 after deduplication') + } }) - it('does not create multiple contexts for the same data set from duplicate providerIds', async () => { + it('deduplicates providerIds and defaults count to deduped length', async () => { const metadata = { environment: 'test', withCDN: '', } const contexts = await synapse.storage.createContexts({ - count: 2, providerIds: [Mocks.PROVIDERS.provider1.providerId, Mocks.PROVIDERS.provider1.providerId], metadata, }) - assert.equal(contexts.length, 2) - assert.equal((contexts[0] as any)._dataSetId, 1) - assert.notEqual((contexts[0] as any)._dataSetId, (contexts[1] as any)._dataSetId) + assert.equal(contexts.length, 1) + assert.equal((contexts[0] as any)._dataSetId, 1n) + }) + + it('throws when count mismatches deduped providerIds', async () => { + const metadata = { + environment: 'test', + withCDN: '', + } + try { + await synapse.storage.createContexts({ + count: 2, + providerIds: [Mocks.PROVIDERS.provider1.providerId, Mocks.PROVIDERS.provider1.providerId], + metadata, + }) + assert.fail('Expected createContexts to throw for count mismatch') + } catch (error: any) { + assert.include(error.message, 'Requested 2 context(s)') + assert.include(error.message, 'resolved to 1 after deduplication') + } + }) + + it('throws when dataSetIds resolve to duplicate providers', async () => { + // Override getDataSet so both dataSetId 1 and 2 resolve to providerId 1 + server.use( + Mocks.JSONRPC({ + ...Mocks.presets.basic, + warmStorageView: { + ...Mocks.presets.basic.warmStorageView, + getDataSet: (args: readonly [bigint]) => { + const [dataSetId] = args + return [ + { + cacheMissRailId: 0n, + cdnRailId: 0n, + clientDataSetId: 0n, + commissionBps: 100n, + dataSetId, + payee: Mocks.ADDRESSES.serviceProvider1, + payer: 
Mocks.ADDRESSES.client1, + pdpEndEpoch: 0n, + pdpRailId: dataSetId, + providerId: 1n, // Same provider for both + serviceProvider: Mocks.ADDRESSES.serviceProvider1, + cdnEndEpoch: 0n, + }, + ] + }, + }, + }) + ) + try { + await synapse.storage.createContexts({ + dataSetIds: [1n, 2n], + }) + assert.fail('Expected error for duplicate providers') + } catch (error: any) { + assert.include(error.message, 'dataSetIds resolve to duplicate providers') + } }) it('throws when both dataSetIds and providerIds are specified', async () => { diff --git a/packages/synapse-sdk/src/types.ts b/packages/synapse-sdk/src/types.ts index 27b89f90..abc76562 100644 --- a/packages/synapse-sdk/src/types.ts +++ b/packages/synapse-sdk/src/types.ts @@ -236,10 +236,12 @@ export interface StorageContextCallbacks { onProviderSelected?: (provider: PDPProvider) => void /** - * Called when data set resolution is complete - * @param info - Information about the resolved data set + * Called when an existing data set is matched during provider selection. + * Not called when a new data set will be created (dataSetId is null on the + * resolution result); the data set ID is assigned during commit. + * @param info - The matched data set and its provider */ - onDataSetResolved?: (info: { isExisting: boolean; dataSetId: bigint; provider: PDPProvider }) => void + onDataSetResolved?: (info: { dataSetId: bigint; provider: PDPProvider }) => void } /** @@ -458,9 +460,10 @@ export interface StoreResult { } /** - * Source for pulling pieces from another provider + * Source for pulling pieces from another provider. + * Either a base URL string or a function that returns a piece URL for a given PieceCID. 
*/ -export type PullSource = string | { getPieceUrl: (pieceCid: PieceCID) => string } +export type PullSource = string | ((pieceCid: PieceCID) => string) /** * Options for pulling pieces from a source provider @@ -634,10 +637,8 @@ export interface PieceStatus { export interface ProviderSelectionResult { /** Selected service provider */ provider: PDPProvider - /** Selected data set ID */ - dataSetId: bigint - /** Whether this is an existing data set */ - isExisting?: boolean + /** Selected data set ID, or null if a new data set will be created on commit */ + dataSetId: bigint | null /** Data set metadata */ dataSetMetadata: Record } diff --git a/packages/synapse-sdk/src/utils/index.ts b/packages/synapse-sdk/src/utils/index.ts index 28efe941..adcfc1b6 100644 --- a/packages/synapse-sdk/src/utils/index.ts +++ b/packages/synapse-sdk/src/utils/index.ts @@ -1,3 +1,3 @@ export * from './constants.ts' export { createError } from './errors.ts' -export { combineMetadata, metadataMatches } from './metadata.ts' +export { combineMetadata } from './metadata.ts' diff --git a/packages/synapse-sdk/src/utils/metadata.ts b/packages/synapse-sdk/src/utils/metadata.ts index 65e0f6e7..816956f2 100644 --- a/packages/synapse-sdk/src/utils/metadata.ts +++ b/packages/synapse-sdk/src/utils/metadata.ts @@ -1,39 +1,5 @@ import { METADATA_KEYS } from './constants.ts' -/** - * Checks if a data set's metadata exactly matches the requested metadata. - * - * The data set must contain exactly the same keys and values as requested. - * Order doesn't matter, but the sets must be identical. 
- * - * @param dataSetMetadata - The metadata from the data set - * @param requestedMetadata - The metadata requirements to match - * @returns true if metadata sets are exactly equal (same keys and values) - */ -export function metadataMatches( - dataSetMetadata: Record, - requestedMetadata: Record -): boolean { - const dataSetKeys = Object.keys(dataSetMetadata) - const requestedKeys = Object.keys(requestedMetadata) - - if (dataSetKeys.length !== requestedKeys.length) { - return false - } - - if (requestedKeys.length === 0) { - return true - } - - for (const key of requestedKeys) { - if (dataSetMetadata[key] !== requestedMetadata[key]) { - return false - } - } - - return true -} - /** * Combines metadata object with withCDN flag, ensuring consistent behavior. * If withCDN is true, adds the withCDN key only if not already present. diff --git a/utils/example-storage-e2e.js b/utils/example-storage-e2e.js index 79b8a934..777fe90a 100644 --- a/utils/example-storage-e2e.js +++ b/utils/example-storage-e2e.js @@ -117,10 +117,10 @@ async function main() { const result = await synapse.storage.upload(fileStream, { callbacks: { onProviderSelected: (provider) => { - console.log(` Selected provider: ${provider.serviceProvider}`) + console.log(` Selected SP ${provider.id} (${provider.serviceProvider})`) }, onDataSetResolved: (info) => { - const verb = info.isExisting ? 'Using existing' : 'Created new' + const verb = info.isExisting ? 
'Using existing' : 'Creating new' console.log(` ${verb} data set: ${info.dataSetId}`) }, onProgress: (bytesUploaded) => { @@ -131,16 +131,25 @@ async function main() { } }, onStored: (providerId, pieceCid) => { - console.log(` Stored on provider ${providerId}: ${pieceCid}`) + console.log(` Stored on SP ${providerId}: ${pieceCid}`) + }, + onPullProgress: (providerId, pieceCid, status) => { + console.log(` Pulling to SP ${providerId}: ${pieceCid} (${status})`) + }, + onCopyComplete: (providerId, pieceCid) => { + console.log(` Copied to SP ${providerId}: ${pieceCid}`) + }, + onCopyFailed: (providerId, pieceCid, error) => { + console.log(` Copy failed on SP ${providerId}: ${pieceCid} - ${error.message}`) }, onPiecesAdded: (transaction, providerId, pieces) => { - console.log(` Pieces added for provider ${providerId}, tx: ${transaction}`) + console.log(` Pieces committed on SP ${providerId}, tx: ${transaction}`) for (const { pieceCid } of pieces) { console.log(` ${pieceCid}`) } }, onPiecesConfirmed: (dataSetId, providerId, pieces) => { - console.log(` Data set ${dataSetId} confirmed on provider ${providerId}`) + console.log(` Data set ${dataSetId} confirmed on SP ${providerId}`) for (const { pieceCid, pieceId } of pieces) { console.log(` ${pieceCid} -> pieceId ${pieceId}`) } @@ -163,10 +172,10 @@ async function main() { const contexts = await synapse.storage.createContexts({ callbacks: { onProviderSelected: (provider) => { - console.log(` Selected provider: ${provider.serviceProvider}`) + console.log(` Selected SP ${provider.id} (${provider.serviceProvider})`) }, onDataSetResolved: (info) => { - const verb = info.isExisting ? 'Using existing' : 'Created new' + const verb = info.isExisting ? 'Using existing' : 'Creating new' console.log(` ${verb} data set: ${info.dataSetId}`) }, }, @@ -336,7 +345,7 @@ function formatUSDFC(amount) { * Devnet mode (NETWORK=devnet): * Loads chain config from foc-devnet's devnet-info.json. 
PRIVATE_KEY is * optional - defaults to the first devnet user. - * - DEVNET: Path to devnet-info.json + * - DEVNET_INFO_PATH: Path to devnet-info.json * (default: ~/.foc-devnet/state/latest/devnet-info.json) * - DEVNET_USER_INDEX: Which user from devnet info (default: 0) * @@ -363,7 +372,7 @@ async function resolveConfig() { const { validateDevnetInfo, toChain } = await import('../packages/synapse-core/src/devnet/index.ts') const devnetInfoPath = - process.env.DEVNET || join(homedir(), '.foc-devnet', 'state', 'latest', 'devnet-info.json') + process.env.DEVNET_INFO_PATH || join(homedir(), '.foc-devnet', 'state', 'latest', 'devnet-info.json') const userIndex = Number(process.env.DEVNET_USER_INDEX || '0') console.log(`Loading devnet info from: ${devnetInfoPath}`)