diff --git a/FORK_NOTES.md b/FORK_NOTES.md new file mode 100644 index 0000000..042aa87 --- /dev/null +++ b/FORK_NOTES.md @@ -0,0 +1,62 @@ +# Fork Notes + +This file documents fork-specific behavior added on top of upstream v3.x. + +## Scope + +This fork focuses on Web UI storage behavior and cache management across multiple layouts. + +## Storage Modes in Web UI (serve) + +The server supports three default storage modes for Web UI downloads: + +- cache (default) + - HuggingFace-compatible cache layout with hub plus friendly view + - Best for Python ecosystem compatibility +- flat + - Plain files directly under the configured cache root + - No symlinks +- flat-structured + - Plain files under <cache-root>/<owner>/<repo>/ + - No symlinks + +Set default mode at startup: + +- hfdownloader serve +- hfdownloader serve --flat +- hfdownloader serve --flat-structured + +The Settings page can also update cache directory and default storage mode. + +## Analyze and Download Behavior + +Analyze and Download actions in the Web UI respect the selected storage mode. +The command preview on Analyze includes storage-mode flags so displayed commands match runtime behavior. + +## Cache Page Coverage + +The Cache page and Cache API discover repos across all supported layouts: + +- HuggingFace cache layout (hub/models--..., hub/datasets--...) +- flat-structured layout (<owner>/<repo>/) +- flat-mode indexed downloads via .hfd-flat-index manifests + +## Deletion Behavior + +Delete from Cache supports all layouts above and includes cleanup for partial/interrupted downloads. + +- flat-mode delete removes indexed files and multipart artifacts (.part and .part-*) +- flat-structured and cache-mode delete remove repository directories with safety checks + +## Flat-Mode Filename Rules + +To reduce root-level collisions in flat mode: + +- .gitattributes is skipped +- README.md is renamed to .README.md +- generic root artifacts like mmproj* and imatrix* are prefixed with . 
+ +## Compatibility Notes + +- Empty top-level cache directories (for example hub after delete) may remain and are generally harmless. +- Existing flat downloads created before flat indexing was introduced may not appear until re-downloaded. diff --git a/README.md b/README.md index f7cc4e4..4d9fd0a 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,25 @@ hfdownloader download TheBloke/Mistral-7B-Instruct-v0.2-GGUF \ Both spellings are interchangeable; pick whichever reads better in your scripts. They are mutually exclusive on a single command line. +### Web UI Storage Modes (serve) + +When using the Web UI, the default storage mode is controlled by `serve` +flags and can be changed from Settings: + +```bash +# Default: HuggingFace cache layout (hub/ + friendly view) +hfdownloader serve + +# Flat: write files directly to cache root (no hub/, no symlinks) +hfdownloader serve --flat + +# Flat-structured: write files to <cache-root>/<owner>/<repo>/ +hfdownloader serve --flat-structured +``` + +The Web UI Settings page stores both the cache directory and default storage +mode, and Analyze/Download actions follow that mode. + ### Manifest Tracking Every download creates `hfd.yaml` so you know exactly what you have: @@ -332,7 +351,7 @@ Browse everything you've downloaded with stats, search, and filters: | **Jobs** | Real-time WebSocket progress, pause/resume/cancel, download history | | **Cache** | Browse downloaded repos, disk usage stats, search & filter | | **Mirror** | Configure targets, compare differences, push/pull sync | -| **Settings** | Token, connections, proxy, verification mode | +| **Settings** | Token, cache directory, default storage mode, connections, proxy, verification mode | ### Server Options diff --git a/docs/API.md b/docs/API.md index 8d64105..9623c05 100644 --- a/docs/API.md +++ b/docs/API.md @@ -119,6 +119,7 @@ Start a new download job. 
| `excludes` | string[] | No | `[]` | Exclude patterns | | `appendFilterSubdir` | boolean | No | `false` | Create filter subdirs | | `dryRun` | boolean | No | `false` | Plan only | +| `storageMode` | string | No | `cache` | Storage mode: `cache`, `flat`, `flat-structured` | **Filter Syntax** @@ -147,7 +148,7 @@ Or as separate field: "isDataset": false, "filters": ["q4_k_m"], "excludes": [], - "outputDir": "/home/user/.cache/huggingface/hub", + "outputDir": "/home/user/.cache/huggingface", "status": "queued", "progress": { "totalFiles": 0, @@ -208,7 +209,7 @@ Get download plan without starting download. **Request Body** -Same as `/api/download`. +Same as `/api/download`, including optional `storageMode`. **Response** `200 OK` @@ -261,7 +262,7 @@ List all download jobs. "isDataset": false, "filters": ["q4_k_m"], "excludes": [], - "outputDir": "/home/user/.cache/huggingface/hub", + "outputDir": "/home/user/.cache/huggingface", "status": "running", "progress": { "totalFiles": 3, @@ -462,7 +463,8 @@ Get current server settings. ```json { "token": "********mnop", - "cacheDir": "/home/user/.cache/huggingface/hub", + "cacheDir": "/home/user/.cache/huggingface", + "storageMode": "cache", "connections": 8, "maxActive": 3, "multipartThreshold": "32MiB", @@ -491,6 +493,8 @@ Update server settings. | Field | Type | Description | |-------|------|-------------| | `token` | string | HuggingFace token | +| `cacheDir` | string | Cache root directory used by server | +| `storageMode` | string | Default mode: `cache`, `flat`, `flat-structured` | | `connections` | integer | Connections per file | | `maxActive` | integer | Max concurrent downloads | | `multipartThreshold` | string | Min size for multipart | @@ -498,7 +502,6 @@ Update server settings. 
| `retries` | integer | Retry attempts | **Security Restrictions** -- `cacheDir` cannot be changed via API - `modelsDir` cannot be changed via API - `datasetsDir` cannot be changed via API @@ -518,6 +521,8 @@ curl -X POST http://localhost:8080/api/settings \ -H "Content-Type: application/json" \ -d '{ "token": "hf_xxxxx", + "cacheDir": "/home/user/ai_models", + "storageMode": "flat-structured", "connections": 16, "maxActive": 8 }' @@ -644,6 +649,12 @@ curl "http://localhost:8080/api/analyze/owner/repo?revision=v1.0" List cached repositories. +This endpoint scans all supported storage layouts: + +- HuggingFace cache layout (`hub/models--*`, `hub/datasets--*`) +- Flat-structured layout (`<cache-root>/<owner>/<repo>/`) +- Flat-mode indexed downloads (`<cache-root>/.hfd-flat-index/*.yaml`) + **Query Parameters** | Parameter | Type | Description | @@ -666,8 +677,17 @@ List cached repositories. "path": "/home/user/.cache/huggingface/hub/datasets--facebook--flores" } ], - "count": 2, - "cacheDir": "/home/user/.cache/huggingface/hub" + "stats": { + "totalModels": 1, + "totalDatasets": 1, + "totalSize": 123456789, + "totalSizeHuman": "117.7 MiB", + "totalFiles": 42 + }, + "cacheDir": "/home/user/.cache/huggingface", + "scannedCacheDirs": [ + "/home/user/.cache/huggingface" + ] } ``` diff --git a/docs/CLI.md b/docs/CLI.md index 7aa3cd2..5e0e632 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -246,6 +246,8 @@ hfdownloader serve [flags] | `--verify` | | string | `size` | Verification mode | | `--retries` | | int | `4` | Retry attempts | | `--endpoint` | | string | | Custom HF endpoint | +| `--flat` | | bool | `false` | Default Web UI storage mode: flat files at cache root | +| `--flat-structured` | | bool | `false` | Default Web UI storage mode: files under `<cache-root>/<owner>/<repo>/` | | `--auth-user` | | string | | Basic auth username | | `--auth-pass` | | string | | Basic auth password | | `--models-dir` | | string | `./Models` | Legacy models directory | @@ -271,8 +273,24 @@ hfdownloader serve --endpoint https://hf-mirror.com # 
High-performance settings hfdownloader serve -c 16 --max-active 8 + +# Default Web UI downloads to flat files at cache root +hfdownloader serve --flat + +# Default Web UI downloads to owner/repo directories +hfdownloader serve --flat-structured ``` +#### Storage Mode Defaults + +`serve` controls the default storage mode used by the Web UI: + +- Cache mode (default): HF-compatible `hub/` layout plus friendly symlinks +- Flat mode (`--flat`): plain files directly under cache root +- Flat-structured mode (`--flat-structured`): plain files under `///` + +Users can change this default later in the Web UI Settings page. + #### Server Features - **Web UI** at `http://localhost:8080` diff --git a/internal/assets/static/css/style.css b/internal/assets/static/css/style.css index 860d887..e4a3e73 100644 --- a/internal/assets/static/css/style.css +++ b/internal/assets/static/css/style.css @@ -376,6 +376,21 @@ a { border: 1px solid var(--color-border); border-radius: var(--radius-lg); overflow: hidden; + display: flex; + flex-direction: column; +} + +.card:has(.card-body.scrollable) { + overflow: visible; +} + +.card { + background: var(--gradient-surface); + border: 1px solid var(--color-border); + border-radius: var(--radius-lg); + overflow: hidden; + display: flex; + flex-direction: column; } .card-header { @@ -384,6 +399,7 @@ a { gap: 12px; padding: 20px 24px; border-bottom: 1px solid var(--color-border); + flex-shrink: 0; } .card-header h2, .card-header h3 { @@ -399,6 +415,39 @@ a { .card-body { padding: 24px; + flex: 1; +} + +.card-body.scrollable { + max-height: 500px; + overflow-y: auto; + overflow-x: hidden; + flex: 1; + min-height: 0; +} + +.card-body.scrollable::-webkit-scrollbar { + width: 8px; +} + +.card-body.scrollable::-webkit-scrollbar-track { + background: rgba(255, 255, 255, 0.05); + border-radius: 4px; +} + +.card-body.scrollable::-webkit-scrollbar-thumb { + background: rgba(255, 255, 255, 0.2); + border-radius: 4px; +} + 
+.card-body.scrollable::-webkit-scrollbar-thumb:hover { + background: rgba(255, 255, 255, 0.3); +} + +/* Firefox scrollbar */ +.card-body.scrollable { + scrollbar-width: thin; + scrollbar-color: rgba(255, 255, 255, 0.2) rgba(255, 255, 255, 0.05); } /* ============================================ diff --git a/internal/assets/static/index.html b/internal/assets/static/index.html index b52f571..a843bb1 100644 --- a/internal/assets/static/index.html +++ b/internal/assets/static/index.html @@ -177,6 +177,14 @@

Model

+
+ + +
@@ -212,6 +220,14 @@

Dataset

+
+ + +
@@ -597,16 +613,27 @@

Settings

Storage

-
+
- -
+ +

- Downloads use the standard HuggingFace cache structure for Python compatibility. + Directory where HuggingFace cache structure will be created (equivalent to HF_HOME environment variable). + Changes are saved to config file.

-

- To change: Set the HF_HOME environment variable before starting the server, - or use --cache-dir when launching hfdownloader serve. +

+
+ + +

+ Default: HF Cache (HuggingFace cache structure)
+ Flat: Files saved directly under the cache directory
+ Flat Structured: Files in owner/model subdirectories
+ Server CLI flags --flat or --flat-structured override the saved value each time the server starts.

@@ -691,7 +718,7 @@

Advanced

Proxy Settings

-
+
diff --git a/internal/assets/static/js/app.js b/internal/assets/static/js/app.js index fd87ad7..65554bd 100644 --- a/internal/assets/static/js/app.js +++ b/internal/assets/static/js/app.js @@ -627,11 +627,19 @@ const selectedQuants = Array.from(document.querySelectorAll('#quantOptions input[type="checkbox"]:checked')) .map(cb => cb.dataset.filter); - let cmd = `hfdownloader -r ${currentAnalysis.repo}`; + let cmd = `hfdownloader download ${currentAnalysis.repo}`; // Add dataset flag if (currentAnalysis.is_dataset) { - cmd += ' -d'; + cmd += ' --dataset'; + } + + // Add storage mode flags + const storageMode = state.settings.storageMode || 'cache'; + if (storageMode === 'flat') { + cmd += ' --flat'; + } else if (storageMode === 'flat-structured') { + cmd += ' --flat-structured'; } // Add revision if not main (from analysis) @@ -641,9 +649,9 @@ // Add filters - either from GGUF selection or advanced options if (selectedQuants.length > 0 && selectedQuants.length < (currentAnalysis.gguf?.quantizations?.length || 0)) { - cmd += ` -f "${selectedQuants.join(',')}"`; + cmd += ` -F "${selectedQuants.join(',')}"`; } else if (advancedOptions.filter) { - cmd += ` -f "${advancedOptions.filter}"`; + cmd += ` -F "${advancedOptions.filter}"`; } // Add excludes @@ -735,7 +743,8 @@ revision: currentAnalysis?.branch || 'main', dataset: isDataset, filters, - excludes + excludes, + storageMode: state.settings.storageMode || 'cache' }; await api('POST', '/download', body); @@ -748,13 +757,16 @@ // Make downloadFromAnalysis available globally window.downloadFromAnalysis = function(repo, isDataset) { - if (isDataset) { - $('#datasetRepo').value = repo; - navigateTo('download'); - } else { - $('#modelRepo').value = repo; - navigateTo('download'); - } + const prefix = isDataset ? 
'dataset' : 'model'; + $(`#${prefix}Repo`).value = repo; + + // Set storage mode from current settings + const storageModeEl = $(`#${prefix}StorageMode`); + if (storageModeEl && state.settings.storageMode) { + storageModeEl.value = state.settings.storageMode; + } + + navigateTo('download'); }; // ========================================= @@ -787,6 +799,7 @@ const revision = $(`#${prefix}Revision`)?.value.trim() || 'main'; const filter = $(`#${prefix}Filter`)?.value.trim(); const exclude = $(`#${prefix}Exclude`)?.value.trim(); + const storageMode = $(`#${prefix}StorageMode`)?.value || 'cache'; if (!repo) { showToast('Please enter a repository', 'error'); @@ -798,7 +811,8 @@ revision, dataset: isDataset, filters: filter ? filter.split(',').map(s => s.trim()).filter(Boolean) : [], - excludes: exclude ? exclude.split(',').map(s => s.trim()).filter(Boolean) : [] + excludes: exclude ? exclude.split(',').map(s => s.trim()).filter(Boolean) : [], + storageMode: storageMode }; try { @@ -826,6 +840,7 @@ dataset: isDataset, filters: ($(`#${prefix}Filter`)?.value || '').split(',').map(s => s.trim()).filter(Boolean), excludes: ($(`#${prefix}Exclude`)?.value || '').split(',').map(s => s.trim()).filter(Boolean), + storageMode: $(`#${prefix}StorageMode`)?.value || 'cache', dryRun: true }; @@ -1133,7 +1148,7 @@ // Cache Page // ========================================= - let cacheData = { repos: [], stats: {}, cacheDir: '' }; + let cacheData = { repos: [], stats: {}, cacheDir: '', scannedCacheDirs: [] }; let cacheFilter = 'all'; let cacheSort = 'name'; let cacheView = 'grid'; @@ -1219,7 +1234,9 @@

${cacheData.repos?.length === 0 ? 'Cache is Empty' : 'No Results'}

${message}

- ${cacheData.cacheDir ? `

Cache: ${escapeHtml(cacheData.cacheDir)}

` : ''} + ${(cacheData.scannedCacheDirs && cacheData.scannedCacheDirs.length > 0) + ? `

Scanned cache roots: ${cacheData.scannedCacheDirs.map(d => escapeHtml(d)).join(' | ')}

` + : (cacheData.cacheDir ? `

Cache: ${escapeHtml(cacheData.cacheDir)}

` : '')}
`; return; @@ -1676,10 +1693,15 @@ const data = await api('GET', '/settings'); state.settings = data; - // Display cache directory (read-only) + // Display cache directory (now editable) const cacheDirEl = $('#cacheDir'); - if (cacheDirEl) { - cacheDirEl.textContent = data.cacheDir || '~/.cache/huggingface'; + if (cacheDirEl && cacheDirEl.tagName === 'INPUT') { + cacheDirEl.value = data.cacheDir || '~/.cache/huggingface'; + } + + const storageModeEl = $('#storageMode'); + if (storageModeEl) { + storageModeEl.value = data.storageMode || 'cache'; } // Display config file paths @@ -1736,7 +1758,23 @@ } async function saveSettings() { + const currentCacheDir = state.settings?.cacheDir || ''; + const newCacheDir = ($('#cacheDir')?.value || '').trim(); + if (currentCacheDir && newCacheDir && currentCacheDir !== newCacheDir) { + const activeJobs = Array.from(state.jobs.values()).filter(j => + j && (j.status === 'queued' || j.status === 'running' || j.status === 'paused') + ).length; + const warningMsg = activeJobs > 0 + ? `You have ${activeJobs} active/pending job(s). Changing cache directory now may leave partial/incomplete files in the old cache path. Continue?` + : 'Changing cache directory may leave old partial/incomplete downloads in the previous cache path. Continue?'; + if (!window.confirm(warningMsg)) { + return; + } + } + const body = { + cacheDir: $('#cacheDir')?.value || '', + storageMode: $('#storageMode')?.value || 'cache', token: $('#hfToken')?.value || '', connections: parseInt($('#connections')?.value) || 8, maxActive: parseInt($('#maxActive')?.value) || 3, @@ -1763,6 +1801,32 @@ try { const result = await api('POST', '/settings', body); + + // Keep client state in sync immediately so Analyze/Download pages + // use the new storage mode without requiring a full page refresh. 
+ state.settings = { + ...state.settings, + ...body, + cacheDir: body.cacheDir, + storageMode: body.storageMode + }; + + // Sync download form storage mode selectors to the updated default. + const modelStorageModeEl = $('#modelStorageMode'); + if (modelStorageModeEl) { + modelStorageModeEl.value = state.settings.storageMode || 'cache'; + } + const datasetStorageModeEl = $('#datasetStorageMode'); + if (datasetStorageModeEl) { + datasetStorageModeEl.value = state.settings.storageMode || 'cache'; + } + + // If Analyze page is active, refresh command previews immediately. + if (currentAnalysis) { + updateCLICommandFromSelections(); + updateDownloadCommand(); + } + showToast(result.message || 'Settings saved', 'success'); // Clear password field after save if ($('#proxyPassword')) { @@ -2517,6 +2581,14 @@ cmd += ' --dataset'; } + // Add storage mode flags + const storageMode = state.settings.storageMode || 'cache'; + if (storageMode === 'flat') { + cmd += ' --flat'; + } else if (storageMode === 'flat-structured') { + cmd += ' --flat-structured'; + } + if (currentAnalysis.branch && currentAnalysis.branch !== 'main') { cmd += ` -b ${currentAnalysis.branch}`; } @@ -2666,6 +2738,7 @@ initModal(); // Load initial data + loadSettings(); // Load settings early so storage mode is available for downloads loadJobs(); } diff --git a/internal/cli/serve.go b/internal/cli/serve.go index 44bb6ca..2cd1c3d 100644 --- a/internal/cli/serve.go +++ b/internal/cli/serve.go @@ -31,6 +31,8 @@ func newServeCmd(ro *RootOpts) *cobra.Command { endpoint string authUser string authPass string + flat bool + flatStructured bool ) cmd := &cobra.Command{ @@ -51,6 +53,10 @@ Examples: hfdownloader serve --auth-user admin --auth-pass secret # With authentication hfdownloader serve --endpoint https://hf-mirror.com # Use mirror`, RunE: func(cmd *cobra.Command, args []string) error { + if flat && flatStructured { + return fmt.Errorf("--flat and --flat-structured cannot be used together") + } + // Build 
server config from CLI flags cfg := server.Config{ Addr: addr, @@ -67,6 +73,15 @@ Examples: fmt.Fprintf(os.Stderr, "Warning: could not load config file: %v\n", err) } + // Determine storage mode. CLI flags override config file. + if flatStructured { + cfg.StorageMode = server.StorageModeFlatStructured + } else if flat { + cfg.StorageMode = server.StorageModeFlat + } else if cfg.StorageMode == "" { + cfg.StorageMode = server.StorageModeCache + } + // Then override with CLI flags if explicitly set if cmd.Flags().Changed("connections") { cfg.Concurrency = conns @@ -136,6 +151,10 @@ Examples: cmd.Flags().IntVar(&retries, "retries", 4, "Max retry attempts per HTTP request") cmd.Flags().StringVar(&endpoint, "endpoint", "", "Custom HuggingFace endpoint URL (e.g., https://hf-mirror.com)") + // Storage modes + cmd.Flags().BoolVar(&flat, "flat", false, "Download files directly to output directory (flat file mode)") + cmd.Flags().BoolVar(&flatStructured, "flat-structured", false, "Download files in owner/model directory structure (flat mode)") + // Authentication cmd.Flags().StringVar(&authUser, "auth-user", "", "Username for basic auth (enables auth when set)") cmd.Flags().StringVar(&authPass, "auth-pass", "", "Password for basic auth") diff --git a/internal/server/api.go b/internal/server/api.go index b6d2d1d..3a1c189 100644 --- a/internal/server/api.go +++ b/internal/server/api.go @@ -21,13 +21,14 @@ import ( // Note: Output path is NOT configurable via API for security reasons. // The server uses its configured OutputDir (Models/ for models, Datasets/ for datasets). 
type DownloadRequest struct { - Repo string `json:"repo"` - Revision string `json:"revision,omitempty"` - Dataset bool `json:"dataset,omitempty"` + Repo string `json:"repo"` + Revision string `json:"revision,omitempty"` + Dataset bool `json:"dataset,omitempty"` Filters []string `json:"filters,omitempty"` Excludes []string `json:"excludes,omitempty"` - AppendFilterSubdir bool `json:"appendFilterSubdir,omitempty"` - DryRun bool `json:"dryRun,omitempty"` + AppendFilterSubdir bool `json:"appendFilterSubdir,omitempty"` + DryRun bool `json:"dryRun,omitempty"` + StorageMode string `json:"storageMode,omitempty"` // "cache" (default), "flat", or "flat-structured" } // PlanResponse is the response for a dry-run/plan request. @@ -50,6 +51,7 @@ type PlanFile struct { type SettingsResponse struct { Token string `json:"token,omitempty"` CacheDir string `json:"cacheDir"` + StorageMode string `json:"storageMode"` Concurrency int `json:"connections"` MaxActive int `json:"maxActive"` MultipartThreshold string `json:"multipartThreshold"` @@ -202,16 +204,48 @@ func (s *Server) handlePlanInternal(w http.ResponseWriter, req DownloadRequest) AppendFilterSubdir: req.AppendFilterSubdir, } - // Use server-configured output directory (not from request for security) - outputDir := s.config.ModelsDir - if req.Dataset { - outputDir = s.config.DatasetsDir + // Determine output directory based on storage mode + storageMode := req.StorageMode + if storageMode == "" { + storageMode = string(s.config.StorageMode) + } + if storageMode == "" { + storageMode = string(StorageModeCache) + } + + // Get base cache directory + baseDir := s.config.CacheDir + if baseDir == "" { + baseDir = hfdownloader.DefaultCacheDir() + } + + var outputDir string + switch storageMode { + case string(StorageModeFlat): + // Flat file mode: files go directly to base directory + outputDir = baseDir + case string(StorageModeFlatStructured): + // Flat structured mode: base path is baseDir; downloader appends owner/model once + 
outputDir = baseDir + default: + // Cache mode (default): use the cache directory itself (hfdownloader will create hub/models--) + outputDir = baseDir } settings := hfdownloader.Settings{ - OutputDir: outputDir, - Token: s.config.Token, - Endpoint: s.config.Endpoint, + Token: s.config.Token, + Endpoint: s.config.Endpoint, + } + + // Set output based on storage mode + if storageMode == string(StorageModeFlat) || storageMode == string(StorageModeFlatStructured) { + // Flat modes: use OutputDir directly, no HF cache structure + settings.OutputDir = outputDir + settings.CacheDir = "" + settings.NoRepoSubdir = storageMode == string(StorageModeFlat) + } else { + // Cache mode: use CacheDir (HuggingFace cache structure) + settings.CacheDir = outputDir } // Collect plan items @@ -369,9 +403,15 @@ func (s *Server) handleGetSettings(w http.ResponseWriter, r *http.Request) { cacheDir = hfdownloader.DefaultCacheDir() } + storageMode := string(s.config.StorageMode) + if storageMode == "" { + storageMode = string(StorageModeCache) + } + resp := SettingsResponse{ Token: tokenStatus, CacheDir: cacheDir, + StorageMode: storageMode, Concurrency: s.config.Concurrency, MaxActive: s.config.MaxActive, MultipartThreshold: s.config.MultipartThreshold, @@ -401,6 +441,8 @@ func (s *Server) handleGetSettings(w http.ResponseWriter, r *http.Request) { func (s *Server) handleUpdateSettings(w http.ResponseWriter, r *http.Request) { var req struct { Token *string `json:"token,omitempty"` + CacheDir *string `json:"cacheDir,omitempty"` + StorageMode *string `json:"storageMode,omitempty"` Concurrency *int `json:"connections,omitempty"` MaxActive *int `json:"maxActive,omitempty"` MultipartThreshold *string `json:"multipartThreshold,omitempty"` @@ -428,6 +470,23 @@ func (s *Server) handleUpdateSettings(w http.ResponseWriter, r *http.Request) { if req.Token != nil { s.config.Token = *req.Token } + if req.CacheDir != nil && *req.CacheDir != "" { + newCacheDir := strings.TrimSpace(*req.CacheDir) + if 
newCacheDir != "" && newCacheDir != s.config.CacheDir { + oldCacheDir := s.config.CacheDir + if oldCacheDir == "" { + oldCacheDir = hfdownloader.DefaultCacheDir() + } + s.config.PreviousCacheDirs = addRecentPath(s.config.PreviousCacheDirs, oldCacheDir, 5) + s.config.CacheDir = newCacheDir + } + } + if req.StorageMode != nil { + switch *req.StorageMode { + case string(StorageModeCache), string(StorageModeFlat), string(StorageModeFlatStructured): + s.config.StorageMode = StorageMode(*req.StorageMode) + } + } if req.Concurrency != nil && *req.Concurrency > 0 { s.config.Concurrency = *req.Concurrency } @@ -481,6 +540,9 @@ func (s *Server) handleUpdateSettings(w http.ResponseWriter, r *http.Request) { // Persist settings to config file fileCfg := &ConfigFile{ + CacheDir: s.config.CacheDir, + PreviousCacheDirs: append([]string(nil), s.config.PreviousCacheDirs...), + StorageMode: string(s.config.StorageMode), Token: s.config.Token, Connections: s.config.Concurrency, MaxActive: s.config.MaxActive, @@ -633,88 +695,93 @@ type CacheStats struct { // handleCacheList lists all cached repositories with rich metadata. 
func (s *Server) handleCacheList(w http.ResponseWriter, r *http.Request) { - cacheDir := s.config.CacheDir - if cacheDir == "" { - cacheDir = hfdownloader.DefaultCacheDir() + cacheRoots := cacheScanRoots(s.config) + if len(cacheRoots) == 0 { + cacheRoots = []string{hfdownloader.DefaultCacheDir()} } // Get query params repoType := r.URL.Query().Get("type") // "model" or "dataset" search := strings.ToLower(r.URL.Query().Get("search")) - cache := hfdownloader.NewHFCache(cacheDir, 0) - repoDirs, err := cache.ListRepos() - if err != nil { - writeError(w, http.StatusInternalServerError, "Failed to list cache", err.Error()) - return - } - var repos []CachedRepoInfo var stats CacheStats + seen := map[string]struct{}{} - for _, rd := range repoDirs { + for _, root := range cacheRoots { + cache := hfdownloader.NewHFCache(root, 0) + repoDirs, err := cache.ListRepos() + if err != nil { + repoDirs = nil + } + + for _, rd := range repoDirs { rdType := string(rd.Type()) repoID := rd.RepoID() + key := rdType + "|" + repoID + if _, ok := seen[key]; ok { + continue + } - // Filter by type if specified - if repoType != "" { - if repoType == "dataset" && rdType != "dataset" { - continue + // Filter by type if specified + if repoType != "" { + if repoType == "dataset" && rdType != "dataset" { + continue + } + if repoType == "model" && rdType != "model" { + continue + } } - if repoType == "model" && rdType != "model" { + + // Filter by search term + if search != "" && !strings.Contains(strings.ToLower(repoID), search) { continue } - } - // Filter by search term - if search != "" && !strings.Contains(strings.ToLower(repoID), search) { - continue - } + // Get size by walking blobs directory + blobsDir := rd.BlobsDir() + var totalSize int64 + var fileCount int + filepath.Walk(blobsDir, func(path string, info os.FileInfo, err error) error { + if err == nil && !info.IsDir() && !strings.HasSuffix(path, ".incomplete") && !strings.HasSuffix(path, ".meta") { + totalSize += info.Size() + 
fileCount++ + } + return nil + }) - // Get size by walking blobs directory - blobsDir := rd.BlobsDir() - var totalSize int64 - var fileCount int - filepath.Walk(blobsDir, func(path string, info os.FileInfo, err error) error { - if err == nil && !info.IsDir() && !strings.HasSuffix(path, ".incomplete") && !strings.HasSuffix(path, ".meta") { - totalSize += info.Size() - fileCount++ + // Update stats + if rdType == "model" { + stats.TotalModels++ + } else { + stats.TotalDatasets++ } - return nil - }) - - // Update stats - if rdType == "model" { - stats.TotalModels++ - } else { - stats.TotalDatasets++ - } - stats.TotalSize += totalSize - stats.TotalFiles += fileCount - - // Try to read commit from refs/main - branch := "main" - commit, _ := rd.ReadRef("main") - if commit == "" { - // Try other common refs - commit, _ = rd.ReadRef("master") - if commit != "" { - branch = "master" + stats.TotalSize += totalSize + stats.TotalFiles += fileCount + + // Try to read commit from refs/main + branch := "main" + commit, _ := rd.ReadRef("main") + if commit == "" { + // Try other common refs + commit, _ = rd.ReadRef("master") + if commit != "" { + branch = "master" + } } - } - // Get modification time from blobs dir - var downloaded string - if info, err := os.Stat(blobsDir); err == nil { - downloaded = info.ModTime().Format("2006-01-02") - } + // Get modification time from blobs dir + var downloaded string + if info, err := os.Stat(blobsDir); err == nil { + downloaded = info.ModTime().Format("2006-01-02") + } - // Try to read manifest from friendly path - var manifest *ManifestInfo - var downloadStatus string - friendlyPath := rd.FriendlyPath() - manifestPath := filepath.Join(friendlyPath, hfdownloader.ManifestFilename) - if m, err := hfdownloader.ReadManifest(manifestPath); err == nil { + // Try to read manifest from friendly path + var manifest *ManifestInfo + var downloadStatus string + friendlyPath := rd.FriendlyPath() + manifestPath := filepath.Join(friendlyPath, 
hfdownloader.ManifestFilename) + if m, err := hfdownloader.ReadManifest(manifestPath); err == nil { // Parse command for filter flags isFiltered, filters := parseCommandFilters(m.Command) @@ -743,42 +810,61 @@ func (s *Server) handleCacheList(w http.ResponseWriter, r *http.Request) { } else { downloadStatus = "complete" } - } else { - // No manifest - either downloaded by Python or external tool - downloadStatus = "unknown" - } + } else { + // No manifest - either downloaded by Python or external tool + downloadStatus = "unknown" + } - // Shorten commit hash - shortCommit := commit - if len(shortCommit) > 7 { - shortCommit = shortCommit[:7] - } + // Shorten commit hash + shortCommit := commit + if len(shortCommit) > 7 { + shortCommit = shortCommit[:7] + } - repo := CachedRepoInfo{ - Repo: repoID, - Owner: rd.Owner(), - Name: rd.Name(), - Type: rdType, - Path: rd.Path(), - FriendlyPath: friendlyPath, - Size: totalSize, - SizeHuman: humanSizeBytes(totalSize), - FileCount: fileCount, - Branch: branch, - Commit: shortCommit, - Downloaded: downloaded, - DownloadStatus: downloadStatus, - Manifest: manifest, + repo := CachedRepoInfo{ + Repo: repoID, + Owner: rd.Owner(), + Name: rd.Name(), + Type: rdType, + Path: rd.Path(), + FriendlyPath: friendlyPath, + Size: totalSize, + SizeHuman: humanSizeBytes(totalSize), + FileCount: fileCount, + Branch: branch, + Commit: shortCommit, + Downloaded: downloaded, + DownloadStatus: downloadStatus, + Manifest: manifest, + } + repos = append(repos, repo) + seen[key] = struct{}{} } - repos = append(repos, repo) + + // Also scan flat-structured repositories stored at // + flatRepos, flatStats := scanFlatStructuredRepos(root, repoType, search, seen) + repos = append(repos, flatRepos...) 
+ stats.TotalModels += flatStats.TotalModels + stats.TotalDatasets += flatStats.TotalDatasets + stats.TotalSize += flatStats.TotalSize + stats.TotalFiles += flatStats.TotalFiles + + // Also scan flat-mode index manifests at /.hfd-flat-index/*.yaml + flatIndexedRepos, flatIndexedStats := scanFlatIndexedRepos(root, repoType, search, seen) + repos = append(repos, flatIndexedRepos...) + stats.TotalModels += flatIndexedStats.TotalModels + stats.TotalDatasets += flatIndexedStats.TotalDatasets + stats.TotalSize += flatIndexedStats.TotalSize + stats.TotalFiles += flatIndexedStats.TotalFiles } stats.TotalSizeHuman = humanSizeBytes(stats.TotalSize) writeJSON(w, http.StatusOK, map[string]any{ - "repos": repos, - "stats": stats, - "cacheDir": cacheDir, + "repos": repos, + "stats": stats, + "cacheDir": cacheRoots[0], + "scannedCacheDirs": cacheRoots, }) } @@ -790,28 +876,71 @@ func (s *Server) handleCacheInfo(w http.ResponseWriter, r *http.Request) { return } - cacheDir := s.config.CacheDir - if cacheDir == "" { - cacheDir = hfdownloader.DefaultCacheDir() + cacheRoots := cacheScanRoots(s.config) + if len(cacheRoots) == 0 { + cacheRoots = []string{hfdownloader.DefaultCacheDir()} } - cache := hfdownloader.NewHFCache(cacheDir, 0) + var repoDir *hfdownloader.RepoDir + found := false - // Try as model first - repoDir, err := cache.Repo(repo, hfdownloader.RepoTypeModel) - if err != nil { - writeError(w, http.StatusBadRequest, "Invalid repository format", err.Error()) - return - } + for _, root := range cacheRoots { + cache := hfdownloader.NewHFCache(root, 0) + + // Try as model first + candidate, err := cache.Repo(repo, hfdownloader.RepoTypeModel) + if err != nil { + writeError(w, http.StatusBadRequest, "Invalid repository format", err.Error()) + return + } + if _, err := os.Stat(candidate.Path()); err == nil { + repoDir = candidate + found = true + break + } - // Check if the path exists - if _, err := os.Stat(repoDir.Path()); os.IsNotExist(err) { // Try as dataset - repoDir, _ = 
cache.Repo(repo, hfdownloader.RepoTypeDataset) - if _, err := os.Stat(repoDir.Path()); os.IsNotExist(err) { - writeError(w, http.StatusNotFound, "Repository not found in cache", "") + candidate, _ = cache.Repo(repo, hfdownloader.RepoTypeDataset) + if _, err := os.Stat(candidate.Path()); err == nil { + repoDir = candidate + found = true + break + } + } + + if !found { + for _, root := range cacheRoots { + flatPath, ok := findFlatStructuredRepoPath(root, repo) + if !ok { + continue + } + + flatInfo, err := buildFlatRepoInfo(flatPath, repo, true) + if err != nil { + continue + } + + writeJSON(w, http.StatusOK, flatInfo) return } + + for _, root := range cacheRoots { + indexPath, ok := findFlatIndexManifestPath(root, repo) + if !ok { + continue + } + + flatInfo, err := buildFlatIndexedRepoInfo(root, indexPath, true) + if err != nil { + continue + } + + writeJSON(w, http.StatusOK, flatInfo) + return + } + + writeError(w, http.StatusNotFound, "Repository not found in cache", "") + return } // Get snapshots @@ -1036,11 +1165,17 @@ func (s *Server) handleCacheDelete(w http.ResponseWriter, r *http.Request) { return } - // Determine type from query param + // Determine preferred type from query param, but try both types for robustness. 
repoTypeStr := r.URL.Query().Get("type") - repoType := hfdownloader.RepoTypeModel + preferredType := hfdownloader.RepoTypeModel if repoTypeStr == "dataset" { - repoType = hfdownloader.RepoTypeDataset + preferredType = hfdownloader.RepoTypeDataset + } + repoTypes := []hfdownloader.RepoType{preferredType} + if preferredType == hfdownloader.RepoTypeModel { + repoTypes = append(repoTypes, hfdownloader.RepoTypeDataset) + } else { + repoTypes = append(repoTypes, hfdownloader.RepoTypeModel) } cacheDir := s.config.CacheDir @@ -1048,96 +1183,155 @@ func (s *Server) handleCacheDelete(w http.ResponseWriter, r *http.Request) { cacheDir = hfdownloader.DefaultCacheDir() } - cache := hfdownloader.NewHFCache(cacheDir, hfdownloader.DefaultStaleTimeout) + cacheDirs := cacheDeleteCandidates(cacheDir, s.config.PreviousCacheDirs) + deletedFrom := "" + deleted := false - // Find the repo directory - repoDir, err := cache.Repo(repo, repoType) - if err != nil { - writeError(w, http.StatusBadRequest, "Invalid repository ID", err.Error()) - return - } + for _, candidateCacheDir := range cacheDirs { + cache := hfdownloader.NewHFCache(candidateCacheDir, hfdownloader.DefaultStaleTimeout) - hubPath := repoDir.Path() - friendlyPath := repoDir.FriendlyPath() + for _, repoType := range repoTypes { + // Find the repo directory + repoDir, err := cache.Repo(repo, repoType) + if err != nil { + writeError(w, http.StatusBadRequest, "Invalid repository ID", err.Error()) + return + } - // Security Layer 5: Resolve absolute paths - absCacheDir, err := filepath.Abs(cacheDir) - if err != nil { - writeError(w, http.StatusInternalServerError, "Failed to resolve cache path", err.Error()) - return - } - // Ensure cache dir ends with separator to prevent /cache/huggingface-evil matching /cache/huggingface - absCacheDirWithSep := absCacheDir + string(filepath.Separator) + hubPath := repoDir.Path() + friendlyPath := repoDir.FriendlyPath() - absHubPath, err := filepath.Abs(hubPath) - if err != nil { - writeError(w, 
http.StatusInternalServerError, "Failed to resolve path", err.Error()) - return - } + // Security Layer 5: Resolve absolute paths + absCacheDir, err := filepath.Abs(candidateCacheDir) + if err != nil { + continue + } + absCacheDirWithSep := absCacheDir + string(filepath.Separator) - // Security Layer 6: Check if path exists and is not a symlink (TOCTOU mitigation) - hubInfo, err := os.Lstat(absHubPath) - if os.IsNotExist(err) { - writeError(w, http.StatusNotFound, "Repository not found in cache", repo) - return - } - if err != nil { - writeError(w, http.StatusInternalServerError, "Failed to check path", err.Error()) - return - } + absHubPath, err := filepath.Abs(hubPath) + if err != nil { + continue + } - // Security Layer 7: Reject if the hub path itself is a symlink (symlink attack prevention) - if hubInfo.Mode()&os.ModeSymlink != 0 { - writeError(w, http.StatusBadRequest, "Invalid path", "Cannot delete symlinked directories") - return - } + // Security Layer 6: Check if path exists and is not a symlink (TOCTOU mitigation) + hubInfo, err := os.Lstat(absHubPath) + if os.IsNotExist(err) { + continue + } + if err != nil { + continue + } - // Security Layer 8: Verify path is within cache (using cleaned absolute path) - if !strings.HasPrefix(absHubPath+string(filepath.Separator), absCacheDirWithSep) { - writeError(w, http.StatusBadRequest, "Invalid path", "Path outside cache directory") - return - } + // Security Layer 7: Reject if the hub path itself is a symlink (symlink attack prevention) + if hubInfo.Mode()&os.ModeSymlink != 0 { + continue + } - // Security Layer 9: Verify path follows expected HF cache structure - // Must be: {cacheDir}/hub/{models|datasets}--{owner}--{name} - expectedPrefix := "models--" - if repoType == hfdownloader.RepoTypeDataset { - expectedPrefix = "datasets--" - } - hubSubpath, err := filepath.Rel(absCacheDir, absHubPath) - if err != nil || !strings.HasPrefix(hubSubpath, filepath.Join("hub", expectedPrefix)) { - writeError(w, 
http.StatusBadRequest, "Invalid path", "Path does not match expected cache structure") - return - } + // Security Layer 8: Verify path is within cache (using cleaned absolute path) + if !strings.HasPrefix(absHubPath+string(filepath.Separator), absCacheDirWithSep) { + continue + } - // Security Layer 10: Resolve symlinks to verify final destination is also within cache - // This catches symlinks inside the directory structure - realHubPath, err := filepath.EvalSymlinks(absHubPath) - if err == nil && realHubPath != absHubPath { - // Path contained symlinks - verify resolved path is still within cache - if !strings.HasPrefix(realHubPath+string(filepath.Separator), absCacheDirWithSep) { - writeError(w, http.StatusBadRequest, "Invalid path", "Resolved path outside cache directory") - return + // Security Layer 9: Verify path follows expected HF cache structure + expectedPrefix := "models--" + if repoType == hfdownloader.RepoTypeDataset { + expectedPrefix = "datasets--" + } + hubSubpath, err := filepath.Rel(absCacheDir, absHubPath) + if err != nil || !strings.HasPrefix(hubSubpath, filepath.Join("hub", expectedPrefix)) { + continue + } + + // All security checks passed - proceed with deletion + if err := os.RemoveAll(absHubPath); err != nil { + writeError(w, http.StatusInternalServerError, "Failed to delete cache", err.Error()) + return + } + + // Delete the friendly view directory (symlinks) with same security checks + if friendlyPath != "" { + absFriendlyPath := friendlyPath + if !filepath.IsAbs(absFriendlyPath) { + absFriendlyPath = filepath.Join(absCacheDir, absFriendlyPath) + } + _ = safeDeleteFriendlyPath(absFriendlyPath, absCacheDirWithSep) + } + + deleted = true + deletedFrom = candidateCacheDir + break + } + + if deleted { + break } } - // All security checks passed - proceed with deletion - if err := os.RemoveAll(absHubPath); err != nil { - writeError(w, http.StatusInternalServerError, "Failed to delete cache", err.Error()) - return + if !deleted { + // 
Fallback: delete flat-structured repo path at // + for _, candidateCacheDir := range cacheDirs { + absCacheDir, err := filepath.Abs(candidateCacheDir) + if err != nil { + continue + } + absCacheDirWithSep := absCacheDir + string(filepath.Separator) + + flatPath, ok := findFlatStructuredRepoPath(absCacheDir, repo) + if !ok { + continue + } + + absFlatPath, err := filepath.Abs(flatPath) + if err != nil { + continue + } + if !strings.HasPrefix(absFlatPath+string(filepath.Separator), absCacheDirWithSep) { + continue + } + + info, err := os.Lstat(absFlatPath) + if err != nil || !info.IsDir() || info.Mode()&os.ModeSymlink != 0 { + continue + } + + if err := os.RemoveAll(absFlatPath); err != nil { + writeError(w, http.StatusInternalServerError, "Failed to delete cache", err.Error()) + return + } + + // Clean up empty owner directory under cache root + ownerDir := filepath.Dir(absFlatPath) + if strings.HasPrefix(ownerDir+string(filepath.Separator), absCacheDirWithSep) { + _ = os.Remove(ownerDir) + } + + deleted = true + deletedFrom = candidateCacheDir + break + } } - // Delete the friendly view directory (symlinks) with same security checks - if friendlyPath != "" { - if err := safeDeleteFriendlyPath(friendlyPath, absCacheDirWithSep); err != nil { - // Log but don't fail - hub directory was successfully deleted - // The friendly path might not exist or might be invalid + if !deleted { + // Fallback: delete flat-mode indexed files from /.hfd-flat-index + for _, candidateCacheDir := range cacheDirs { + if err := deleteFlatIndexedRepo(candidateCacheDir, repo); err != nil { + continue + } + deleted = true + deletedFrom = candidateCacheDir + break } } + if !deleted { + writeError(w, http.StatusNotFound, "Repository not found in cache", repo) + return + } + writeJSON(w, http.StatusOK, map[string]any{ - "success": true, - "message": fmt.Sprintf("Deleted %s from cache", repo), + "success": true, + "message": fmt.Sprintf("Deleted %s from cache", repo), + "deletedFrom": deletedFrom, 
}) } @@ -1163,7 +1357,8 @@ func isValidRepoComponent(s string) bool { return true } -// safeDeleteFriendlyPath safely deletes the friendly view path with security checks. +// safeDeleteFriendlyPath safely deletes the friendly view path with security checks, +// and cleans up empty parent directories. func safeDeleteFriendlyPath(friendlyPath, absCacheDirWithSep string) error { absFriendlyPath, err := filepath.Abs(friendlyPath) if err != nil { @@ -1184,15 +1379,582 @@ func safeDeleteFriendlyPath(friendlyPath, absCacheDirWithSep string) error { return fmt.Errorf("friendly path is a symlink") } - // Resolve symlinks and verify again - realPath, err := filepath.EvalSymlinks(absFriendlyPath) - if err == nil && realPath != absFriendlyPath { - if !strings.HasPrefix(realPath+string(filepath.Separator), absCacheDirWithSep) { - return fmt.Errorf("resolved friendly path outside cache") + // If it's a directory, recursively delete all contents + // Use this approach to ensure broken symlinks are also deleted + if info.IsDir() { + // List all entries in the directory + entries, err := os.ReadDir(absFriendlyPath) + if err == nil { + for _, entry := range entries { + entryPath := filepath.Join(absFriendlyPath, entry.Name()) + // Remove each entry (works for files, symlinks, and directories) + os.RemoveAll(entryPath) + } + } + } + + // Delete the directory itself + if err := os.RemoveAll(absFriendlyPath); err != nil { + return err + } + + // Clean up empty parent directories up to the cache root + cacheRootWithoutSep := strings.TrimSuffix(absCacheDirWithSep, string(filepath.Separator)) + for parent := filepath.Dir(absFriendlyPath); parent != cacheRootWithoutSep && parent != "." 
&& parent != ""; parent = filepath.Dir(parent) { + // Stop if we've gone outside the cache directory + if !strings.HasPrefix(parent+string(filepath.Separator), absCacheDirWithSep) { + break + } + + // Try to remove the empty directory; stop if it fails (directory not empty) + if err := os.Remove(parent); err != nil { + break + } + } + + return nil +} + +// scanFlatStructuredRepos discovers repos stored as //. +func scanFlatStructuredRepos(root, repoTypeFilter, search string, seen map[string]struct{}) ([]CachedRepoInfo, CacheStats) { + var repos []CachedRepoInfo + var stats CacheStats + + entries, err := os.ReadDir(root) + if err != nil { + return repos, stats + } + + for _, ownerEntry := range entries { + if !ownerEntry.IsDir() { + continue + } + owner := ownerEntry.Name() + if !isValidRepoComponent(owner) { + continue + } + if owner == "hub" || owner == "models" || owner == "datasets" || strings.HasPrefix(owner, ".") { + continue + } + + ownerPath := filepath.Join(root, owner) + repoEntries, err := os.ReadDir(ownerPath) + if err != nil { + continue + } + + for _, repoEntry := range repoEntries { + if !repoEntry.IsDir() { + continue + } + name := repoEntry.Name() + if !isValidRepoComponent(name) { + continue + } + + repoID := owner + "/" + name + if search != "" && !strings.Contains(strings.ToLower(repoID), search) { + continue + } + + repoPath := filepath.Join(ownerPath, name) + flatInfo, err := buildFlatRepoInfo(repoPath, repoID, false) + if err != nil { + continue + } + + if repoTypeFilter != "" && flatInfo.Type != repoTypeFilter { + continue + } + + key := flatInfo.Type + "|" + repoID + if _, ok := seen[key]; ok { + continue + } + + repos = append(repos, flatInfo) + seen[key] = struct{}{} + + if flatInfo.Type == "dataset" { + stats.TotalDatasets++ + } else { + stats.TotalModels++ + } + stats.TotalSize += flatInfo.Size + stats.TotalFiles += flatInfo.FileCount } } - return os.RemoveAll(absFriendlyPath) + return repos, stats +} + +// findFlatStructuredRepoPath 
resolves // for a repo id. +func findFlatStructuredRepoPath(root, repo string) (string, bool) { + parts := strings.SplitN(repo, "/", 2) + if len(parts) != 2 { + return "", false + } + if !isValidRepoComponent(parts[0]) || !isValidRepoComponent(parts[1]) { + return "", false + } + + candidate := filepath.Join(root, parts[0], parts[1]) + info, err := os.Stat(candidate) + if err != nil || !info.IsDir() { + return "", false + } + + return candidate, true +} + +// buildFlatRepoInfo builds cache metadata for a flat-structured repo directory. +func buildFlatRepoInfo(repoPath, repoID string, includeFiles bool) (CachedRepoInfo, error) { + parts := strings.SplitN(repoID, "/", 2) + if len(parts) != 2 { + return CachedRepoInfo{}, fmt.Errorf("invalid repo id") + } + + var totalSize int64 + var fileCount int + files := make([]CachedFileInfo, 0) + + err := filepath.Walk(repoPath, func(path string, info os.FileInfo, err error) error { + if err != nil || info.IsDir() { + return nil + } + + relPath, relErr := filepath.Rel(repoPath, path) + if relErr != nil { + relPath = filepath.Base(path) + } + + if relPath == hfdownloader.ManifestFilename { + return nil + } + + totalSize += info.Size() + fileCount++ + + if includeFiles { + files = append(files, CachedFileInfo{ + Name: filepath.ToSlash(relPath), + Size: info.Size(), + SizeHuman: humanSizeBytes(info.Size()), + IsLFS: info.Size() > 10*1024*1024, + }) + } + + return nil + }) + if err != nil { + return CachedRepoInfo{}, err + } + + branch := "main" + commit := "" + var manifest *ManifestInfo + downloadStatus := "unknown" + + manifestPath := filepath.Join(repoPath, hfdownloader.ManifestFilename) + if m, err := hfdownloader.ReadManifest(manifestPath); err == nil { + isFiltered, filters := parseCommandFilters(m.Command) + manifest = &ManifestInfo{ + Branch: m.Branch, + Commit: m.Commit, + Downloaded: m.CompletedAt.Format("2006-01-02 15:04"), + Command: m.Command, + TotalSize: m.TotalSize, + TotalFiles: m.TotalFiles, + IsFiltered: 
isFiltered, + Filters: filters, + } + + if m.Branch != "" { + branch = m.Branch + } + if m.Commit != "" { + commit = m.Commit + } + if isFiltered { + downloadStatus = "filtered" + } else { + downloadStatus = "complete" + } + } + + shortCommit := commit + if len(shortCommit) > 7 { + shortCommit = shortCommit[:7] + } + + downloaded := "" + if info, err := os.Stat(repoPath); err == nil { + downloaded = info.ModTime().Format("2006-01-02") + } + if manifest != nil { + downloaded = strings.SplitN(manifest.Downloaded, " ", 2)[0] + } + + typ := "model" + if manifest != nil && manifest.Command != "" { + if strings.Contains(manifest.Command, " --dataset") || strings.Contains(manifest.Command, " -d") { + typ = "dataset" + } + } + + return CachedRepoInfo{ + Repo: repoID, + Owner: parts[0], + Name: parts[1], + Type: typ, + Path: repoPath, + FriendlyPath: repoPath, + Size: totalSize, + SizeHuman: humanSizeBytes(totalSize), + FileCount: fileCount, + Branch: branch, + Commit: shortCommit, + Downloaded: downloaded, + DownloadStatus: downloadStatus, + Files: files, + Manifest: manifest, + }, nil +} + +// scanFlatIndexedRepos discovers flat-mode repos from index manifests. 
+func scanFlatIndexedRepos(root, repoTypeFilter, search string, seen map[string]struct{}) ([]CachedRepoInfo, CacheStats) { + var repos []CachedRepoInfo + var stats CacheStats + + indexDir := filepath.Join(root, hfdownloader.FlatIndexDirname) + entries, err := os.ReadDir(indexDir) + if err != nil { + return repos, stats + } + + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(strings.ToLower(entry.Name()), ".yaml") { + continue + } + + indexPath := filepath.Join(indexDir, entry.Name()) + info, err := buildFlatIndexedRepoInfo(root, indexPath, false) + if err != nil { + continue + } + + if repoTypeFilter != "" && info.Type != repoTypeFilter { + continue + } + if search != "" && !strings.Contains(strings.ToLower(info.Repo), search) { + continue + } + + key := info.Type + "|" + info.Repo + if _, ok := seen[key]; ok { + continue + } + + repos = append(repos, info) + seen[key] = struct{}{} + + if info.Type == "dataset" { + stats.TotalDatasets++ + } else { + stats.TotalModels++ + } + stats.TotalSize += info.Size + stats.TotalFiles += info.FileCount + } + + return repos, stats +} + +// findFlatIndexManifestPath resolves the index manifest path for repo. +func findFlatIndexManifestPath(root, repo string) (string, bool) { + if !hfdownloader.IsValidModelName(repo) { + return "", false + } + path := hfdownloader.FlatIndexPath(root, repo) + info, err := os.Stat(path) + if err != nil || info.IsDir() { + return "", false + } + return path, true +} + +// buildFlatIndexedRepoInfo builds cache metadata for a flat-mode indexed repo. 
+func buildFlatIndexedRepoInfo(root, indexPath string, includeFiles bool) (CachedRepoInfo, error) { + m, err := hfdownloader.ReadManifest(indexPath) + if err != nil { + return CachedRepoInfo{}, err + } + + parts := strings.SplitN(m.Repo, "/", 2) + if len(parts) != 2 { + return CachedRepoInfo{}, fmt.Errorf("invalid repo in flat index") + } + + fileCount := len(m.Files) + totalSize := m.TotalSize + if totalSize == 0 { + for _, f := range m.Files { + totalSize += f.Size + } + } + + files := make([]CachedFileInfo, 0, len(m.Files)) + presentCount := 0 + if includeFiles { + for _, f := range m.Files { + if path := filepath.Join(root, filepath.Clean(f.Name)); path != "" { + if _, err := os.Stat(path); err == nil { + presentCount++ + } + } + files = append(files, CachedFileInfo{ + Name: f.Name, + Size: f.Size, + SizeHuman: humanSizeBytes(f.Size), + IsLFS: f.LFS, + }) + } + } else { + for _, f := range m.Files { + if path := filepath.Join(root, filepath.Clean(f.Name)); path != "" { + if _, err := os.Stat(path); err == nil { + presentCount++ + } + } + } + } + + isFiltered, filters := parseCommandFilters(m.Command) + manifest := &ManifestInfo{ + Branch: m.Branch, + Commit: m.Commit, + Downloaded: m.CompletedAt.Format("2006-01-02 15:04"), + Command: m.Command, + TotalSize: totalSize, + TotalFiles: fileCount, + IsFiltered: isFiltered, + Filters: filters, + } + + downloadStatus := "complete" + if isFiltered { + downloadStatus = "filtered" + } + if fileCount > 0 && presentCount < fileCount { + downloadStatus = "partial" + } + + shortCommit := m.Commit + if len(shortCommit) > 7 { + shortCommit = shortCommit[:7] + } + + repoType := m.Type + if repoType == "" { + repoType = "model" + } + + return CachedRepoInfo{ + Repo: m.Repo, + Owner: parts[0], + Name: parts[1], + Type: repoType, + Path: root, + FriendlyPath: root, + Size: totalSize, + SizeHuman: humanSizeBytes(totalSize), + FileCount: fileCount, + Branch: m.Branch, + Commit: shortCommit, + Downloaded: 
m.CompletedAt.Format("2006-01-02"), + DownloadStatus: downloadStatus, + Files: files, + Manifest: manifest, + }, nil +} + +// deleteFlatIndexedRepo deletes files recorded in a flat-mode index manifest. +func deleteFlatIndexedRepo(root, repo string) error { + indexPath, ok := findFlatIndexManifestPath(root, repo) + if !ok { + return fmt.Errorf("flat index not found") + } + + m, err := hfdownloader.ReadManifest(indexPath) + if err != nil { + return err + } + + absRoot, err := filepath.Abs(root) + if err != nil { + return err + } + absRootWithSep := absRoot + string(filepath.Separator) + + parts := strings.SplitN(repo, "/", 2) + repoName := repo + if len(parts) == 2 && parts[1] != "" { + repoName = parts[1] + } + + removedAny := false + var removalErrors []string + + for _, f := range m.Files { + candidates := []string{f.Name} + + // Handle README naming migration in flat mode. + base := filepath.Base(f.Name) + if base == "README.md" && !strings.Contains(filepath.ToSlash(f.Name), "/") { + candidates = append(candidates, repoName+".README.md") + } + if base == repoName+".README.md" { + candidates = append(candidates, "README.md") + } + + seenCandidates := map[string]struct{}{} + for _, rawRel := range candidates { + rel := filepath.Clean(rawRel) + if rel == "." || rel == "" || strings.HasPrefix(rel, "..") || filepath.IsAbs(rel) { + continue + } + if _, ok := seenCandidates[rel]; ok { + continue + } + seenCandidates[rel] = struct{}{} + + candidate := filepath.Join(absRoot, rel) + absCandidate, err := filepath.Abs(candidate) + if err != nil { + continue + } + if !strings.HasPrefix(absCandidate+string(filepath.Separator), absRootWithSep) { + continue + } + + // Clean up resumable/multipart temporary artifacts even when the + // assembled destination file does not exist (common for canceled downloads). 
+ if p, err := filepath.Glob(absCandidate + ".part-*"); err == nil { + for _, partPath := range p { + if err := os.RemoveAll(partPath); err == nil { + removedAny = true + } + } + } + if err := os.RemoveAll(absCandidate + ".part"); err == nil { + removedAny = true + } + + if _, err := os.Lstat(absCandidate); err != nil { + continue + } + + if err := os.RemoveAll(absCandidate); err != nil { + removalErrors = append(removalErrors, rel+": "+err.Error()) + continue + } + removedAny = true + + for parent := filepath.Dir(absCandidate); parent != absRoot && parent != "." && parent != ""; parent = filepath.Dir(parent) { + if !strings.HasPrefix(parent+string(filepath.Separator), absRootWithSep) { + break + } + if err := os.Remove(parent); err != nil { + break + } + } + } + } + + if len(removalErrors) > 0 { + return fmt.Errorf("failed to delete flat files: %s", strings.Join(removalErrors, "; ")) + } + if !removedAny { + return fmt.Errorf("flat index found but no files were removed") + } + + if err := os.Remove(indexPath); err != nil && !os.IsNotExist(err) { + return err + } + _ = os.Remove(filepath.Dir(indexPath)) + + return nil +} + +// addRecentPath appends path to recent list (deduped), keeping at most max entries. +func addRecentPath(existing []string, path string, max int) []string { + path = strings.TrimSpace(path) + if path == "" { + return existing + } + out := make([]string, 0, len(existing)+1) + out = append(out, path) + for _, p := range existing { + if strings.TrimSpace(p) == "" || p == path { + continue + } + out = append(out, p) + if max > 0 && len(out) >= max { + break + } + } + return out +} + +// orderedUniquePaths returns non-empty unique paths, preserving order. 
+func orderedUniquePaths(primary string, extras []string, fallback string) []string { + seen := map[string]struct{}{} + paths := make([]string, 0, len(extras)+2) + appendIfNew := func(p string) { + p = strings.TrimSpace(p) + if p == "" { + return + } + if _, ok := seen[p]; ok { + return + } + seen[p] = struct{}{} + paths = append(paths, p) + } + + appendIfNew(primary) + for _, p := range extras { + appendIfNew(p) + } + appendIfNew(fallback) + return paths +} + +// scriptDefaultCacheDir returns the script-default cache path ~/.cache/huggingface, +// independent of HF_HOME overrides. +func scriptDefaultCacheDir() string { + home, err := os.UserHomeDir() + if err != nil || strings.TrimSpace(home) == "" { + return "" + } + return filepath.Join(home, ".cache", "huggingface") +} + +// cacheScanRoots returns cache roots to scan for list/info/delete operations. +// Order matters and is preserved: configured -> script default -> HF_HOME -> previous -> resolved default. +func cacheScanRoots(cfg Config) []string { + hfHome := strings.TrimSpace(os.Getenv("HF_HOME")) + extras := make([]string, 0, len(cfg.PreviousCacheDirs)+3) + extras = append(extras, scriptDefaultCacheDir()) + extras = append(extras, hfHome) + extras = append(extras, cfg.PreviousCacheDirs...) + return orderedUniquePaths(cfg.CacheDir, extras, hfdownloader.DefaultCacheDir()) +} + +// cacheDeleteCandidates uses the same root-selection policy as cache scanning. +func cacheDeleteCandidates(current string, previous []string) []string { + cfg := Config{CacheDir: current, PreviousCacheDirs: previous} + return cacheScanRoots(cfg) } // parseCommandFilters extracts filter information from a manifest command string. diff --git a/internal/server/config.go b/internal/server/config.go index fc8bfbd..8fdf906 100644 --- a/internal/server/config.go +++ b/internal/server/config.go @@ -18,6 +18,8 @@ import ( // This matches the CLI config file format for consistency. 
type ConfigFile struct { CacheDir string `json:"cache-dir,omitempty" yaml:"cache-dir,omitempty"` + PreviousCacheDirs []string `json:"previous-cache-dirs,omitempty" yaml:"previous-cache-dirs,omitempty"` + StorageMode string `json:"storage-mode,omitempty" yaml:"storage-mode,omitempty"` Token string `json:"token,omitempty" yaml:"token,omitempty"` Connections int `json:"connections,omitempty" yaml:"connections,omitempty"` MaxActive int `json:"max-active,omitempty" yaml:"max-active,omitempty"` @@ -150,6 +152,15 @@ func ApplyConfigToServer(serverCfg *Config) error { if serverCfg.CacheDir == "" && fileCfg.CacheDir != "" { serverCfg.CacheDir = fileCfg.CacheDir } + if len(serverCfg.PreviousCacheDirs) == 0 && len(fileCfg.PreviousCacheDirs) > 0 { + serverCfg.PreviousCacheDirs = append([]string(nil), fileCfg.PreviousCacheDirs...) + } + if serverCfg.StorageMode == "" && fileCfg.StorageMode != "" { + switch fileCfg.StorageMode { + case string(StorageModeCache), string(StorageModeFlat), string(StorageModeFlatStructured): + serverCfg.StorageMode = StorageMode(fileCfg.StorageMode) + } + } if serverCfg.Token == "" && fileCfg.Token != "" { serverCfg.Token = fileCfg.Token } diff --git a/internal/server/jobs.go b/internal/server/jobs.go index 5da62b4..9b97712 100644 --- a/internal/server/jobs.go +++ b/internal/server/jobs.go @@ -34,6 +34,7 @@ type Job struct { Filters []string `json:"filters,omitempty"` Excludes []string `json:"excludes,omitempty"` OutputDir string `json:"outputDir"` + StorageMode string `json:"storageMode,omitempty"` Status JobStatus `json:"status"` Progress JobProgress `json:"progress"` Error string `json:"error,omitempty"` @@ -150,10 +151,31 @@ func (m *JobManager) CreateJob(req DownloadRequest) (*Job, bool, error) { revision = "main" } - // Use HuggingFace cache directory (v3 mode) - cacheDir := m.config.CacheDir - if cacheDir == "" { - cacheDir = hfdownloader.DefaultCacheDir() + // Determine output directory and storage mode + storageMode := req.StorageMode + if 
storageMode == "" { + storageMode = string(m.config.StorageMode) + } + if storageMode == "" { + storageMode = string(StorageModeCache) + } + + var outputDir string + baseDir := m.config.CacheDir + if baseDir == "" { + baseDir = hfdownloader.DefaultCacheDir() + } + + switch storageMode { + case string(StorageModeFlat): + // Flat file mode: files go directly to base directory + outputDir = baseDir + case string(StorageModeFlatStructured): + // Flat structured mode: base path is baseDir; downloader appends owner/model once + outputDir = baseDir + default: + // Cache mode (default): use the cache directory (hfdownloader will create hub/models--) + outputDir = baseDir } // Check for existing active job with same repo+revision+type. @@ -172,16 +194,17 @@ func (m *JobManager) CreateJob(req DownloadRequest) (*Job, bool, error) { } job := &Job{ - ID: generateID(), - Repo: req.Repo, - Revision: revision, - IsDataset: req.Dataset, - Filters: req.Filters, - Excludes: req.Excludes, - OutputDir: cacheDir, // HuggingFace cache directory - Status: JobStatusQueued, - CreatedAt: time.Now(), - Progress: JobProgress{}, + ID: generateID(), + Repo: req.Repo, + Revision: revision, + IsDataset: req.Dataset, + Filters: req.Filters, + Excludes: req.Excludes, + OutputDir: outputDir, + StorageMode: storageMode, + Status: JobStatusQueued, + CreatedAt: time.Now(), + Progress: JobProgress{}, } m.jobs[job.ID] = job @@ -465,14 +488,8 @@ func (m *JobManager) runJob(job *Job) { AppendFilterSubdir: false, } - // Use HuggingFace cache structure (v3 mode) instead of legacy OutputDir - cacheDir := m.config.CacheDir - if cacheDir == "" { - cacheDir = hfdownloader.DefaultCacheDir() - } - + // Build settings based on storage mode settings := hfdownloader.Settings{ - CacheDir: cacheDir, // Use HF cache structure Concurrency: m.config.Concurrency, MaxActiveDownloads: m.config.MaxActive, Token: m.config.Token, @@ -485,6 +502,25 @@ func (m *JobManager) runJob(job *Job) { Proxy: m.config.Proxy, } + // Set 
output based on storage mode + storageMode := job.StorageMode + if storageMode == string(StorageModeFlat) || storageMode == string(StorageModeFlatStructured) { + // Flat modes: use OutputDir directly, no HF cache structure + // MUST clear CacheDir so hfdownloader doesn't create hub/models--owner--name/ + settings.OutputDir = job.OutputDir + settings.CacheDir = "" + settings.NoRepoSubdir = storageMode == string(StorageModeFlat) + settings.NoFriendlyView = true + settings.NoManifest = false // Still create manifest + } else { + // Cache mode: use CacheDir (HuggingFace cache structure) + cacheDir := m.config.CacheDir + if cacheDir == "" { + cacheDir = hfdownloader.DefaultCacheDir() + } + settings.CacheDir = cacheDir + } + // Progress callback - NOTE: must not hold lock when calling notifyListeners progressFunc := func(evt hfdownloader.ProgressEvent) { m.mu.Lock() diff --git a/internal/server/server.go b/internal/server/server.go index 9865729..6b9db93 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -16,6 +16,18 @@ import ( "github.com/bodaay/HuggingFaceModelDownloader/pkg/hfdownloader" ) +// StorageMode defines how files are stored during downloads. +type StorageMode string + +const ( + // StorageModeCache stores files in HuggingFace cache structure (default) + StorageModeCache StorageMode = "cache" + // StorageModeFlat stores files directly in output directory + StorageModeFlat StorageMode = "flat" + // StorageModeFlatStructured stores files in owner/model structure + StorageModeFlatStructured StorageMode = "flat-structured" +) + // Config holds server configuration. 
type Config struct { Addr string @@ -24,6 +36,8 @@ type Config struct { ModelsDir string // Output directory for models (not configurable via API) DatasetsDir string // Output directory for datasets (not configurable via API) CacheDir string // HuggingFace cache directory for v3 mode + PreviousCacheDirs []string // Recently used cache directories (for cache operations fallback) + StorageMode StorageMode // Default storage mode for downloads Concurrency int MaxActive int MultipartThreshold string // Minimum size for multipart download diff --git a/pkg/hfdownloader/downloader.go b/pkg/hfdownloader/downloader.go index b2b9ddc..1958847 100644 --- a/pkg/hfdownloader/downloader.go +++ b/pkg/hfdownloader/downloader.go @@ -159,9 +159,15 @@ func Download(ctx context.Context, job Job, cfg Settings, progress ProgressFunc) // Ensure destination root exists (only for legacy mode) // HF cache mode already created directories via repoDir.EnsureDirs() if !useHFCache { - if err := os.MkdirAll(destinationBase(job, cfg), 0o755); err != nil { + if err := os.MkdirAll(cfg.OutputDir, 0o755); err != nil { return err } + // Resolve symlinks in output root upfront to avoid os.Rename() issues + // when downloading to symlinked directories. + if realRoot, err := filepath.EvalSymlinks(cfg.OutputDir); err == nil { + cfg.OutputDir = realRoot + } + // If EvalSymlinks fails (not a symlink, doesn't exist yet), proceed with original path } // Overall concurrency limiter (ctx-aware acquisition) @@ -181,11 +187,26 @@ func Download(ctx context.Context, job Job, cfg Settings, progress ProgressFunc) // Manifest is always written unless explicitly disabled with NoManifest var manifestBuilder *ManifestBuilder var manifestMu sync.Mutex - if useHFCache && !cfg.NoManifest { + if !cfg.NoManifest { manifestBuilder = NewManifestBuilder(job, cfg.Command) manifestBuilder.SetCommit(plan.Commit) } + // Flat mode index should represent intended outputs, including interrupted runs. 
+ if manifestBuilder != nil && !useHFCache && cfg.NoRepoSubdir { + for _, item := range plan.Items { + rel := item.RelativePath + if job.AppendFilterSubdir && item.Subdir != "" { + rel = filepath.ToSlash(filepath.Join(item.Subdir, item.RelativePath)) + } + mappedRel, skip := mapFlatNoRepoFile(job.Repo, rel) + if skip { + continue + } + manifestBuilder.AddFile(mappedRel, item.SHA256, item.Size, item.LFS) + } + } + LOOP: for _, item := range plan.Items { // Stop scheduling more work once canceled @@ -220,6 +241,18 @@ LOOP: finalRel = filepath.ToSlash(filepath.Join(it.Subdir, it.RelativePath)) } + if !useHFCache && cfg.NoRepoSubdir { + mappedRel, skip := mapFlatNoRepoFile(job.Repo, finalRel) + if skip { + if _, loaded := skipOnce.LoadOrStore(finalRel, struct{}{}); !loaded { + emit(ProgressEvent{Event: "file_done", Path: finalRel, Message: "skip (ignored in flat mode)"}) + atomic.AddInt64(&skippedCount, 1) + } + return + } + finalRel = mappedRel + } + var dst string var skipCheck func() (bool, string, error) @@ -259,6 +292,21 @@ LOOP: base := destinationBase(job, cfg) dst = filepath.Join(base, finalRel) skipCheck = func() (bool, string, error) { + // Fast path for large local files in size-verify mode. + // This avoids hashing huge existing LFS artifacts (e.g. GGUF) + // before deciding they can be skipped. 
+ if cfg.Verify == "size" { + fi, err := os.Stat(dst) + if err != nil { + if os.IsNotExist(err) { + return false, "", nil + } + return false, "", err + } + if it.Size > 0 && fi.Size() == it.Size { + return true, "size match", nil + } + } return shouldSkipLocal(it, dst) } } @@ -271,6 +319,7 @@ LOOP: return } + // Check if we can skip alreadyOK, reason, err := skipCheck() if err != nil { @@ -285,9 +334,13 @@ LOOP: emit(ProgressEvent{Event: "file_done", Path: finalRel, Message: "skip (" + reason + ")"}) atomic.AddInt64(&skippedCount, 1) // Add to manifest (skipped files are still part of the download job) - if manifestBuilder != nil { + if manifestBuilder != nil && (useHFCache || !cfg.NoRepoSubdir) { + manifestName := it.RelativePath + if !useHFCache { + manifestName = finalRel + } manifestMu.Lock() - manifestBuilder.AddFile(it.RelativePath, it.SHA256, it.Size, it.LFS) + manifestBuilder.AddFile(manifestName, it.SHA256, it.Size, it.LFS) manifestMu.Unlock() } } @@ -364,9 +417,13 @@ LOOP: } // Add to manifest with actual LFS info from API and final SHA256 - if manifestBuilder != nil { + if manifestBuilder != nil && (useHFCache || !cfg.NoRepoSubdir) { + manifestName := it.RelativePath + if !useHFCache { + manifestName = finalRel + } manifestMu.Lock() - manifestBuilder.AddFile(it.RelativePath, finalSHA256, it.Size, it.LFS) + manifestBuilder.AddFile(manifestName, finalSHA256, it.Size, it.LFS) manifestMu.Unlock() } @@ -388,11 +445,22 @@ LOOP: } if firstErr != nil { emit(ProgressEvent{Level: "error", Event: "error", Message: firstErr.Error()}) - return firstErr } - if ctx.Err() != nil { - return ctx.Err() + finalErr := firstErr + if finalErr == nil && ctx.Err() != nil { + finalErr = ctx.Err() + } + + if manifestBuilder != nil && !useHFCache && cfg.NoRepoSubdir { + manifest := manifestBuilder.Build() + if _, err := manifest.WriteFlatIndex(cfg.OutputDir); err != nil { + emit(ProgressEvent{Level: "warn", Event: "warning", Message: fmt.Sprintf("failed to write flat index 
manifest: %v", err)}) + } + } + + if finalErr != nil { + return finalErr } // For HF Cache mode: write ref file and ensure friendly directory exists @@ -435,6 +503,35 @@ LOOP: return nil } +// mapFlatNoRepoFile normalizes filenames when using flat mode (no repo subdir). +// - Skip .gitattributes to keep flat roots clean. +// - Rename root README.md to .README.md to avoid collisions. +// - Prefix generic root artifacts (mmproj*, imatrix*) with . to avoid collisions. +func mapFlatNoRepoFile(repo, rel string) (string, bool) { + rel = filepath.ToSlash(filepath.Clean(rel)) + base := filepath.Base(rel) + lowerBase := strings.ToLower(base) + + repoName := repo + if parts := strings.SplitN(repo, "/", 2); len(parts) == 2 && parts[1] != "" { + repoName = parts[1] + } + + if base == ".gitattributes" { + return "", true + } + + if base == "README.md" && !strings.Contains(rel, "/") { + return repoName + ".README.md", false + } + + if !strings.Contains(rel, "/") && (strings.HasPrefix(lowerBase, "mmproj") || strings.HasPrefix(lowerBase, "imatrix")) { + return repoName + "." + base, false + } + + return rel, false +} + // downloadSingle downloads a file in a single request. // // Resume behavior: if a .part file already exists from a previous interrupted diff --git a/pkg/hfdownloader/hfcache.go b/pkg/hfdownloader/hfcache.go index 15b6475..c594cb4 100644 --- a/pkg/hfdownloader/hfcache.go +++ b/pkg/hfdownloader/hfcache.go @@ -80,6 +80,12 @@ func NewHFCache(root string, staleTimeout time.Duration) *HFCache { if staleTimeout == 0 { staleTimeout = DefaultStaleTimeout } + // Resolve symlinks in the cache root to avoid issues with file operations + // on symlinked directories (e.g., os.Rename failures). If resolution fails, + // we proceed with the original path. 
+ if realRoot, err := filepath.EvalSymlinks(root); err == nil { + root = realRoot + } return &HFCache{ Root: root, StaleTimeout: staleTimeout, diff --git a/pkg/hfdownloader/manifest.go b/pkg/hfdownloader/manifest.go index c2e0d29..af5cb4b 100644 --- a/pkg/hfdownloader/manifest.go +++ b/pkg/hfdownloader/manifest.go @@ -16,6 +16,9 @@ import ( // ManifestFilename is the standard name for manifest files. const ManifestFilename = "hfd.yaml" +// FlatIndexDirname is where flat-mode repo manifests are stored. +const FlatIndexDirname = ".hfd-flat-index" + // DownloadManifest captures the complete record of a download job. // This file is written to the friendly view root (e.g., models/Owner/Repo/hfd.yaml) // and contains all information needed to understand and reproduce the download. @@ -140,6 +143,38 @@ func (m *DownloadManifest) Write(friendlyPath string) (string, error) { return manifestPath, nil } +// FlatIndexPath returns the flat-mode index manifest path for a repo. +func FlatIndexPath(baseDir, repo string) string { + name := strings.ReplaceAll(repo, "/", "--") + ".yaml" + return filepath.Join(baseDir, FlatIndexDirname, name) +} + +// WriteFlatIndex writes a manifest index for flat-mode downloads. 
+func (m *DownloadManifest) WriteFlatIndex(baseDir string) (string, error) { + manifestPath := FlatIndexPath(baseDir, m.Repo) + manifestDir := filepath.Dir(manifestPath) + + if err := os.MkdirAll(manifestDir, 0o755); err != nil { + return "", fmt.Errorf("create flat index dir: %w", err) + } + + data, err := yaml.Marshal(m) + if err != nil { + return "", fmt.Errorf("marshal flat index manifest: %w", err) + } + + header := fmt.Sprintf("# HuggingFace %s: %s\n", m.Type, m.Repo) + header += fmt.Sprintf("# Downloaded: %s\n", m.CompletedAt.Format(time.RFC3339)) + header += "# Flat mode index generated by hfdownloader\n\n" + content := header + string(data) + + if err := os.WriteFile(manifestPath, []byte(content), 0o644); err != nil { + return "", fmt.Errorf("write flat index manifest: %w", err) + } + + return manifestPath, nil +} + // ReadManifest reads a manifest from a file. func ReadManifest(path string) (*DownloadManifest, error) { data, err := os.ReadFile(path) diff --git a/pkg/hfdownloader/plan.go b/pkg/hfdownloader/plan.go index a9715b6..8ce7993 100644 --- a/pkg/hfdownloader/plan.go +++ b/pkg/hfdownloader/plan.go @@ -155,7 +155,10 @@ func scanRepo(ctx context.Context, httpc *http.Client, token string, job Job, cf // destinationBase returns the base output directory for a job. func destinationBase(job Job, cfg Settings) string { - // Always OutputDir/; per-file filter subdirs are applied in Download(). + // Legacy layout defaults to OutputDir/. Flat mode can opt out. + if cfg.NoRepoSubdir { + return cfg.OutputDir + } return filepath.Join(cfg.OutputDir, job.Repo) } diff --git a/pkg/hfdownloader/types.go b/pkg/hfdownloader/types.go index c7d8966..5cbdbd4 100644 --- a/pkg/hfdownloader/types.go +++ b/pkg/hfdownloader/types.go @@ -122,6 +122,11 @@ type Settings struct { // Will be removed in a future version. OutputDir string + // NoRepoSubdir controls legacy OutputDir layout. + // When false (default), files are written under OutputDir///. 
+ // When true, files are written directly under OutputDir/. + NoRepoSubdir bool + // Concurrency is the number of parallel HTTP connections per file // when using multipart downloads. Higher values can improve speed // on fast networks but increase memory usage.