From 1f00c61551ceb6b852e82d5b93e04b2201ea5418 Mon Sep 17 00:00:00 2001 From: RoboShyim Date: Sat, 31 Jan 2026 09:07:41 +0000 Subject: [PATCH] Implement PURGE by URL via URL index --- src/cache.rs | 17 ++++++++++++- src/disk.rs | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++-- src/purge.rs | 23 ++++++++++------- 3 files changed, 98 insertions(+), 12 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index 229eaf7..77440dc 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -36,6 +36,11 @@ impl Cache { let inner = self.inner.read(); inner.disk.remove_by_tags(tags) } + + pub fn purge_url(&self, normalized_url: &str) -> Result { + let inner = self.inner.read(); + inner.disk.remove_by_url(normalized_url) + } } pub async fn handle_cached( @@ -100,7 +105,15 @@ pub async fn handle_cached( // Strip Set-Cookie on cacheable responses resp_headers.remove(http::header::SET_COOKIE); - store(&state.cache, &cache_key, status, &resp_headers, &bytes, ttl)?; + store( + &state.cache, + &cache_key, + &norm_uri, + status, + &resp_headers, + &bytes, + ttl, + )?; } Ok(build_response(status, resp_headers, bytes, &norm_uri)) @@ -190,6 +203,7 @@ fn lookup( fn store( cache: &Cache, key: &str, + url: &Uri, status: http::StatusCode, headers: &HeaderMap, body: &Bytes, @@ -213,6 +227,7 @@ fn store( .map(|s| s.to_string()); let meta = disk::StoredMeta { + url: url.to_string(), stored_at_ms: disk::now_ms(), ttl_ms: ttl.as_millis() as u64, grace_ms: grace.as_millis() as u64, diff --git a/src/disk.rs b/src/disk.rs index d0a31cf..1d3b9c7 100644 --- a/src/disk.rs +++ b/src/disk.rs @@ -18,6 +18,12 @@ pub struct DiskStore { #[derive(Debug, Serialize, Deserialize)] pub struct StoredMeta { + /// Normalized URL (path + sorted query) for this object. + /// + /// Added later; may be absent for older entries. + #[serde(default)] + pub url: String, + pub stored_at_ms: u64, pub ttl_ms: u64, pub grace_ms: u64, @@ -69,7 +75,7 @@ impl DiskStore { fs::write(dir.join("meta.json"), meta_bytes).map_err(|e| format!("write meta: {e}"))?; fs::write(dir.join("body.bin"), body).map_err(|e| format!("write body: {e}"))?; - // Update tag index in sled: tag: -> Vec + // Update tag index in sled: tag: -> Set for tag in &meta.tags { let k = format!("tag:{tag}"); let mut set: std::collections::BTreeSet = self @@ -85,6 +91,22 @@ impl DiskStore { .map_err(|e| format!("sled insert: {e}"))?; } + // Update URL index: url: -> Set + if !meta.url.is_empty() { + let k = format!("url:{}", meta.url); + let mut set: std::collections::BTreeSet = self + .db + .get(&k) + .map_err(|e| format!("sled get: {e}"))? + .map(|v| bincode::deserialize(&v).unwrap_or_default()) + .unwrap_or_default(); + set.insert(key.to_string()); + let enc = bincode::serialize(&set).map_err(|e| format!("bincode: {e}"))?; + self.db + .insert(k.as_bytes(), enc) + .map_err(|e| format!("sled insert: {e}"))?; + } + self.db.flush().map_err(|e| format!("sled flush: {e}"))?; Ok(()) } @@ -96,7 +118,7 @@ impl DiskStore { return Ok(false); } - // Read meta to remove tag index + // Read meta to remove secondary indexes if let Ok(meta_bytes) = fs::read(dir.join("meta.json")) { if let Ok(meta) = serde_json::from_slice::(&meta_bytes) { for tag in meta.tags { @@ -118,6 +140,26 @@ impl DiskStore { } } } + + if !meta.url.is_empty() { + let k = format!("url:{}", meta.url); + if let Some(v) = self.db.get(&k).map_err(|e| format!("sled get: {e}"))? { + let mut set: std::collections::BTreeSet = + bincode::deserialize(&v).unwrap_or_default(); + set.remove(key); + if set.is_empty() { + self.db + .remove(k.as_bytes()) + .map_err(|e| format!("sled remove: {e}"))?; + } else { + let enc = + bincode::serialize(&set).map_err(|e| format!("bincode: {e}"))?; + self.db + .insert(k.as_bytes(), enc) + .map_err(|e| format!("sled insert: {e}"))?; + } + } + } } } @@ -148,6 +190,30 @@ impl DiskStore { } Ok(gone) } + + pub fn remove_by_url(&self, normalized_url: &str) -> Result { + let _g = self.lock.lock(); + let idx_key = format!("url:{normalized_url}"); + let mut keys: std::collections::BTreeSet = Default::default(); + + if let Some(v) = self + .db + .get(&idx_key) + .map_err(|e| format!("sled get: {e}"))? + { + let set: std::collections::BTreeSet = + bincode::deserialize(&v).unwrap_or_default(); + keys.extend(set); + } + + let mut gone = 0; + for key in keys { + if self.remove_key(&key)? { + gone += 1; + } + } + Ok(gone) + } } pub fn headers_to_pairs(headers: &HeaderMap) -> Vec<(String, String)> { diff --git a/src/purge.rs b/src/purge.rs index 4a2c0f0..87137b2 100644 --- a/src/purge.rs +++ b/src/purge.rs @@ -20,11 +20,14 @@ pub fn handle_purge( } } - // TODO: implement URL->keys index (respect variants). For now: return 501 so it isn't misleading. - ( - StatusCode::NOT_IMPLEMENTED, - format!("PURGE-by-URL not implemented (requested {})", uri.path()), - ) + let normalized = crate::normalize::normalize_uri(uri); + match cache.purge_url(&normalized.to_string()) { + Ok(gone) => ( + StatusCode::OK, + format!("Invalidated {gone} objects for {normalized}"), + ), + Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e), + } } pub fn handle_ban(_cache: std::sync::Arc, uri: &Uri) -> (StatusCode, String) { @@ -58,15 +61,17 @@ mod tests { } #[test] - fn purge_without_xkey_returns_not_implemented() { + fn purge_by_url_is_ok_even_when_nothing_matches() { let dir = tempfile::tempdir().unwrap(); let cache = std::sync::Arc::new(Cache::new(dir.path().to_str().unwrap()).unwrap()); - let uri: Uri = "/foo".parse().unwrap(); + // Includes tracking params; purge should normalize first. + let uri: Uri = "/foo?utm_source=x&a=1".parse().unwrap(); let headers = HeaderMap::new(); let (status, body) = handle_purge(cache, &uri, &headers); - assert_eq!(status, StatusCode::NOT_IMPLEMENTED); - assert!(body.contains("PURGE-by-URL not implemented")); + assert_eq!(status, StatusCode::OK); + assert!(body.contains("Invalidated")); + assert!(body.contains("/foo?a=1")); } }