From ef586174613bc82b52aa73dd311d7d9f17699835 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 19:48:24 +0800 Subject: [PATCH 01/13] fix(media): validate Content-Type and magic bytes before sending to model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #776. When a Slack bot token lacks the `files:read` OAuth scope, Slack serves the workspace login HTML page (~55 KB) at HTTP 200 with a `text/html` Content-Type instead of the requested file binary. `download_and_encode_image` previously accepted this response because: 1. It never inspected the HTTP response `Content-Type` header. 2. On `resize_and_compress` failure for a body ≤ 1 MB it fell back to forwarding the raw bytes under the Slack-reported MIME (`image/png`), bypassing any format check. The result: a `ContentBlock::Image { media_type: "image/png", data: }` flowed through to Anthropic, which 400'd with "Could not process image". Because claude-agent-acp persists the user message into the session JSONL before the API reply, the bad block replayed on every subsequent turn in that Slack thread until an operator manually deleted the JSONL inside the pod. Changes: - Add `MediaFetchError` enum to `src/media.rs` so callers can distinguish "not an image, skip silently" (`NotAnImage`) from "claimed image, got unexpected bytes" (`UnsupportedResponseType`, `InvalidImageBody`). - Add `validate_image_response(content_type, body)` pure helper that: - Rejects any HTTP response whose Content-Type (stripped of params, lowercased) is not in `{image/png, image/jpeg, image/gif, image/webp}`. - Sniffs magic bytes via `image::ImageReader::with_guessed_format()` (no new dependencies) and rejects anything that doesn't decode as one of the four supported formats. - Change `download_and_encode_image` signature from `-> Option` to `-> Result`, capturing the Content-Type header before consuming the response with `.bytes()`. - Remove the ≤ 1 MB resize-error fallback that was the direct bug path. - Update `src/slack.rs` call site: on validation failure, collect filenames and post one aggregated user-visible warning to the Slack thread: ":warning: I couldn't access the file(s) you shared (``). This often means the bot is missing the `files:read` OAuth scope. Please ask an admin to reinstall the app with that scope." - Update `src/discord.rs` call site: `warn!` log on failure (Discord URLs are signed-public so the Slack scope hint is not applicable there). Preserve the existing `is_video_file` fallback for `Err(NotAnImage)`. - Add 12 unit tests for `validate_image_response` including the exact bug repro case (HTML body labeled `image/png`, first 8 bytes `3c21444f43545950`). Out of scope / follow-up issues: - Secondary defense: deferring claude-agent-acp JSONL persistence until after model returns 200 (requires changes in the claude-agent-acp Node project). - Startup preflight calling Slack `auth.test` to warn loudly on missing scopes. - Same Content-Type/magic-byte hardening for `download_and_transcribe` and `download_and_read_text_file`. Co-Authored-By: Claude Opus 4.7 --- src/discord.rs | 52 ++++++--- src/media.rs | 301 ++++++++++++++++++++++++++++++++++++++++++++++--- src/slack.rs | 63 +++++++++-- 3 files changed, 370 insertions(+), 46 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index ce2b5e11..a811cfbc 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -702,25 +702,43 @@ impl EventHandler for Handler { debug!(filename = %attachment.filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - &attachment.url, - attachment.content_type.as_deref(), - &attachment.filename, - u64::from(attachment.size), - None, - ) - .await - { - debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); - extra_blocks.push(block); - } else if media::is_video_file(&attachment.filename, attachment.content_type.as_deref()) { - debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); - extra_blocks.push(video_attachment_block( - &attachment.filename, + } else { + match media::download_and_encode_image( + &attachment.url, attachment.content_type.as_deref(), + &attachment.filename, u64::from(attachment.size), - &attachment.url, - )); + None, + ) + .await + { + Ok(block) => { + debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => { + if media::is_video_file( + &attachment.filename, + attachment.content_type.as_deref(), + ) { + debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); + extra_blocks.push(video_attachment_block( + &attachment.filename, + attachment.content_type.as_deref(), + u64::from(attachment.size), + &attachment.url, + )); + } + } + Err(e) => { + tracing::warn!( + url = %attachment.url, + filename = %attachment.filename, + error = %e, + "image attachment failed" + ); + } + } } } diff --git a/src/media.rs b/src/media.rs index b42cfa6e..1f3b4fa0 100644 --- a/src/media.rs +++ b/src/media.rs @@ -21,7 +21,111 @@ const IMAGE_MAX_DIMENSION_PX: u32 = 1200; /// JPEG quality for compressed output. const IMAGE_JPEG_QUALITY: u8 = 75; +/// Error variants for `download_and_encode_image`. +#[derive(Debug)] +pub enum MediaFetchError { + /// URL empty or MIME/filename doesn't indicate an image; skip silently. + NotAnImage, + /// HTTP response Content-Type is not a supported image format. + UnsupportedResponseType { + hinted: Option, + actual: Option, + }, + /// Response body magic bytes don't match a supported image format. + InvalidImageBody { magic_prefix_hex: String }, + /// File exceeds the configured size limit. + SizeExceeded { actual: u64, limit: u64 }, + /// Network-level error (send or body-read). + Network(reqwest::Error), + /// Server returned a non-success HTTP status. + HttpStatus(reqwest::StatusCode), +} + +impl std::fmt::Display for MediaFetchError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotAnImage => write!(f, "not an image attachment"), + Self::UnsupportedResponseType { hinted, actual } => write!( + f, + "server returned unexpected content type (hinted: {}, actual: {})", + hinted.as_deref().unwrap_or("none"), + actual.as_deref().unwrap_or("none"), + ), + Self::InvalidImageBody { magic_prefix_hex } => write!( + f, + "response body is not a valid image (first 8 bytes: {magic_prefix_hex})" + ), + Self::SizeExceeded { actual, limit } => { + write!(f, "file size {actual} exceeds limit {limit}") + } + Self::Network(e) => write!(f, "network error: {e}"), + Self::HttpStatus(s) => write!(f, "HTTP {s}"), + } + } +} + +/// Format the first 8 bytes of a buffer as lowercase hex (no separator). +fn hex_prefix(body: &[u8]) -> String { + body.iter() + .take(8) + .map(|b| format!("{b:02x}")) + .collect::>() + .concat() +} + +/// Allowed MIME types for model-bound images. +const ALLOWED_IMAGE_MIME: &[&str] = &["image/png", "image/jpeg", "image/gif", "image/webp"]; + +/// Validate the HTTP response Content-Type and body magic bytes. +/// +/// Returns the detected `ImageFormat` on success. Both checks are applied: +/// the Content-Type allow-list rejects non-image responses early (e.g. Slack +/// serving an HTML login page when `files:read` scope is missing), and the +/// magic-byte sniff catches corrupted or mislabeled bodies regardless of the +/// header. +fn validate_image_response( + content_type: Option<&str>, + body: &[u8], +) -> Result { + if let Some(ct) = content_type { + let base = ct.split(';').next().unwrap_or(ct).trim().to_lowercase(); + if !ALLOWED_IMAGE_MIME.contains(&base.as_str()) { + return Err(MediaFetchError::UnsupportedResponseType { + hinted: None, + actual: Some(base), + }); + } + } + + let reader = match ImageReader::new(Cursor::new(body)).with_guessed_format() { + Ok(r) => r, + Err(_) => { + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }); + } + }; + + match reader.format() { + Some( + image::ImageFormat::Png + | image::ImageFormat::Jpeg + | image::ImageFormat::Gif + | image::ImageFormat::WebP, + ) => Ok(reader.format().unwrap()), + _ => Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }), + } +} + /// Download an image from a URL, resize/compress it, and return as a ContentBlock. +/// +/// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't +/// indicate an image (silent skip). Returns other `Err` variants when the +/// download succeeded but the response bytes fail Content-Type or magic-byte +/// validation — callers should surface these to the user. +/// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( url: &str, @@ -29,11 +133,11 @@ pub async fn download_and_encode_image( filename: &str, size: u64, auth_token: Option<&str>, -) -> Option { +) -> Result { const MAX_SIZE: u64 = 10 * 1024 * 1024; // 10 MB if url.is_empty() { - return None; + return Err(MediaFetchError::NotAnImage); } let mime = mime_hint.or_else(|| { @@ -51,17 +155,20 @@ pub async fn download_and_encode_image( let Some(mime) = mime else { debug!(filename, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); }; let mime = mime.split(';').next().unwrap_or(mime).trim(); if !mime.starts_with("image/") { debug!(filename, mime, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); } if size > MAX_SIZE { error!(filename, size, "image exceeds 10MB limit"); - return None; + return Err(MediaFetchError::SizeExceeded { + actual: size, + limit: MAX_SIZE, + }); } let mut req = HTTP_CLIENT.get(url); @@ -73,39 +180,63 @@ pub async fn download_and_encode_image( Ok(resp) => resp, Err(e) => { error!(url, error = %e, "download failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if !response.status().is_success() { error!(url, status = %response.status(), "HTTP error downloading image"); - return None; + return Err(MediaFetchError::HttpStatus(response.status())); } + + // Capture Content-Type BEFORE .bytes() consumes the response. + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(str::to_string); + let bytes = match response.bytes().await { Ok(b) => b, Err(e) => { error!(url, error = %e, "read failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if bytes.len() as u64 > MAX_SIZE { + error!(filename, size = bytes.len(), "downloaded image exceeds limit"); + return Err(MediaFetchError::SizeExceeded { + actual: bytes.len() as u64, + limit: MAX_SIZE, + }); + } + + // Validate Content-Type and magic bytes before processing. + if let Err(e) = validate_image_response(content_type.as_deref(), &bytes) { error!( filename, - size = bytes.len(), - "downloaded image exceeds limit" + mime_hint = mime, + content_type = content_type.as_deref().unwrap_or("none"), + magic = hex_prefix(&bytes), + error = %e, + "image validation failed — body is not a supported image" ); - return None; + return Err(e); } let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { - if bytes.len() > 1024 * 1024 { - error!(filename, error = %e, size = bytes.len(), "resize failed and original too large, skipping"); - return None; - } - debug!(filename, error = %e, "resize failed, using original"); - (bytes.to_vec(), mime.to_string()) + error!( + filename, + error = %e, + magic = hex_prefix(&bytes), + size = bytes.len(), + "resize failed after successful validation" + ); + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(&bytes), + }); } }; @@ -117,7 +248,7 @@ pub async fn download_and_encode_image( ); let encoded = BASE64.encode(&output_bytes); - Some(ContentBlock::Image { + Ok(ContentBlock::Image { media_type: output_mime, data: encoded, }) @@ -348,6 +479,13 @@ mod tests { buf.into_inner() } + fn make_jpeg(width: u32, height: u32) -> Vec { + let img = image::RgbImage::new(width, height); + let mut buf = Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::Jpeg).unwrap(); + buf.into_inner() + } + #[test] fn large_image_resized_to_max_dimension() { let png = make_png(3000, 2000); @@ -429,4 +567,131 @@ mod tests { assert!(is_video_file("clip.MOV", None)); assert!(!is_video_file("notes.txt", Some("text/plain"))); } + + // --- validate_image_response tests --- + + #[test] + fn validate_accepts_png_with_matching_content_type() { + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png"), &png).is_ok()); + } + + #[test] + fn validate_accepts_jpeg_with_matching_content_type() { + let jpeg = make_jpeg(1, 1); + assert!(validate_image_response(Some("image/jpeg"), &jpeg).is_ok()); + } + + #[test] + fn validate_accepts_gif_with_matching_content_type() { + let gif: Vec = vec![ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x2C, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, + 0x3B, + ]; + assert!(validate_image_response(Some("image/gif"), &gif).is_ok()); + } + + #[test] + fn validate_accepts_missing_content_type_with_valid_png() { + // When Content-Type header is absent, fall back to magic-byte detection. + let png = make_png(1, 1); + assert!(validate_image_response(None, &png).is_ok()); + } + + #[test] + fn validate_content_type_strips_params() { + // "image/png; charset=binary" is a real header value — must be accepted. + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png; charset=binary"), &png).is_ok()); + } + + /// Exact reproduction of issue #776: Slack serves the workspace login HTML + /// page at HTTP 200 when the bot token lacks the `files:read` scope. + /// The Slack file metadata says `mimetype: image/png`; the response body + /// magic bytes are `Slack login"; + let result = validate_image_response(Some("image/png"), html_body); + match result { + Err(MediaFetchError::InvalidImageBody { magic_prefix_hex }) => { + assert_eq!(magic_prefix_hex, "3c21444f43545950"); + } + other => panic!("expected InvalidImageBody, got {other:?}"), + } + } + + #[test] + fn validate_rejects_text_html_content_type() { + // Even if the body were a valid image, a text/html Content-Type must be rejected. + let png = make_png(1, 1); + let result = validate_image_response(Some("text/html; charset=utf-8"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + #[test] + fn validate_rejects_application_json_content_type() { + let png = make_png(1, 1); + let result = validate_image_response(Some("application/json"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + #[test] + fn validate_rejects_empty_body() { + let result = validate_image_response(Some("image/png"), &[]); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_rejects_truncated_png_header() { + // PNG magic is 8 bytes; 4 bytes is not enough to identify the format. + let truncated = [0x89u8, 0x50, 0x4e, 0x47]; + let result = validate_image_response(Some("image/png"), &truncated); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn media_fetch_error_display_renders() { + let _ = MediaFetchError::NotAnImage.to_string(); + let _ = MediaFetchError::UnsupportedResponseType { + hinted: Some("image/png".into()), + actual: Some("text/html".into()), + } + .to_string(); + let _ = MediaFetchError::InvalidImageBody { + magic_prefix_hex: "3c21444f43545950".into(), + } + .to_string(); + let _ = MediaFetchError::SizeExceeded { + actual: 11_000_000, + limit: 10_000_000, + } + .to_string(); + let _ = MediaFetchError::HttpStatus(reqwest::StatusCode::UNAUTHORIZED).to_string(); + } + + #[test] + fn hex_prefix_formats_first_8_bytes() { + let bytes = b""; + assert_eq!(hex_prefix(bytes), "3c21444f43545950"); + } + + #[test] + fn hex_prefix_handles_short_buffer() { + let bytes = [0xffu8, 0xd8]; + assert_eq!(hex_prefix(&bytes), "ffd8"); + } } diff --git a/src/slack.rs b/src/slack.rs index cbe101f2..e04497c2 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -934,6 +934,7 @@ async fn handle_message( let mut echo_entries: Vec = Vec::new(); let mut text_file_bytes: u64 = 0; let mut text_file_count: u32 = 0; + let mut failed_image_files: Vec = Vec::new(); if let Some(files) = files { for file in files { @@ -1032,21 +1033,61 @@ async fn handle_message( debug!(filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - url, - Some(mimetype), - filename, - size, - Some(bot_token), - ) - .await - { - debug!(filename, "adding image attachment"); - extra_blocks.push(block); + } else { + match media::download_and_encode_image( + url, + Some(mimetype), + filename, + size, + Some(bot_token), + ) + .await + { + Ok(block) => { + debug!(filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => {} + Err( + media::MediaFetchError::UnsupportedResponseType { .. } + | media::MediaFetchError::InvalidImageBody { .. }, + ) => { + warn!( + filename, + "image validation failed; server may have returned HTML (missing files:read scope?)" + ); + failed_image_files.push(filename.to_string()); + } + Err(e) => { + warn!(filename, error = %e, "image download failed"); + } + } } } } + // Notify user if any images couldn't be validated (likely missing files:read scope). + if !failed_image_files.is_empty() { + let warn_channel = ChannelRef { + platform: "slack".into(), + channel_id: channel_id.clone(), + thread_id: thread_ts.clone().or_else(|| Some(ts.clone())), + parent_id: None, + origin_event_id: None, + }; + let file_list = failed_image_files.join("`, `"); + let _ = adapter + .send_message( + &warn_channel, + &format!( + ":warning: I couldn't access the file(s) you shared (`{file_list}`). \ + This often means the bot is missing the `files:read` OAuth scope. \ + Please ask an admin to reinstall the app with that scope." + ), + ) + .await; + } + // Resolve Slack display name (best-effort, fallback to user_id) let display_name = adapter .resolve_user_name(&user_id) From 8aef57dd10c611d8811ab8866448203d533eef11 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 20:05:29 +0800 Subject: [PATCH 02/13] refactor(media): simplify per /simplify review - Remove dead hinted field from UnsupportedResponseType (always None) - Eliminate double reader.format() call with fmt@ binding - Deduplicate hex_prefix() in resize error path (compute once, reuse) - Promote strip_mime_params to media::strip_mime_params (pub crate), slack.rs delegates to it -- single source of truth for MIME stripping Co-Authored-By: Claude Opus 4.7 --- src/media.rs | 34 +++++++++++++++++----------------- src/slack.rs | 5 +---- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/media.rs b/src/media.rs index 1f3b4fa0..b79afe97 100644 --- a/src/media.rs +++ b/src/media.rs @@ -27,10 +27,7 @@ pub enum MediaFetchError { /// URL empty or MIME/filename doesn't indicate an image; skip silently. NotAnImage, /// HTTP response Content-Type is not a supported image format. - UnsupportedResponseType { - hinted: Option, - actual: Option, - }, + UnsupportedResponseType { actual: Option }, /// Response body magic bytes don't match a supported image format. InvalidImageBody { magic_prefix_hex: String }, /// File exceeds the configured size limit. @@ -45,10 +42,9 @@ impl std::fmt::Display for MediaFetchError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::NotAnImage => write!(f, "not an image attachment"), - Self::UnsupportedResponseType { hinted, actual } => write!( + Self::UnsupportedResponseType { actual } => write!( f, - "server returned unexpected content type (hinted: {}, actual: {})", - hinted.as_deref().unwrap_or("none"), + "server returned unexpected content type (actual: {})", actual.as_deref().unwrap_or("none"), ), Self::InvalidImageBody { magic_prefix_hex } => write!( @@ -64,6 +60,11 @@ impl std::fmt::Display for MediaFetchError { } } +/// Strip MIME parameters and trim whitespace. `"image/png; charset=binary"` → `"image/png"`. +pub(crate) fn strip_mime_params(mime: &str) -> &str { + mime.split(';').next().unwrap_or(mime).trim() +} + /// Format the first 8 bytes of a buffer as lowercase hex (no separator). fn hex_prefix(body: &[u8]) -> String { body.iter() @@ -88,10 +89,9 @@ fn validate_image_response( body: &[u8], ) -> Result { if let Some(ct) = content_type { - let base = ct.split(';').next().unwrap_or(ct).trim().to_lowercase(); + let base = strip_mime_params(ct).to_lowercase(); if !ALLOWED_IMAGE_MIME.contains(&base.as_str()) { return Err(MediaFetchError::UnsupportedResponseType { - hinted: None, actual: Some(base), }); } @@ -108,11 +108,11 @@ fn validate_image_response( match reader.format() { Some( - image::ImageFormat::Png - | image::ImageFormat::Jpeg - | image::ImageFormat::Gif - | image::ImageFormat::WebP, - ) => Ok(reader.format().unwrap()), + fmt @ (image::ImageFormat::Png + | image::ImageFormat::Jpeg + | image::ImageFormat::Gif + | image::ImageFormat::WebP), + ) => Ok(fmt), _ => Err(MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), }), @@ -227,15 +227,16 @@ pub async fn download_and_encode_image( let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { + let magic = hex_prefix(&bytes); error!( filename, error = %e, - magic = hex_prefix(&bytes), + magic = %magic, size = bytes.len(), "resize failed after successful validation" ); return Err(MediaFetchError::InvalidImageBody { - magic_prefix_hex: hex_prefix(&bytes), + magic_prefix_hex: magic, }); } }; @@ -667,7 +668,6 @@ mod tests { fn media_fetch_error_display_renders() { let _ = MediaFetchError::NotAnImage.to_string(); let _ = MediaFetchError::UnsupportedResponseType { - hinted: Some("image/png".into()), actual: Some("text/html".into()), } .to_string(); diff --git a/src/slack.rs b/src/slack.rs index e04497c2..b0ccf7f2 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1205,11 +1205,8 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } -/// Strip MIME parameters like `; charset=utf-8` so type-detection helpers see -/// the bare media type. Slack occasionally sends mimetypes like -/// `text/plain; charset=utf-8`; `media::is_text_file` expects the bare form. fn strip_mime_params(mimetype: &str) -> &str { - mimetype.split(';').next().unwrap_or(mimetype).trim() + media::strip_mime_params(mimetype) } /// True only when a Slack non-bot event represents a real user message From 4c12d9a17287a4c6b5f740a42740d1fd9afd3b42 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 20:40:07 +0800 Subject: [PATCH 03/13] fix(media): address cross-model review findings Critical: change Content-Type check from allow-list to block-list (Codex finding). The allow-list rejected application/octet-stream before magic-byte check ran, silently dropping valid images from CDNs. Only text/* is now rejected early; everything else falls through to magic-byte verification. Also: - Soften Slack warning message: no longer attributes all failures to files:read scope; now mentions format support as a second cause - Add SizeExceeded to Slack user notification (was silent) - Log failures from send_message() instead of using let _ = - Log discarded io::Error from with_guessed_format - Fix doc comments: download_and_encode_image (SizeExceeded fires pre-HTTP), validate_image_response (Content-Type check short-circuits, not sequential) - Replace inline "Validate Content-Type..." comment with WHY explanation - Restore doc comment on strip_mime_params wrapper in slack.rs - Add tests: octet-stream acceptance (Codex regression fix), JSON body rejection by magic bytes, missing Content-Type + invalid body Co-Authored-By: Claude Opus 4.7 --- src/media.rs | 61 +++++++++++++++++++++++++++++++++++++--------------- src/slack.rs | 30 +++++++++++++++----------- 2 files changed, 62 insertions(+), 29 deletions(-) diff --git a/src/media.rs b/src/media.rs index b79afe97..21fde4fc 100644 --- a/src/media.rs +++ b/src/media.rs @@ -74,23 +74,21 @@ fn hex_prefix(body: &[u8]) -> String { .concat() } -/// Allowed MIME types for model-bound images. -const ALLOWED_IMAGE_MIME: &[&str] = &["image/png", "image/jpeg", "image/gif", "image/webp"]; - /// Validate the HTTP response Content-Type and body magic bytes. /// -/// Returns the detected `ImageFormat` on success. Both checks are applied: -/// the Content-Type allow-list rejects non-image responses early (e.g. Slack -/// serving an HTML login page when `files:read` scope is missing), and the -/// magic-byte sniff catches corrupted or mislabeled bodies regardless of the -/// header. +/// If Content-Type is present and explicitly non-binary (e.g. `text/html` from +/// Slack's auth redirect when `files:read` scope is missing), rejects immediately. +/// Generic types such as `application/octet-stream` and absent headers pass through +/// to the magic-byte check, which is the authoritative gate for image validity. fn validate_image_response( content_type: Option<&str>, body: &[u8], ) -> Result { + // Reject explicitly-text responses early (e.g. Slack HTML login page at HTTP 200). + // application/octet-stream and other generic types pass through to magic-byte check. if let Some(ct) = content_type { let base = strip_mime_params(ct).to_lowercase(); - if !ALLOWED_IMAGE_MIME.contains(&base.as_str()) { + if base.starts_with("text/") { return Err(MediaFetchError::UnsupportedResponseType { actual: Some(base), }); @@ -99,7 +97,8 @@ fn validate_image_response( let reader = match ImageReader::new(Cursor::new(body)).with_guessed_format() { Ok(r) => r, - Err(_) => { + Err(e) => { + error!(error = %e, "image format detection I/O error"); return Err(MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), }); @@ -122,9 +121,11 @@ fn validate_image_response( /// Download an image from a URL, resize/compress it, and return as a ContentBlock. /// /// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't -/// indicate an image (silent skip). Returns other `Err` variants when the -/// download succeeded but the response bytes fail Content-Type or magic-byte -/// validation — callers should surface these to the user. +/// indicate an image — callers should skip silently. Returns +/// `Err(MediaFetchError::SizeExceeded)` when the declared `size` exceeds the limit +/// before any request is made. Returns other `Err` variants (`Network`, +/// `HttpStatus`, `UnsupportedResponseType`, `InvalidImageBody`) after a request +/// attempt — callers should surface these to the user. /// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( @@ -211,7 +212,8 @@ pub async fn download_and_encode_image( }); } - // Validate Content-Type and magic bytes before processing. + // Guard against HTTP 200 responses that are error pages (e.g. Slack auth redirect + // when files:read scope is missing), and against corrupted or mislabeled bodies. if let Err(e) = validate_image_response(content_type.as_deref(), &bytes) { error!( filename, @@ -634,13 +636,38 @@ mod tests { )); } + /// Regression test for the application/octet-stream fix: CDNs and generic + /// file download endpoints commonly serve any file with this Content-Type. + /// The old allow-list incorrectly rejected it before magic-byte check. #[test] - fn validate_rejects_application_json_content_type() { + fn validate_accepts_octet_stream_with_valid_png() { let png = make_png(1, 1); - let result = validate_image_response(Some("application/json"), &png); + assert!( + validate_image_response(Some("application/octet-stream"), &png).is_ok(), + "application/octet-stream must pass through to magic-byte check" + ); + } + + /// application/json body is rejected by magic bytes, not by Content-Type. + #[test] + fn validate_rejects_json_body_by_magic_bytes() { + let json_body = b"{\"error\":\"invalid_auth\",\"ok\":false}"; + let result = validate_image_response(Some("application/json"), json_body); assert!(matches!( result, - Err(MediaFetchError::UnsupportedResponseType { .. }) + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + /// Missing Content-Type with invalid body: CDN stripping the header should + /// still be caught by magic-byte detection. + #[test] + fn validate_rejects_html_body_with_missing_content_type() { + let html_body = b"error page"; + let result = validate_image_response(None, html_body); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) )); } diff --git a/src/slack.rs b/src/slack.rs index b0ccf7f2..6beda7fb 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1048,13 +1048,17 @@ async fn handle_message( extra_blocks.push(block); } Err(media::MediaFetchError::NotAnImage) => {} + Err(media::MediaFetchError::SizeExceeded { actual, limit }) => { + warn!(filename, actual, limit, "image exceeds size limit"); + failed_image_files.push(format!("{filename} (exceeds {limit} byte limit)")); + } Err( media::MediaFetchError::UnsupportedResponseType { .. } | media::MediaFetchError::InvalidImageBody { .. }, ) => { warn!( filename, - "image validation failed; server may have returned HTML (missing files:read scope?)" + "image validation failed; server may have returned non-image content" ); failed_image_files.push(filename.to_string()); } @@ -1066,7 +1070,7 @@ async fn handle_message( } } - // Notify user if any images couldn't be validated (likely missing files:read scope). + // Notify user if any images couldn't be processed. if !failed_image_files.is_empty() { let warn_channel = ChannelRef { platform: "slack".into(), @@ -1076,16 +1080,14 @@ async fn handle_message( origin_event_id: None, }; let file_list = failed_image_files.join("`, `"); - let _ = adapter - .send_message( - &warn_channel, - &format!( - ":warning: I couldn't access the file(s) you shared (`{file_list}`). \ - This often means the bot is missing the `files:read` OAuth scope. \ - Please ask an admin to reinstall the app with that scope." - ), - ) - .await; + let msg = format!( + ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ + This can happen when the bot lacks the `files:read` OAuth scope, \ + or when the file format isn't supported (PNG/JPEG/GIF/WebP only)." + ); + if let Err(e) = adapter.send_message(&warn_channel, &msg).await { + warn!(error = %e, "failed to send image validation warning to user"); + } } // Resolve Slack display name (best-effort, fallback to user_id) @@ -1205,6 +1207,10 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } +/// Strip MIME parameters so type-detection helpers see the bare media type. +/// Delegates to media::strip_mime_params (single source of truth). +/// Needed because Slack occasionally sends `text/plain; charset=utf-8` and +/// `media::is_text_file` expects the bare form. fn strip_mime_params(mimetype: &str) -> &str { media::strip_mime_params(mimetype) } From 4e1a6827affa9ae0734e43a1b4f2edd4bab822c4 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 11 May 2026 20:57:42 +0800 Subject: [PATCH 04/13] fix(slack): sanitize filenames in warning message (mrkdwn injection) Codex adversarial review found that user-controlled filenames embedded in the mrkdwn warning message could inject Slack markup (backtick break-out, mentions, <@uid> pings). Replace backticks and angle brackets with safe ASCII equivalents before embedding in the message. Co-Authored-By: Claude Opus 4.7 --- src/slack.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/slack.rs b/src/slack.rs index 6beda7fb..2d3a8f35 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1079,7 +1079,15 @@ async fn handle_message( parent_id: None, origin_event_id: None, }; - let file_list = failed_image_files.join("`, `"); + // Sanitize filenames before embedding in mrkdwn: backticks and angle- + // bracket sequences (`<@U...>`, ``) are Slack markup delimiters + // that would allow injection if the filename is user-controlled. + let sanitize = |s: &str| s.replace('`', "'").replace('<', "(").replace('>', ")"); + let file_list = failed_image_files + .iter() + .map(|n| sanitize(n)) + .collect::>() + .join("`, `"); let msg = format!( ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ This can happen when the bot lacks the `files:read` OAuth scope, \ From 225ea2fe2c7cef2a231c112f44f071099f7465ae Mon Sep 17 00:00:00 2001 From: shaun-agent Date: Wed, 13 May 2026 10:00:49 +0000 Subject: [PATCH 05/13] fix(media): validate GIF bodies before pass-through --- src/media.rs | 62 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/media.rs b/src/media.rs index 21fde4fc..e720ecee 100644 --- a/src/media.rs +++ b/src/media.rs @@ -2,7 +2,8 @@ use crate::acp::ContentBlock; use crate::config::SttConfig; use base64::engine::general_purpose::STANDARD as BASE64; use base64::Engine; -use image::ImageReader; +use image::codecs::gif::GifDecoder; +use image::{AnimationDecoder, ImageReader}; use std::io::Cursor; use std::sync::LazyLock; use tracing::{debug, error}; @@ -89,9 +90,7 @@ fn validate_image_response( if let Some(ct) = content_type { let base = strip_mime_params(ct).to_lowercase(); if base.starts_with("text/") { - return Err(MediaFetchError::UnsupportedResponseType { - actual: Some(base), - }); + return Err(MediaFetchError::UnsupportedResponseType { actual: Some(base) }); } } @@ -107,17 +106,27 @@ fn validate_image_response( match reader.format() { Some( - fmt @ (image::ImageFormat::Png - | image::ImageFormat::Jpeg - | image::ImageFormat::Gif - | image::ImageFormat::WebP), + fmt @ (image::ImageFormat::Png | image::ImageFormat::Jpeg | image::ImageFormat::WebP), ) => Ok(fmt), + Some(image::ImageFormat::Gif) => { + validate_gif_body(body).map_err(|_| MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + })?; + Ok(image::ImageFormat::Gif) + } _ => Err(MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), }), } } +fn validate_gif_body(raw: &[u8]) -> image::ImageResult<()> { + GifDecoder::new(Cursor::new(raw))? + .into_frames() + .collect_frames()?; + Ok(()) +} + /// Download an image from a URL, resize/compress it, and return as a ContentBlock. /// /// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't @@ -205,7 +214,11 @@ pub async fn download_and_encode_image( }; if bytes.len() as u64 > MAX_SIZE { - error!(filename, size = bytes.len(), "downloaded image exceeds limit"); + error!( + filename, + size = bytes.len(), + "downloaded image exceeds limit" + ); return Err(MediaFetchError::SizeExceeded { actual: bytes.len() as u64, limit: MAX_SIZE, @@ -304,6 +317,7 @@ pub fn resize_and_compress(raw: &[u8]) -> Result<(Vec, String), image::Image let format = reader.format(); if format == Some(image::ImageFormat::Gif) { + validate_gif_body(raw)?; return Ok((raw.to_vec(), "image/gif".to_string())); } @@ -489,6 +503,14 @@ mod tests { buf.into_inner() } + fn make_gif() -> Vec { + vec![ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, 0x3B, + ] + } + #[test] fn large_image_resized_to_max_dimension() { let png = make_png(3000, 2000); @@ -546,11 +568,7 @@ mod tests { #[test] fn gif_passes_through_unchanged() { - let gif: Vec = vec![ - 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x2C, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, - 0x3B, - ]; + let gif = make_gif(); let (output, mime) = resize_and_compress(&gif).unwrap(); assert_eq!(mime, "image/gif"); @@ -587,14 +605,20 @@ mod tests { #[test] fn validate_accepts_gif_with_matching_content_type() { - let gif: Vec = vec![ - 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x2C, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, - 0x3B, - ]; + let gif = make_gif(); assert!(validate_image_response(Some("image/gif"), &gif).is_ok()); } + #[test] + fn validate_rejects_corrupt_gif_body() { + let corrupt_gif = b"GIF89a\x01\x00\x01\x00\x00\x00\x00"; + let result = validate_image_response(Some("image/gif"), corrupt_gif); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + #[test] fn validate_accepts_missing_content_type_with_valid_png() { // When Content-Type header is absent, fall back to magic-byte detection. From 63c4b8dd319dbd5aab84b55b67ba0bf18ccce868 Mon Sep 17 00:00:00 2001 From: shaun-agent Date: Wed, 13 May 2026 10:02:20 +0000 Subject: [PATCH 06/13] style(discord): apply rustfmt --- src/discord.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index a811cfbc..0cf06986 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -89,7 +89,10 @@ impl ChatAdapter for DiscordAdapter { let builder = serenity::builder::CreateMessage::new() .content(content) .reference_message((ChannelId::new(ch_id), MessageId::new(msg_id))); - match ChannelId::new(ch_id).send_message(&self.http, builder).await { + match ChannelId::new(ch_id) + .send_message(&self.http, builder) + .await + { Ok(msg) => Ok(MessageRef { channel: channel.clone(), message_id: msg.id.to_string(), @@ -105,7 +108,9 @@ impl ChatAdapter for DiscordAdapter { async fn delete_message(&self, msg: &MessageRef) -> anyhow::Result<()> { let ch_id: u64 = Self::resolve_channel(&msg.channel).parse()?; let msg_id: u64 = msg.message_id.parse()?; - self.http.delete_message(ChannelId::new(ch_id), MessageId::new(msg_id), None).await?; + self.http + .delete_message(ChannelId::new(ch_id), MessageId::new(msg_id), None) + .await?; Ok(()) } @@ -415,10 +420,13 @@ impl EventHandler for Handler { let in_allowed_channel = self.allow_all_channels || self.allowed_channels.contains(&channel_id); - let is_mentioned = - msg.mentions_user_id(bot_id) || msg.content.contains(&format!("<@{}>", bot_id)) + let is_mentioned = msg.mentions_user_id(bot_id) + || msg.content.contains(&format!("<@{}>", bot_id)) || (!self.allowed_role_ids.is_empty() - && msg.mention_roles.iter().any(|r| self.allowed_role_ids.contains(&r.get()))); + && msg + .mention_roles + .iter() + .any(|r| self.allowed_role_ids.contains(&r.get()))); // Bot message gating (from upstream #321) if msg.author.bot { @@ -1341,7 +1349,9 @@ fn resolve_mentions(content: &str, bot_id: UserId, allowed_role_ids: &HashSet", id), "")) + allowed_role_ids + .iter() + .fold(out, |s, id| s.replace(&format!("<@&{}>", id), "")) }; // 3. Other user mentions: keep <@UID> as-is so the LLM can mention back // 4. Fallback: replace remaining role mentions only (user mentions are preserved) From 8dd6d288bf07595b9288e3115fc058b81112ca54 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Wed, 13 May 2026 20:56:40 +0800 Subject: [PATCH 07/13] fix(media): address chaodu-agent review findings (F1-F4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F1: validate_gif_body now decodes only the first frame instead of collect_frames() — avoids full in-memory decode of large animated GIFs. F2: remove duplicate validate_gif_body call from resize_and_compress; download_and_encode_image already runs validate_image_response before calling resize, so the second call was redundant. F3: add MediaFetchError::ProcessingFailed(image::ImageError) for the case where body passed validation but resize/compress failed — previously returned the misleading InvalidImageBody variant for a validated image. F4: extend Slack warning message to mention "file is too large" so the message is accurate when SizeExceeded failures are included. Co-Authored-By: Claude Sonnet 4.6 --- src/media.rs | 28 +++++++++++++++++++--------- src/slack.rs | 3 ++- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/media.rs b/src/media.rs index e720ecee..8fbe4935 100644 --- a/src/media.rs +++ b/src/media.rs @@ -37,6 +37,8 @@ pub enum MediaFetchError { Network(reqwest::Error), /// Server returned a non-success HTTP status. HttpStatus(reqwest::StatusCode), + /// Body was a valid image but post-processing (resize/compress) failed. + ProcessingFailed(image::ImageError), } impl std::fmt::Display for MediaFetchError { @@ -57,6 +59,7 @@ impl std::fmt::Display for MediaFetchError { } Self::Network(e) => write!(f, "network error: {e}"), Self::HttpStatus(s) => write!(f, "HTTP {s}"), + Self::ProcessingFailed(e) => write!(f, "image processing failed: {e}"), } } } @@ -121,9 +124,14 @@ fn validate_image_response( } fn validate_gif_body(raw: &[u8]) -> image::ImageResult<()> { - GifDecoder::new(Cursor::new(raw))? - .into_frames() - .collect_frames()?; + let decoder = GifDecoder::new(Cursor::new(raw))?; + let mut frames = decoder.into_frames(); + frames.next().ok_or_else(|| { + image::ImageError::Decoding(image::error::DecodingError::new( + image::error::ImageFormatHint::Exact(image::ImageFormat::Gif), + "GIF has no frames", + )) + })??; Ok(()) } @@ -242,17 +250,13 @@ pub async fn download_and_encode_image( let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { - let magic = hex_prefix(&bytes); error!( filename, error = %e, - magic = %magic, size = bytes.len(), "resize failed after successful validation" ); - return Err(MediaFetchError::InvalidImageBody { - magic_prefix_hex: magic, - }); + return Err(MediaFetchError::ProcessingFailed(e)); } }; @@ -317,7 +321,6 @@ pub fn resize_and_compress(raw: &[u8]) -> Result<(Vec, String), image::Image let format = reader.format(); if format == Some(image::ImageFormat::Gif) { - validate_gif_body(raw)?; return Ok((raw.to_vec(), "image/gif".to_string())); } @@ -732,6 +735,13 @@ mod tests { } .to_string(); let _ = MediaFetchError::HttpStatus(reqwest::StatusCode::UNAUTHORIZED).to_string(); + let _ = MediaFetchError::ProcessingFailed(image::ImageError::Unsupported( + image::error::UnsupportedError::from_format_and_kind( + image::error::ImageFormatHint::Unknown, + image::error::UnsupportedErrorKind::Color(image::ExtendedColorType::Rgba16), + ), + )) + .to_string(); } #[test] diff --git a/src/slack.rs b/src/slack.rs index 2d3a8f35..6782a1f5 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1091,7 +1091,8 @@ async fn handle_message( let msg = format!( ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ This can happen when the bot lacks the `files:read` OAuth scope, \ - or when the file format isn't supported (PNG/JPEG/GIF/WebP only)." + the file format isn't supported (PNG/JPEG/GIF/WebP only), \ + or the file is too large." ); if let Err(e) = adapter.send_message(&warn_channel, &msg).await { warn!(error = %e, "failed to send image validation warning to user"); From 49677187d6261f2be30ef44824bf10c5cd201477 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Wed, 13 May 2026 22:22:51 +0800 Subject: [PATCH 08/13] fix(media): address mob review findings (R1+R2 all 8 items) Behavior: - slack: add explicit ProcessingFailed arm -> push to failed_image_files and log "post-processing failed" (not "download failed") - slack: extract sanitize_slack_filename() pub(crate) fn; add 4 unit tests for backtick/angle-bracket injection prevention API: - validate_image_response: change return type Result -> Result<()> (sole caller only checked Ok/Err; format detection ran twice) Docs: - validate_image_response: add block-list vs allow-list design rationale - validate_gif_body: add doc comment explaining first-frame-only and cursor independence; log original error via debug! before mapping to InvalidImageBody - ProcessingFailed variant: expand doc to clarify semantic difference from InvalidImageBody and expected caller behavior - download_and_encode_image: add ProcessingFailed to error listing Tests: - validate_rejects_mixed_case_text_content_type: pin .to_lowercase() normalization Co-Authored-By: Claude Sonnet 4.6 --- src/media.rs | 49 +++++++++++++++++++++++++++++++++++++++++-------- src/slack.rs | 48 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 84 insertions(+), 13 deletions(-) diff --git a/src/media.rs b/src/media.rs index 8fbe4935..4aba43db 100644 --- a/src/media.rs +++ b/src/media.rs @@ -38,6 +38,9 @@ pub enum MediaFetchError { /// Server returned a non-success HTTP status. HttpStatus(reqwest::StatusCode), /// Body was a valid image but post-processing (resize/compress) failed. + /// Unlike `InvalidImageBody`, the bytes decoded successfully — this is an + /// unexpected processing error, not a content validation failure. Callers + /// should surface the same user-facing warning as `InvalidImageBody`. ProcessingFailed(image::ImageError), } @@ -84,10 +87,15 @@ fn hex_prefix(body: &[u8]) -> String { /// Slack's auth redirect when `files:read` scope is missing), rejects immediately. /// Generic types such as `application/octet-stream` and absent headers pass through /// to the magic-byte check, which is the authoritative gate for image validity. +/// +/// Content-Type is filtered with a block-list (`text/*`) rather than an allow-list +/// (`image/*`) because CDNs commonly serve any file type as `application/octet-stream`; +/// rejecting that header would silently break real downloads. The magic-byte check +/// examines the actual bytes regardless of what the server claims. fn validate_image_response( content_type: Option<&str>, body: &[u8], -) -> Result { +) -> Result<(), MediaFetchError> { // Reject explicitly-text responses early (e.g. Slack HTML login page at HTTP 200). // application/octet-stream and other generic types pass through to magic-byte check. if let Some(ct) = content_type { @@ -108,14 +116,17 @@ fn validate_image_response( }; match reader.format() { - Some( - fmt @ (image::ImageFormat::Png | image::ImageFormat::Jpeg | image::ImageFormat::WebP), - ) => Ok(fmt), + Some(image::ImageFormat::Png | image::ImageFormat::Jpeg | image::ImageFormat::WebP) => { + Ok(()) + } Some(image::ImageFormat::Gif) => { - validate_gif_body(body).map_err(|_| MediaFetchError::InvalidImageBody { - magic_prefix_hex: hex_prefix(body), + validate_gif_body(body).map_err(|e| { + debug!(error = %e, "GIF validation failed"); + MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + } })?; - Ok(image::ImageFormat::Gif) + Ok(()) } _ => Err(MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), @@ -123,6 +134,15 @@ fn validate_image_response( } } +/// Validate a GIF body by attempting to decode exactly one frame. +/// +/// Decoding only the first frame is intentional: the GIF header and colour tables +/// must be valid before the first frame can be decoded, so this catches truncated +/// or corrupt payloads without the CPU/memory cost of decoding a large animated GIF +/// in full. +/// +/// Creates its own `Cursor` over `raw`; the caller can independently re-read the +/// same slice for resizing. fn validate_gif_body(raw: &[u8]) -> image::ImageResult<()> { let decoder = GifDecoder::new(Cursor::new(raw))?; let mut frames = decoder.into_frames(); @@ -142,7 +162,9 @@ fn validate_gif_body(raw: &[u8]) -> image::ImageResult<()> { /// `Err(MediaFetchError::SizeExceeded)` when the declared `size` exceeds the limit /// before any request is made. Returns other `Err` variants (`Network`, /// `HttpStatus`, `UnsupportedResponseType`, `InvalidImageBody`) after a request -/// attempt — callers should surface these to the user. +/// attempt — callers should surface these to the user. Returns +/// `Err(MediaFetchError::ProcessingFailed)` when the body is a valid image but +/// resize/compression fails — callers should warn the user and skip. /// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( @@ -663,6 +685,17 @@ mod tests { )); } + #[test] + fn validate_rejects_mixed_case_text_content_type() { + // Mixed-case Content-Type must be normalised before rejection. + let png = make_png(1, 1); + let result = validate_image_response(Some("Text/HTML; Charset=utf-8"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + /// Regression test for the application/octet-stream fix: CDNs and generic /// file download endpoints commonly serve any file with this Content-Type. /// The old allow-list incorrectly rejected it before magic-byte check. diff --git a/src/slack.rs b/src/slack.rs index 6782a1f5..8dd25e46 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1062,6 +1062,10 @@ async fn handle_message( ); failed_image_files.push(filename.to_string()); } + Err(media::MediaFetchError::ProcessingFailed(ref e)) => { + warn!(filename, error = %e, "image post-processing failed"); + failed_image_files.push(filename.to_string()); + } Err(e) => { warn!(filename, error = %e, "image download failed"); } @@ -1079,13 +1083,9 @@ async fn handle_message( parent_id: None, origin_event_id: None, }; - // Sanitize filenames before embedding in mrkdwn: backticks and angle- - // bracket sequences (`<@U...>`, ``) are Slack markup delimiters - // that would allow injection if the filename is user-controlled. - let sanitize = |s: &str| s.replace('`', "'").replace('<', "(").replace('>', ")"); let file_list = failed_image_files .iter() - .map(|n| sanitize(n)) + .map(|n| sanitize_slack_filename(n)) .collect::>() .join("`, `"); let msg = format!( @@ -1224,6 +1224,15 @@ fn strip_mime_params(mimetype: &str) -> &str { media::strip_mime_params(mimetype) } +/// Sanitize a filename for safe embedding in a Slack mrkdwn message. +/// +/// Backticks (`) and angle brackets (`<`, `>`) are Slack markup delimiters. +/// Without sanitization, a user-controlled filename such as `` or +/// `` `<@U123>` `` would be rendered as a Slack mention or @-here ping. +pub(crate) fn sanitize_slack_filename(s: &str) -> String { + s.replace('`', "'").replace('<', "(").replace('>', ")") +} + /// True only when a Slack non-bot event represents a real user message /// that should reset the bot-turn counter. /// @@ -1391,6 +1400,35 @@ mod tests { assert_eq!(slack_file_download_url(&file), ""); } + // --- sanitize_slack_filename tests --- + + #[test] + fn sanitize_leaves_normal_filename_unchanged() { + assert_eq!(sanitize_slack_filename("photo.png"), "photo.png"); + assert_eq!(sanitize_slack_filename("my file (1).jpg"), "my file (1).jpg"); + } + + #[test] + fn sanitize_replaces_backtick() { + assert_eq!(sanitize_slack_filename("file`name.png"), "file'name.png"); + } + + #[test] + fn sanitize_replaces_angle_brackets() { + // Angle brackets are Slack mrkdwn delimiters; they must not pass through. + assert_eq!(sanitize_slack_filename("<@U123>"), "(@U123)"); + assert_eq!(sanitize_slack_filename(""), "(!here)"); + } + + #[test] + fn sanitize_combined_injection_attempt() { + // A filename constructed to inject a Slack @here ping. + assert_eq!( + sanitize_slack_filename("``"), + "'(!here)'" + ); + } + // --- strip_mime_params tests --- /// MIME with charset parameter strips to bare media type. From 6d7fd5fa793c56935bca0b92f2b10ea472a9582b Mon Sep 17 00:00:00 2001 From: chc-agent Date: Sat, 16 May 2026 13:58:23 +0000 Subject: [PATCH 09/13] fix(media): restore small-file fallback and notify user on HTTP 4xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restore ≤1MB raw-byte fallback when resize_and_compress fails after validate_image_response passes. The body is confirmed valid by magic-byte check, so forwarding original bytes is safe. Prevents regression for formats the image crate can detect but not fully decode (e.g. animated WebP). - Add HttpStatus 4xx match arm in Slack handler to push filename into failed_image_files. HTTP 4xx (401/403) indicates a persistent permission problem (similar root cause to #776) and the user should be notified. 5xx and Network errors remain log-only (transient). --- src/media.rs | 19 ++++++++++++------- src/slack.rs | 6 ++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/media.rs b/src/media.rs index 4aba43db..d6e1c31a 100644 --- a/src/media.rs +++ b/src/media.rs @@ -272,13 +272,18 @@ pub async fn download_and_encode_image( let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { - error!( - filename, - error = %e, - size = bytes.len(), - "resize failed after successful validation" - ); - return Err(MediaFetchError::ProcessingFailed(e)); + if bytes.len() <= 1024 * 1024 { + debug!(filename, error = %e, "resize failed, using validated original"); + (bytes.to_vec(), mime.to_string()) + } else { + error!( + filename, + error = %e, + size = bytes.len(), + "resize failed after successful validation" + ); + return Err(MediaFetchError::ProcessingFailed(e)); + } } }; diff --git a/src/slack.rs b/src/slack.rs index 8dd25e46..bbd53610 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1066,6 +1066,12 @@ async fn handle_message( warn!(filename, error = %e, "image post-processing failed"); failed_image_files.push(filename.to_string()); } + Err(media::MediaFetchError::HttpStatus(status)) + if status.is_client_error() => + { + warn!(filename, %status, "image download denied"); + failed_image_files.push(filename.to_string()); + } Err(e) => { warn!(filename, error = %e, "image download failed"); } From b6be19465abd9da6789d380da6326805fe091f23 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 18 May 2026 16:28:53 +0800 Subject: [PATCH 10/13] fix(media): remove <=1MB fallback and fix sanitize_slack_filename & escape Address two blocking review findings from PR #793: 1. Remove the <=1MB raw-byte fallback in download_and_encode_image. validate_image_response only sniffs magic bytes; resize_and_compress does the full decode. The fallback forwarded raw bytes under Slack's claimed MIME when resize failed on a corrupt/truncated body, reopening the same JSONL poisoning class as #776. Now always returns ProcessingFailed on resize failure. 2. Add & -> & escape to sanitize_slack_filename before < and > replacements. Slack mrkdwn decodes HTML entities before markup parsing, so <@here> would bypass the angle-bracket replacement and render as a mention ping. Add regression tests for both fixes. Co-Authored-By: Claude Sonnet 4.6 --- src/media.rs | 39 +++++++++++++++++++++++++++------------ src/slack.rs | 12 +++++++++++- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/media.rs b/src/media.rs index d6e1c31a..b31e36bc 100644 --- a/src/media.rs +++ b/src/media.rs @@ -272,18 +272,13 @@ pub async fn download_and_encode_image( let (output_bytes, output_mime) = match resize_and_compress(&bytes) { Ok(result) => result, Err(e) => { - if bytes.len() <= 1024 * 1024 { - debug!(filename, error = %e, "resize failed, using validated original"); - (bytes.to_vec(), mime.to_string()) - } else { - error!( - filename, - error = %e, - size = bytes.len(), - "resize failed after successful validation" - ); - return Err(MediaFetchError::ProcessingFailed(e)); - } + error!( + filename, + error = %e, + size = bytes.len(), + "resize failed after successful validation" + ); + return Err(MediaFetchError::ProcessingFailed(e)); } }; @@ -756,6 +751,26 @@ mod tests { )); } + #[test] + fn truncated_png_body_must_not_produce_content_block() { + // Valid PNG magic bytes (8 bytes) + partial IHDR -- body is too short to decode. + // Previously: the <=1MB fallback in download_and_encode_image forwarded raw bytes + // after resize_and_compress failed, reproducing the #776 poisoning class. + // After removing the fallback, resize_and_compress failure must propagate as Err. + let mut truncated = vec![0x89u8, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]; + truncated.extend_from_slice(&[0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52]); + // validate_image_response passes (magic bytes match PNG) -- the fallback was the bug. + assert!( + validate_image_response(Some("image/png"), &truncated).is_ok(), + "magic-byte check still passes for truncated body" + ); + // resize_and_compress catches the truncated body at full-decode time. + assert!( + resize_and_compress(&truncated).is_err(), + "truncated PNG must fail at decode -- no raw-byte fallback allowed" + ); + } + #[test] fn media_fetch_error_display_renders() { let _ = MediaFetchError::NotAnImage.to_string(); diff --git a/src/slack.rs b/src/slack.rs index bbd53610..281e5247 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1236,7 +1236,7 @@ fn strip_mime_params(mimetype: &str) -> &str { /// Without sanitization, a user-controlled filename such as `` or /// `` `<@U123>` `` would be rendered as a Slack mention or @-here ping. pub(crate) fn sanitize_slack_filename(s: &str) -> String { - s.replace('`', "'").replace('<', "(").replace('>', ")") + s.replace('&', "&").replace('`', "'").replace('<', "(").replace('>', ")") } /// True only when a Slack non-bot event represents a real user message @@ -1435,6 +1435,16 @@ mod tests { ); } + #[test] + fn sanitize_escapes_ampersand_before_angle_brackets() { + // Slack mrkdwn decodes HTML entities before markup parsing. + // "<@here>" would round-trip back to "<@here>" and trigger a mention + // ping if & is not escaped. The & must be escaped first so downstream + // Slack entity decoding cannot reconstruct a mrkdwn delimiter. + assert_eq!(sanitize_slack_filename("<@here>"), "&lt;@here&gt;"); + assert_eq!(sanitize_slack_filename("file&name.png"), "file&name.png"); + } + // --- strip_mime_params tests --- /// MIME with charset parameter strips to bare media type. From 66824612c049ee9cb576a97772c6b2dc1ec52fbc Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 18 May 2026 17:21:12 +0800 Subject: [PATCH 11/13] refactor(media): simplify per /simplify review Replace mut vec + extend_from_slice with single immutable slice literal in truncated_png test; remove WHAT-restatement inline comments (assertion messages already carry the constraint reason). Co-Authored-By: Claude Sonnet 4.6 --- src/media.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/media.rs b/src/media.rs index b31e36bc..b6c4f359 100644 --- a/src/media.rs +++ b/src/media.rs @@ -757,16 +757,16 @@ mod tests { // Previously: the <=1MB fallback in download_and_encode_image forwarded raw bytes // after resize_and_compress failed, reproducing the #776 poisoning class. // After removing the fallback, resize_and_compress failure must propagate as Err. - let mut truncated = vec![0x89u8, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]; - truncated.extend_from_slice(&[0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52]); - // validate_image_response passes (magic bytes match PNG) -- the fallback was the bug. + let truncated: &[u8] = &[ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, // PNG magic + 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, // partial IHDR + ]; assert!( - validate_image_response(Some("image/png"), &truncated).is_ok(), + validate_image_response(Some("image/png"), truncated).is_ok(), "magic-byte check still passes for truncated body" ); - // resize_and_compress catches the truncated body at full-decode time. assert!( - resize_and_compress(&truncated).is_err(), + resize_and_compress(truncated).is_err(), "truncated PNG must fail at decode -- no raw-byte fallback allowed" ); } From e95f228064d725a02d6942eb0d1f4f3a16f3ffc4 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 18 May 2026 19:05:46 +0800 Subject: [PATCH 12/13] fix(media,slack,discord): address mob review findings (C1+C2+I1-I5+NITs) C1 (slack.rs): SizeExceeded now pushes bare filename to failed_image_files, removing the compound annotation that violated CLAUDE.md security convention. C2 (discord.rs): collect failed image filenames and send a user-facing warning after thread_channel is resolved; was silently logged only. I1 (slack.rs): catch-all error arm also pushes to failed_image_files so 5xx server errors surface to the user instead of being silently dropped. I2 (media.rs): download_and_encode_image doc comment now mentions that SizeExceeded can also fire post-download, not only pre-request. I3 (slack.rs): sanitize_slack_filename doc explains the & -> & escape and the HTML entity round-trip attack vector. I4 (media.rs): ProcessingFailed doc updated to describe the Slack/Discord asymmetry instead of claiming callers should always notify the user. I5 (media.rs): replace .ok()? silent drops in download_and_transcribe and download_and_read_text_file with explicit error logging. NITs: text-typed wording, GIF debug->warn, UnsupportedResponseType None display test, WebP magic-byte acceptance test, tracing::warn import. Co-Authored-By: Claude Sonnet 4.6 --- src/discord.rs | 19 ++++++++++++++++++ src/media.rs | 52 +++++++++++++++++++++++++++++++++++++++----------- src/slack.rs | 12 ++++++++---- 3 files changed, 68 insertions(+), 15 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index 0cf06986..3c926d4a 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -645,6 +645,7 @@ impl EventHandler for Handler { // image -> encode, video -> URL for agent-side inspection). let mut extra_blocks = Vec::new(); let mut echo_entries: Vec = Vec::new(); + let mut failed_image_files: Vec = Vec::new(); let mut text_file_bytes: u64 = 0; let mut text_file_count: u32 = 0; const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments @@ -745,6 +746,7 @@ impl EventHandler for Handler { error = %e, "image attachment failed" ); + failed_image_files.push(attachment.filename.clone()); } } } @@ -776,6 +778,23 @@ impl EventHandler for Handler { } }; + // Notify user if any images couldn't be processed. + if !failed_image_files.is_empty() { + let file_list = failed_image_files + .iter() + .map(|n| format!("`{}`", n.replace('`', "'"))) + .collect::>() + .join(", "); + let warn_msg = format!( + ":warning: I couldn't process the image(s) you shared ({}). \ + The files may be inaccessible or in an unsupported format (PNG/JPEG/GIF/WebP only).", + file_list + ); + if let Err(e) = adapter.send_message(&thread_channel, &warn_msg).await { + tracing::warn!(error = %e, "failed to send image warning to user"); + } + } + let trigger_msg = discord_msg_ref(&msg); // Per-thread streaming: check if another bot is present in this thread diff --git a/src/media.rs b/src/media.rs index b6c4f359..434b89bc 100644 --- a/src/media.rs +++ b/src/media.rs @@ -6,7 +6,7 @@ use image::codecs::gif::GifDecoder; use image::{AnimationDecoder, ImageReader}; use std::io::Cursor; use std::sync::LazyLock; -use tracing::{debug, error}; +use tracing::{debug, error, warn}; /// Reusable HTTP client for downloading attachments (shared across adapters). pub static HTTP_CLIENT: LazyLock = LazyLock::new(|| { @@ -39,8 +39,9 @@ pub enum MediaFetchError { HttpStatus(reqwest::StatusCode), /// Body was a valid image but post-processing (resize/compress) failed. /// Unlike `InvalidImageBody`, the bytes decoded successfully — this is an - /// unexpected processing error, not a content validation failure. Callers - /// should surface the same user-facing warning as `InvalidImageBody`. + /// unexpected processing error, not a content validation failure. The Slack + /// adapter notifies the user; Discord logs at warn level (no user-facing + /// notification, as Discord attachment URLs have different auth semantics). ProcessingFailed(image::ImageError), } @@ -83,7 +84,7 @@ fn hex_prefix(body: &[u8]) -> String { /// Validate the HTTP response Content-Type and body magic bytes. /// -/// If Content-Type is present and explicitly non-binary (e.g. `text/html` from +/// If Content-Type is present and explicitly text-typed (e.g. `text/html` from /// Slack's auth redirect when `files:read` scope is missing), rejects immediately. /// Generic types such as `application/octet-stream` and absent headers pass through /// to the magic-byte check, which is the authoritative gate for image validity. @@ -121,7 +122,7 @@ fn validate_image_response( } Some(image::ImageFormat::Gif) => { validate_gif_body(body).map_err(|e| { - debug!(error = %e, "GIF validation failed"); + warn!(error = %e, "GIF validation failed"); MediaFetchError::InvalidImageBody { magic_prefix_hex: hex_prefix(body), } @@ -160,9 +161,9 @@ fn validate_gif_body(raw: &[u8]) -> image::ImageResult<()> { /// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't /// indicate an image — callers should skip silently. Returns /// `Err(MediaFetchError::SizeExceeded)` when the declared `size` exceeds the limit -/// before any request is made. Returns other `Err` variants (`Network`, -/// `HttpStatus`, `UnsupportedResponseType`, `InvalidImageBody`) after a request -/// attempt — callers should surface these to the user. Returns +/// before any request is made, or when the downloaded body exceeds the limit. Returns +/// other `Err` variants (`Network`, `HttpStatus`, `UnsupportedResponseType`, +/// `InvalidImageBody`) after a request attempt — callers should surface these to the user. Returns /// `Err(MediaFetchError::ProcessingFailed)` when the body is a valid image but /// resize/compression fails — callers should warn the user and skip. /// @@ -318,12 +319,24 @@ pub async fn download_and_transcribe( req = req.header("Authorization", format!("Bearer {token}")); } - let resp = req.send().await.ok()?; + let resp = match req.send().await { + Ok(r) => r, + Err(e) => { + error!(url, error = %e, "audio download request failed"); + return None; + } + }; if !resp.status().is_success() { error!(url, status = %resp.status(), "audio download failed"); return None; } - let bytes = resp.bytes().await.ok()?.to_vec(); + let bytes = match resp.bytes().await { + Ok(b) => b.to_vec(), + Err(e) => { + error!(url, error = %e, "audio body read failed"); + return None; + } + }; crate::stt::transcribe( &HTTP_CLIENT, @@ -479,7 +492,13 @@ pub async fn download_and_read_text_file( tracing::warn!(url, status = %resp.status(), "text file download failed"); return None; } - let bytes = resp.bytes().await.ok()?; + let bytes = match resp.bytes().await { + Ok(b) => b, + Err(e) => { + tracing::warn!(url, error = %e, "text file body read failed"); + return None; + } + }; let actual_size = bytes.len() as u64; // Defense-in-depth: verify actual download size @@ -778,6 +797,8 @@ mod tests { actual: Some("text/html".into()), } .to_string(); + let s = MediaFetchError::UnsupportedResponseType { actual: None }.to_string(); + assert!(s.contains("none"), "None branch should render as 'none'"); let _ = MediaFetchError::InvalidImageBody { magic_prefix_hex: "3c21444f43545950".into(), } @@ -797,6 +818,15 @@ mod tests { .to_string(); } + #[test] + fn validate_accepts_webp_by_magic_bytes() { + let img = image::RgbImage::new(1, 1); + let mut buf = std::io::Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::WebP).unwrap(); + let webp_body = buf.into_inner(); + assert!(validate_image_response(Some("image/webp"), &webp_body).is_ok()); + } + #[test] fn hex_prefix_formats_first_8_bytes() { let bytes = b""; diff --git a/src/slack.rs b/src/slack.rs index 281e5247..ac201842 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1050,7 +1050,7 @@ async fn handle_message( Err(media::MediaFetchError::NotAnImage) => {} Err(media::MediaFetchError::SizeExceeded { actual, limit }) => { warn!(filename, actual, limit, "image exceeds size limit"); - failed_image_files.push(format!("{filename} (exceeds {limit} byte limit)")); + failed_image_files.push(filename.to_string()); } Err( media::MediaFetchError::UnsupportedResponseType { .. } @@ -1074,6 +1074,7 @@ async fn handle_message( } Err(e) => { warn!(filename, error = %e, "image download failed"); + failed_image_files.push(filename.to_string()); } } } @@ -1232,9 +1233,12 @@ fn strip_mime_params(mimetype: &str) -> &str { /// Sanitize a filename for safe embedding in a Slack mrkdwn message. /// -/// Backticks (`) and angle brackets (`<`, `>`) are Slack markup delimiters. -/// Without sanitization, a user-controlled filename such as `` or -/// `` `<@U123>` `` would be rendered as a Slack mention or @-here ping. +/// Ampersands (`&`), backticks (`` ` ``), and angle brackets (`<`, `>`) are escaped. +/// `&` is encoded as `&` first because Slack decodes HTML entities before parsing +/// mrkdwn — a filename like `<@here>` would otherwise round-trip back to +/// `<@here>` and trigger a mention ping. Backticks and angle brackets are Slack +/// mrkdwn delimiters; without escaping, `` or `` `<@U123>` `` would render +/// as mentions or @-here pings. pub(crate) fn sanitize_slack_filename(s: &str) -> String { s.replace('&', "&").replace('`', "'").replace('<', "(").replace('>', ")") } From 0dbe75fa1f4ad8dfce76c8f576617f314d9237e9 Mon Sep 17 00:00:00 2001 From: howie <2318485+howie@users.noreply.github.com> Date: Mon, 18 May 2026 19:13:17 +0800 Subject: [PATCH 13/13] fix(media): add STT post-download size check and fix ProcessingFailed doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add post-download size verification in download_and_transcribe — the pre-request size check trusts platform-reported metadata which can be 0 or understated. Now also checks bytes.len() after reading, consistent with the image and text-file download paths. Update ProcessingFailed doc comment: both Slack and Discord adapters now surface user-facing warnings, so the previous asymmetry note was stale. Co-Authored-By: Claude Sonnet 4.6 --- src/media.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/media.rs b/src/media.rs index 434b89bc..33ea5901 100644 --- a/src/media.rs +++ b/src/media.rs @@ -39,9 +39,9 @@ pub enum MediaFetchError { HttpStatus(reqwest::StatusCode), /// Body was a valid image but post-processing (resize/compress) failed. /// Unlike `InvalidImageBody`, the bytes decoded successfully — this is an - /// unexpected processing error, not a content validation failure. The Slack - /// adapter notifies the user; Discord logs at warn level (no user-facing - /// notification, as Discord attachment URLs have different auth semantics). + /// unexpected processing error, not a content validation failure. Both the + /// Slack and Discord adapters surface this as a user-facing warning alongside + /// other image-validation failures. ProcessingFailed(image::ImageError), } @@ -338,6 +338,11 @@ pub async fn download_and_transcribe( } }; + if bytes.len() as u64 > MAX_SIZE { + error!(filename, size = bytes.len(), "downloaded audio exceeds 25MB limit"); + return None; + } + crate::stt::transcribe( &HTTP_CLIENT, stt_config,