diff --git a/src/discord.rs b/src/discord.rs index ce2b5e11..22ced21c 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -89,7 +89,10 @@ impl ChatAdapter for DiscordAdapter { let builder = serenity::builder::CreateMessage::new() .content(content) .reference_message((ChannelId::new(ch_id), MessageId::new(msg_id))); - match ChannelId::new(ch_id).send_message(&self.http, builder).await { + match ChannelId::new(ch_id) + .send_message(&self.http, builder) + .await + { Ok(msg) => Ok(MessageRef { channel: channel.clone(), message_id: msg.id.to_string(), @@ -105,7 +108,9 @@ impl ChatAdapter for DiscordAdapter { async fn delete_message(&self, msg: &MessageRef) -> anyhow::Result<()> { let ch_id: u64 = Self::resolve_channel(&msg.channel).parse()?; let msg_id: u64 = msg.message_id.parse()?; - self.http.delete_message(ChannelId::new(ch_id), MessageId::new(msg_id), None).await?; + self.http + .delete_message(ChannelId::new(ch_id), MessageId::new(msg_id), None) + .await?; Ok(()) } @@ -415,10 +420,13 @@ impl EventHandler for Handler { let in_allowed_channel = self.allow_all_channels || self.allowed_channels.contains(&channel_id); - let is_mentioned = - msg.mentions_user_id(bot_id) || msg.content.contains(&format!("<@{}>", bot_id)) + let is_mentioned = msg.mentions_user_id(bot_id) + || msg.content.contains(&format!("<@{}>", bot_id)) || (!self.allowed_role_ids.is_empty() - && msg.mention_roles.iter().any(|r| self.allowed_role_ids.contains(&r.get()))); + && msg + .mention_roles + .iter() + .any(|r| self.allowed_role_ids.contains(&r.get()))); // Bot message gating (from upstream #321) if msg.author.bot { @@ -639,6 +647,7 @@ impl EventHandler for Handler { let mut echo_entries: Vec = Vec::new(); let mut text_file_bytes: u64 = 0; let mut text_file_count: u32 = 0; + let mut failed_image_files: Vec = Vec::new(); const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // 1 MB total for all text file attachments const TEXT_FILE_COUNT_CAP: u32 = 5; @@ -702,25 +711,56 @@ impl EventHandler for Handler { debug!(filename = %attachment.filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - &attachment.url, - attachment.content_type.as_deref(), - &attachment.filename, - u64::from(attachment.size), - None, - ) - .await - { - debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); - extra_blocks.push(block); - } else if media::is_video_file(&attachment.filename, attachment.content_type.as_deref()) { - debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); - extra_blocks.push(video_attachment_block( - &attachment.filename, + } else { + match media::download_and_encode_image( + &attachment.url, attachment.content_type.as_deref(), + &attachment.filename, u64::from(attachment.size), - &attachment.url, - )); + None, + ) + .await + { + Ok(block) => { + debug!(url = %attachment.url, filename = %attachment.filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => { + if media::is_video_file( + &attachment.filename, + attachment.content_type.as_deref(), + ) { + debug!(url = %attachment.url, filename = %attachment.filename, "adding video attachment link"); + extra_blocks.push(video_attachment_block( + &attachment.filename, + attachment.content_type.as_deref(), + u64::from(attachment.size), + &attachment.url, + )); + } + } + Err(e) => { + if let Some(entry) = + media::failed_attachment_entry(&attachment.filename, &e) + { + tracing::warn!( + url = %attachment.url, + filename = %attachment.filename, + error = %e, + "image attachment failed" + ); + failed_image_files.push(entry); + } else { + // Network/HTTP 5xx failures: transient; warn in logs only. + tracing::warn!( + url = %attachment.url, + filename = %attachment.filename, + error = %e, + "image download failed" + ); + } + } + } } } @@ -744,12 +784,47 @@ impl EventHandler for Handler { match get_or_create_thread(&ctx, &adapter, &msg, &prompt).await { Ok(ch) => ch, Err(e) => { + // Thread creation failed — entire message is dropped. No reply, + // no warning, no agent note. The failed_image_files warning and + // agent note are NOT sent here: we have no thread to target and + // the agent won't run anyway. error!("failed to create thread: {e}"); return; } } }; + // Agent note and user warning are sent together or not at all. + // Push the agent note here (after thread_channel is resolved) so a + // thread-creation failure above silences both, keeping them in sync. + if !failed_image_files.is_empty() { + extra_blocks.push(ContentBlock::Text { + text: media::format_failed_attachment_note(&failed_image_files), + }); + } + + // Send user-visible warning into the correct thread now that we know where it is. + // For top-level channel messages, thread_channel is the newly-created thread — + // not msg.channel_id. Sending before get_or_create_thread would route to the parent. + if !failed_image_files.is_empty() { + let file_list = failed_image_files + .iter() + .map(|n| sanitize_discord_filename(n)) + .collect::>() + .join("`, `"); + let warn_msg = format!( + ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ + Supported formats are PNG / JPEG / GIF / WebP up to 10 MB." + ); + if let Err(e) = adapter.send_message(&thread_channel, &warn_msg).await { + warn!( + channel_id = %msg.channel_id, + error = %e, + "failed to send image validation warning to user" + ); + } + } + let trigger_msg = discord_msg_ref(&msg); // Per-thread streaming: check if another bot is present in this thread @@ -1323,7 +1398,9 @@ fn resolve_mentions(content: &str, bot_id: UserId, allowed_role_ids: &HashSet", id), "")) + allowed_role_ids + .iter() + .fold(out, |s, id| s.replace(&format!("<@&{}>", id), "")) }; // 3. Other user mentions: keep <@UID> as-is so the LLM can mention back // 4. Fallback: replace remaining role mentions only (user mentions are preserved) @@ -1331,6 +1408,14 @@ fn resolve_mentions(content: &str, bot_id: UserId, allowed_role_ids: &HashSet String { + media::sanitize_attachment_filename(s) +} + fn video_attachment_block( filename: &str, content_type: Option<&str>, @@ -1488,6 +1573,24 @@ mod tests { use super::*; use crate::bot_turns::{TurnResult, HARD_BOT_TURN_LIMIT}; + // --- sanitize_discord_filename tests --- + + #[test] + fn sanitize_discord_leaves_normal_filename_unchanged() { + assert_eq!(sanitize_discord_filename("photo.png"), "photo.png"); + assert_eq!(sanitize_discord_filename("my file (1).jpg"), "my file (1).jpg"); + } + + #[test] + fn sanitize_discord_replaces_backtick() { + assert_eq!(sanitize_discord_filename("file`name.png"), "file'name.png"); + } + + #[test] + fn sanitize_discord_combined_injection_attempt() { + assert_eq!(sanitize_discord_filename("``"), "'(!here)'"); + } + // --- resolve_mentions tests --- /// Bot's own <@UID> mention is stripped from the prompt. @@ -2125,4 +2228,5 @@ mod tests { fn normal_channel_creates_thread() { assert!(!should_skip_thread_creation(false, false)); } + } diff --git a/src/dispatch.rs b/src/dispatch.rs index 013ee3d8..b37b7890 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -796,6 +796,37 @@ mod tests { assert!(matches!(&blocks[3], ContentBlock::Image { .. })); } + #[test] + fn pack_arrival_event_keeps_text_extras_order_before_prompt() { + let extra = vec![ + ContentBlock::Text { + text: "[Voice message transcript]: hello".into(), + }, + ContentBlock::Text { + text: "[Attachment validation failed]: the user attempted to attach `photo.png`" + .into(), + }, + ContentBlock::Image { + media_type: "image/png".into(), + data: "abc".into(), + }, + ]; + let blocks = AdapterRouter::pack_arrival_event("{}", "typed prompt", extra); + + assert_eq!(blocks.len(), 5); + assert!( + matches!(&blocks[0], ContentBlock::Text { text } if text.contains("")) + ); + assert!( + matches!(&blocks[1], ContentBlock::Text { text } if text.contains("Voice message transcript")) + ); + assert!( + matches!(&blocks[2], ContentBlock::Text { text } if text.contains("Attachment validation failed")) + ); + assert!(matches!(&blocks[3], ContentBlock::Text { text } if text == "typed prompt")); + assert!(matches!(&blocks[4], ContentBlock::Image { .. })); + } + #[test] fn pack_arrival_event_batch_n2() { // Two arrival events concatenated → 2 (header + prompt) pairs = 4 blocks. diff --git a/src/media.rs b/src/media.rs index b42cfa6e..48d0a952 100644 --- a/src/media.rs +++ b/src/media.rs @@ -2,10 +2,11 @@ use crate::acp::ContentBlock; use crate::config::SttConfig; use base64::engine::general_purpose::STANDARD as BASE64; use base64::Engine; -use image::ImageReader; +use image::codecs::gif::GifDecoder; +use image::{AnimationDecoder, ImageReader}; use std::io::Cursor; use std::sync::LazyLock; -use tracing::{debug, error}; +use tracing::{debug, error, warn}; /// Reusable HTTP client for downloading attachments (shared across adapters). pub static HTTP_CLIENT: LazyLock = LazyLock::new(|| { @@ -21,7 +22,203 @@ const IMAGE_MAX_DIMENSION_PX: u32 = 1200; /// JPEG quality for compressed output. const IMAGE_JPEG_QUALITY: u8 = 75; +/// Error variants for `download_and_encode_image`. +#[derive(Debug)] +pub enum MediaFetchError { + /// URL empty or MIME/filename doesn't indicate an image; skip silently. + NotAnImage, + /// HTTP response Content-Type is not a supported image format. + UnsupportedResponseType { actual: Option }, + /// Response body magic bytes don't match a supported image format. + InvalidImageBody { magic_prefix_hex: String }, + /// File exceeds the configured size limit. + SizeExceeded { actual: u64, limit: u64 }, + /// Network-level error (send or body-read). + Network(reqwest::Error), + /// Server returned a non-success HTTP status. + HttpStatus(reqwest::StatusCode), + /// Body was a valid image but post-processing (resize/compress) failed. + /// Unlike `InvalidImageBody`, the bytes decoded successfully — this is an + /// unexpected processing error, not a content validation failure. Callers + /// should surface the same user-facing warning as `InvalidImageBody`. + ProcessingFailed(image::ImageError), +} + +impl std::fmt::Display for MediaFetchError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotAnImage => write!(f, "not an image attachment"), + Self::UnsupportedResponseType { actual } => write!( + f, + "server returned unexpected content type (actual: {})", + actual.as_deref().unwrap_or("none"), + ), + Self::InvalidImageBody { magic_prefix_hex } => write!( + f, + "response body is not a valid image (first 8 bytes: {magic_prefix_hex})" + ), + Self::SizeExceeded { actual, limit } => { + write!(f, "file size {actual} exceeds limit {limit}") + } + Self::Network(e) => write!(f, "network error: {e}"), + Self::HttpStatus(s) => write!(f, "HTTP {s}"), + Self::ProcessingFailed(e) => write!(f, "image processing failed: {e}"), + } + } +} + +/// Sanitize a user-controlled filename before embedding in adapter warnings or +/// agent-prompt notes. Replaces characters that function as markup delimiters +/// in Slack mrkdwn, Discord markdown, and LLM code-spans: +/// - `` ` `` → `'` (closes code spans in all three contexts) +/// - `<` → `(`, `>` → `)` (disables ``, `<@uid>` mention injection) +pub(crate) fn sanitize_attachment_filename(s: &str) -> String { + s.replace('`', "'").replace('<', "(").replace('>', ")") +} + +/// Build a `[Attachment validation failed]` note for injection into the agent prompt +/// so the agent's reply acknowledges the failure instead of asking "where's the image?". +/// +/// Filenames are sanitized via `sanitize_attachment_filename` internally; callers +/// should pass raw (unsanitized) strings. +pub fn format_failed_attachment_note(filenames: &[String]) -> String { + let list = filenames + .iter() + .map(|n| format!("`{}`", sanitize_attachment_filename(n))) + .collect::>() + .join(", "); + format!( + "[Attachment validation failed]: the user attempted to attach {list} but \ + the file(s) could not be processed (unsupported format, corrupt body, or \ + size limit exceeded). The user has been notified separately — acknowledge \ + the failure and proceed without apologising for not seeing the file." + ) +} + +/// Returns the filename to push into `failed_image_files` for the given error, or +/// `None` if the error is transient / network-level and should only be logged. +/// +/// Returns `None` for `NotAnImage` too — that is a deliberate silent skip (the +/// attachment simply isn't an image), not a transient failure. +/// +/// HTTP 4xx responses are treated as permanent-ish failures (missing scope, +/// revoked credentials, expired URL) and warrant user notification. HTTP 5xx +/// and network errors are transient and are logged only. +pub(crate) fn failed_attachment_entry(filename: &str, e: &MediaFetchError) -> Option { + match e { + MediaFetchError::NotAnImage => None, + MediaFetchError::SizeExceeded { .. } + | MediaFetchError::UnsupportedResponseType { .. } + | MediaFetchError::InvalidImageBody { .. } + | MediaFetchError::ProcessingFailed(_) => Some(filename.to_string()), + MediaFetchError::HttpStatus(status) if status.is_client_error() => { + Some(filename.to_string()) + } + // Network errors and non-4xx HTTP status are transient; log-only, no user notification. + MediaFetchError::Network(_) => None, + MediaFetchError::HttpStatus(_) => None, + } +} + +/// Strip MIME parameters and trim whitespace. `"image/png; charset=binary"` → `"image/png"`. +pub(crate) fn strip_mime_params(mime: &str) -> &str { + mime.split(';').next().unwrap_or(mime).trim() +} + +/// Format the first 8 bytes of a buffer as lowercase hex (no separator). +fn hex_prefix(body: &[u8]) -> String { + body.iter() + .take(8) + .map(|b| format!("{b:02x}")) + .collect::>() + .concat() +} + +/// Validate the HTTP response Content-Type and body magic bytes. +/// +/// If Content-Type is present and explicitly non-binary (e.g. `text/html` from +/// Slack's auth redirect when `files:read` scope is missing), rejects immediately. +/// Generic types such as `application/octet-stream` and absent headers pass through +/// to the magic-byte check, which is the authoritative gate for image validity. +/// +/// Content-Type is filtered with a block-list (`text/*`) rather than an allow-list +/// (`image/*`) because CDNs commonly serve any file type as `application/octet-stream`; +/// rejecting that header would silently break real downloads. The magic-byte check +/// examines the actual bytes regardless of what the server claims. +fn validate_image_response( + content_type: Option<&str>, + body: &[u8], +) -> Result<(), MediaFetchError> { + // Reject explicitly-text responses early (e.g. Slack HTML login page at HTTP 200). + // application/octet-stream and other generic types pass through to magic-byte check. + if let Some(ct) = content_type { + let base = strip_mime_params(ct).to_lowercase(); + if base.starts_with("text/") { + return Err(MediaFetchError::UnsupportedResponseType { actual: Some(base) }); + } + } + + let reader = match ImageReader::new(Cursor::new(body)).with_guessed_format() { + Ok(r) => r, + Err(e) => { + error!(error = %e, "image format detection I/O error"); + return Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }); + } + }; + + match reader.format() { + Some(image::ImageFormat::Png | image::ImageFormat::Jpeg | image::ImageFormat::WebP) => { + Ok(()) + } + Some(image::ImageFormat::Gif) => { + validate_gif_body(body).map_err(|e| { + debug!(error = %e, "GIF validation failed"); + MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + } + })?; + Ok(()) + } + _ => Err(MediaFetchError::InvalidImageBody { + magic_prefix_hex: hex_prefix(body), + }), + } +} + +/// Validate a GIF body by attempting to decode exactly one frame. +/// +/// Decoding only the first frame is intentional: the GIF header and colour tables +/// must be valid before the first frame can be decoded, so this catches truncated +/// or corrupt payloads without the CPU/memory cost of decoding a large animated GIF +/// in full. +/// +/// Creates its own `Cursor` over `raw`; the caller can independently re-read the +/// same slice for resizing. +fn validate_gif_body(raw: &[u8]) -> image::ImageResult<()> { + let decoder = GifDecoder::new(Cursor::new(raw))?; + let mut frames = decoder.into_frames(); + frames.next().ok_or_else(|| { + image::ImageError::Decoding(image::error::DecodingError::new( + image::error::ImageFormatHint::Exact(image::ImageFormat::Gif), + "GIF has no frames", + )) + })??; + Ok(()) +} + /// Download an image from a URL, resize/compress it, and return as a ContentBlock. +/// +/// Returns `Err(MediaFetchError::NotAnImage)` when the URL or MIME hint don't +/// indicate an image — callers should skip silently. Returns +/// `Err(MediaFetchError::SizeExceeded)` when the declared `size` exceeds the limit +/// before any request is made. Returns other `Err` variants (`Network`, +/// `HttpStatus`, `UnsupportedResponseType`, `InvalidImageBody`) after a request +/// attempt — callers should surface these to the user. Returns +/// `Err(MediaFetchError::ProcessingFailed)` when the body is a valid image but +/// resize/compression fails — callers should warn the user and skip. +/// /// Pass `auth_token` for platforms that require authentication (e.g. Slack private files). pub async fn download_and_encode_image( url: &str, @@ -29,11 +226,11 @@ pub async fn download_and_encode_image( filename: &str, size: u64, auth_token: Option<&str>, -) -> Option { +) -> Result { const MAX_SIZE: u64 = 10 * 1024 * 1024; // 10 MB if url.is_empty() { - return None; + return Err(MediaFetchError::NotAnImage); } let mime = mime_hint.or_else(|| { @@ -51,17 +248,20 @@ pub async fn download_and_encode_image( let Some(mime) = mime else { debug!(filename, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); }; let mime = mime.split(';').next().unwrap_or(mime).trim(); if !mime.starts_with("image/") { debug!(filename, mime, "skipping non-image attachment"); - return None; + return Err(MediaFetchError::NotAnImage); } if size > MAX_SIZE { error!(filename, size, "image exceeds 10MB limit"); - return None; + return Err(MediaFetchError::SizeExceeded { + actual: size, + limit: MAX_SIZE, + }); } let mut req = HTTP_CLIENT.get(url); @@ -73,18 +273,26 @@ pub async fn download_and_encode_image( Ok(resp) => resp, Err(e) => { error!(url, error = %e, "download failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; if !response.status().is_success() { error!(url, status = %response.status(), "HTTP error downloading image"); - return None; + return Err(MediaFetchError::HttpStatus(response.status())); } + + // Capture Content-Type BEFORE .bytes() consumes the response. + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(str::to_string); + let bytes = match response.bytes().await { Ok(b) => b, Err(e) => { error!(url, error = %e, "read failed"); - return None; + return Err(MediaFetchError::Network(e)); } }; @@ -94,18 +302,57 @@ pub async fn download_and_encode_image( size = bytes.len(), "downloaded image exceeds limit" ); - return None; + return Err(MediaFetchError::SizeExceeded { + actual: bytes.len() as u64, + limit: MAX_SIZE, + }); + } + + // Guard against HTTP 200 responses that are error pages (e.g. Slack auth redirect + // when files:read scope is missing), and against corrupted or mislabeled bodies. + if let Err(e) = validate_image_response(content_type.as_deref(), &bytes) { + error!( + filename, + mime_hint = mime, + content_type = content_type.as_deref().unwrap_or("none"), + magic = hex_prefix(&bytes), + error = %e, + "image validation failed — body is not a supported image" + ); + return Err(e); } - let (output_bytes, output_mime) = match resize_and_compress(&bytes) { + encode_validated_image(&bytes, mime, filename) +} + +/// Resize, compress and Base64-encode bytes that have already passed +/// `validate_image_response`. On resize failure, falls back to the raw +/// validated bytes when the body is ≤ 1 MB (e.g. animated WebP that the +/// image crate cannot decode but is small enough to pass through). +fn encode_validated_image( + bytes: &[u8], + mime: &str, + filename: &str, +) -> Result { + let (output_bytes, output_mime) = match resize_and_compress(bytes) { Ok(result) => result, Err(e) => { - if bytes.len() > 1024 * 1024 { - error!(filename, error = %e, size = bytes.len(), "resize failed and original too large, skipping"); - return None; + // Only fall back to raw bytes for small WebP files — the one known case + // where a file passes magic-byte validation but resize fails (animated + // WebP that the image crate cannot decode). Other formats that fail + // resize are treated as corrupt and surfaced as ProcessingFailed. + if bytes.len() <= 1024 * 1024 && mime == "image/webp" { + warn!(filename, error = %e, "resize failed on WebP, using validated original"); + (bytes.to_vec(), mime.to_string()) + } else { + error!( + filename, + error = %e, + size = bytes.len(), + "resize failed after successful validation" + ); + return Err(MediaFetchError::ProcessingFailed(e)); } - debug!(filename, error = %e, "resize failed, using original"); - (bytes.to_vec(), mime.to_string()) } }; @@ -117,7 +364,7 @@ pub async fn download_and_encode_image( ); let encoded = BASE64.encode(&output_bytes); - Some(ContentBlock::Image { + Ok(ContentBlock::Image { media_type: output_mime, data: encoded, }) @@ -348,6 +595,21 @@ mod tests { buf.into_inner() } + fn make_jpeg(width: u32, height: u32) -> Vec { + let img = image::RgbImage::new(width, height); + let mut buf = Cursor::new(Vec::new()); + img.write_to(&mut buf, image::ImageFormat::Jpeg).unwrap(); + buf.into_inner() + } + + fn make_gif() -> Vec { + vec![ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, 0x3B, + ] + } + #[test] fn large_image_resized_to_max_dimension() { let png = make_png(3000, 2000); @@ -405,11 +667,7 @@ mod tests { #[test] fn gif_passes_through_unchanged() { - let gif: Vec = vec![ - 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x2C, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x02, 0x02, 0x44, 0x01, 0x00, - 0x3B, - ]; + let gif = make_gif(); let (output, mime) = resize_and_compress(&gif).unwrap(); assert_eq!(mime, "image/gif"); @@ -429,4 +687,295 @@ mod tests { assert!(is_video_file("clip.MOV", None)); assert!(!is_video_file("notes.txt", Some("text/plain"))); } + + // --- validate_image_response tests --- + + #[test] + fn validate_accepts_png_with_matching_content_type() { + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png"), &png).is_ok()); + } + + #[test] + fn validate_accepts_jpeg_with_matching_content_type() { + let jpeg = make_jpeg(1, 1); + assert!(validate_image_response(Some("image/jpeg"), &jpeg).is_ok()); + } + + #[test] + fn validate_accepts_gif_with_matching_content_type() { + let gif = make_gif(); + assert!(validate_image_response(Some("image/gif"), &gif).is_ok()); + } + + #[test] + fn validate_rejects_corrupt_gif_body() { + let corrupt_gif = b"GIF89a\x01\x00\x01\x00\x00\x00\x00"; + let result = validate_image_response(Some("image/gif"), corrupt_gif); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_accepts_missing_content_type_with_valid_png() { + // When Content-Type header is absent, fall back to magic-byte detection. + let png = make_png(1, 1); + assert!(validate_image_response(None, &png).is_ok()); + } + + #[test] + fn validate_content_type_strips_params() { + // "image/png; charset=binary" is a real header value — must be accepted. + let png = make_png(1, 1); + assert!(validate_image_response(Some("image/png; charset=binary"), &png).is_ok()); + } + + /// Exact reproduction of issue #776: Slack serves the workspace login HTML + /// page at HTTP 200 when the bot token lacks the `files:read` scope. + /// The Slack file metadata says `mimetype: image/png`; the response body + /// magic bytes are `Slack login"; + let result = validate_image_response(Some("image/png"), html_body); + match result { + Err(MediaFetchError::InvalidImageBody { magic_prefix_hex }) => { + assert_eq!(magic_prefix_hex, "3c21444f43545950"); + } + other => panic!("expected InvalidImageBody, got {other:?}"), + } + } + + #[test] + fn validate_rejects_text_html_content_type() { + // Even if the body were a valid image, a text/html Content-Type must be rejected. + let png = make_png(1, 1); + let result = validate_image_response(Some("text/html; charset=utf-8"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + #[test] + fn validate_rejects_mixed_case_text_content_type() { + // Mixed-case Content-Type must be normalised before rejection. + let png = make_png(1, 1); + let result = validate_image_response(Some("Text/HTML; Charset=utf-8"), &png); + assert!(matches!( + result, + Err(MediaFetchError::UnsupportedResponseType { .. }) + )); + } + + /// Regression test for the application/octet-stream fix: CDNs and generic + /// file download endpoints commonly serve any file with this Content-Type. + /// The old allow-list incorrectly rejected it before magic-byte check. + #[test] + fn validate_accepts_octet_stream_with_valid_png() { + let png = make_png(1, 1); + assert!( + validate_image_response(Some("application/octet-stream"), &png).is_ok(), + "application/octet-stream must pass through to magic-byte check" + ); + } + + /// application/json body is rejected by magic bytes, not by Content-Type. + #[test] + fn validate_rejects_json_body_by_magic_bytes() { + let json_body = b"{\"error\":\"invalid_auth\",\"ok\":false}"; + let result = validate_image_response(Some("application/json"), json_body); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + /// Missing Content-Type with invalid body: CDN stripping the header should + /// still be caught by magic-byte detection. + #[test] + fn validate_rejects_html_body_with_missing_content_type() { + let html_body = b"error page"; + let result = validate_image_response(None, html_body); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_rejects_empty_body() { + let result = validate_image_response(Some("image/png"), &[]); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn validate_rejects_truncated_png_header() { + // PNG magic is 8 bytes; 4 bytes is not enough to identify the format. + let truncated = [0x89u8, 0x50, 0x4e, 0x47]; + let result = validate_image_response(Some("image/png"), &truncated); + assert!(matches!( + result, + Err(MediaFetchError::InvalidImageBody { .. }) + )); + } + + #[test] + fn media_fetch_error_display_renders() { + let _ = MediaFetchError::NotAnImage.to_string(); + let _ = MediaFetchError::UnsupportedResponseType { + actual: Some("text/html".into()), + } + .to_string(); + let _ = MediaFetchError::InvalidImageBody { + magic_prefix_hex: "3c21444f43545950".into(), + } + .to_string(); + let _ = MediaFetchError::SizeExceeded { + actual: 11_000_000, + limit: 10_000_000, + } + .to_string(); + let _ = MediaFetchError::HttpStatus(reqwest::StatusCode::UNAUTHORIZED).to_string(); + let _ = MediaFetchError::ProcessingFailed(image::ImageError::Unsupported( + image::error::UnsupportedError::from_format_and_kind( + image::error::ImageFormatHint::Unknown, + image::error::UnsupportedErrorKind::Color(image::ExtendedColorType::Rgba16), + ), + )) + .to_string(); + } + + #[test] + fn hex_prefix_formats_first_8_bytes() { + let bytes = b""; + assert_eq!(hex_prefix(bytes), "3c21444f43545950"); + } + + #[test] + fn hex_prefix_handles_short_buffer() { + let bytes = [0xffu8, 0xd8]; + assert_eq!(hex_prefix(&bytes), "ffd8"); + } + + // --- format_failed_attachment_note tests --- + + #[test] + fn format_failed_attachment_note_single() { + let note = super::format_failed_attachment_note(&["photo.png".to_string()]); + assert!(note.contains("`photo.png`")); + assert!(note.starts_with("[Attachment validation failed]:")); + assert!(note.contains("The user has been notified separately")); + } + + #[test] + fn format_failed_attachment_note_multiple() { + let note = super::format_failed_attachment_note(&["a.png".to_string(), "b.jpg".to_string()]); + assert!(note.contains("`a.png`")); + assert!(note.contains("`b.jpg`")); + assert!(note.contains(", ")); + } + + #[test] + fn format_failed_attachment_note_wraps_filename_in_backticks() { + let note = super::format_failed_attachment_note(&["big.png".to_string()]); + assert!(note.contains("`big.png`")); + } + + #[test] + fn format_failed_attachment_note_escapes_backtick_in_filename() { + let note = super::format_failed_attachment_note(&["fi`le.png".to_string()]); + // Backtick in filename is replaced with single-quote to avoid breaking the code-span. + assert!(note.contains("`fi'le.png`")); + assert!(!note.contains("``")); + } + + // --- encode_validated_image: resize-fallback threshold tests --- + + /// Minimal WebP header stub: RIFF magic + WEBP FourCC. + /// `validate_image_response` accepts it (format detected as WebP); + /// `resize_and_compress`'s `decode()` fails because there are no VP8 chunks. + fn make_webp_stub() -> Vec { + b"RIFF\x00\x00\x00\x00WEBP".to_vec() + } + + #[test] + fn resize_fail_under_1mb_falls_back_to_original_bytes() { + let bytes = make_webp_stub(); // 12 bytes << 1 MB + let result = encode_validated_image(&bytes, "image/webp", "test.webp"); + let block = result.expect("should fall back to original bytes, not error"); + match block { + ContentBlock::Image { media_type, data } => { + assert_eq!(media_type, "image/webp"); + // Data is Base64 of the original stub bytes. + let decoded = BASE64.decode(&data).expect("valid Base64"); + assert_eq!(decoded, bytes); + } + other => panic!("expected ContentBlock::Image, got {other:?}"), + } + } + + #[test] + fn resize_fail_over_1mb_returns_processing_failed() { + let mut bytes = make_webp_stub(); + // Pad to just over 1 MB so the >1 MB branch fires. + bytes.resize(1024 * 1024 + 1, 0x00); + let result = encode_validated_image(&bytes, "image/webp", "big.webp"); + assert!( + matches!(result, Err(MediaFetchError::ProcessingFailed(_))), + "expected ProcessingFailed for >1MB resize failure, got {result:?}" + ); + } + + #[test] + fn resize_fail_non_webp_always_returns_processing_failed() { + // A non-WebP file with decode failure must always return ProcessingFailed, + // even if small — the animated-WebP fallback must not fire for other formats. + let bytes = make_webp_stub(); // 12 bytes — well under 1 MB + let result = encode_validated_image(&bytes, "image/png", "corrupt.png"); + assert!( + matches!(result, Err(MediaFetchError::ProcessingFailed(_))), + "expected ProcessingFailed for non-WebP resize failure, got {result:?}" + ); + } + + // --- failed_attachment_entry tests --- + + #[test] + fn failed_attachment_entry_not_an_image_returns_none() { + let e = MediaFetchError::NotAnImage; + assert!(failed_attachment_entry("img.png", &e).is_none()); + } + + #[test] + fn failed_attachment_entry_size_exceeded_notifies_user() { + let e = MediaFetchError::SizeExceeded { actual: 20_000_000, limit: 10_000_000 }; + let entry = failed_attachment_entry("big.png", &e).unwrap(); + assert_eq!(entry, "big.png"); + } + + #[test] + fn failed_attachment_entry_http_4xx_notifies_user() { + let e = MediaFetchError::HttpStatus(reqwest::StatusCode::FORBIDDEN); + let entry = failed_attachment_entry("photo.png", &e).unwrap(); + assert_eq!(entry, "photo.png"); + } + + #[test] + fn failed_attachment_entry_http_5xx_is_logged_only() { + let e = MediaFetchError::HttpStatus(reqwest::StatusCode::BAD_GATEWAY); + assert!(failed_attachment_entry("photo.png", &e).is_none()); + } + + #[test] + fn failed_attachment_entry_http_401_notifies_user() { + let e = MediaFetchError::HttpStatus(reqwest::StatusCode::UNAUTHORIZED); + let entry = failed_attachment_entry("secret.png", &e).unwrap(); + assert_eq!(entry, "secret.png"); + } } diff --git a/src/slack.rs b/src/slack.rs index cbe101f2..b580eaee 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -934,6 +934,7 @@ async fn handle_message( let mut echo_entries: Vec = Vec::new(); let mut text_file_bytes: u64 = 0; let mut text_file_count: u32 = 0; + let mut failed_image_files: Vec = Vec::new(); if let Some(files) = files { for file in files { @@ -1032,21 +1033,66 @@ async fn handle_message( debug!(filename, "adding text file attachment"); extra_blocks.push(block); } - } else if let Some(block) = media::download_and_encode_image( - url, - Some(mimetype), - filename, - size, - Some(bot_token), - ) - .await - { - debug!(filename, "adding image attachment"); - extra_blocks.push(block); + } else { + match media::download_and_encode_image( + url, + Some(mimetype), + filename, + size, + Some(bot_token), + ) + .await + { + Ok(block) => { + debug!(filename, "adding image attachment"); + extra_blocks.push(block); + } + Err(media::MediaFetchError::NotAnImage) => {} + Err(e) => { + if let Some(entry) = media::failed_attachment_entry(filename, &e) { + warn!(filename, error = %e, "image attachment failed"); + failed_image_files.push(entry); + } else { + warn!(filename, error = %e, "image download failed"); + } + } + } } } } + // Notify user if any images couldn't be processed. + if !failed_image_files.is_empty() { + let warn_channel = ChannelRef { + platform: "slack".into(), + channel_id: channel_id.clone(), + thread_id: thread_ts.clone().or_else(|| Some(ts.clone())), + parent_id: None, + origin_event_id: None, + }; + let file_list = failed_image_files + .iter() + .map(|n| sanitize_slack_filename(n)) + .collect::>() + .join("`, `"); + let msg = format!( + ":warning: I couldn't process the file(s) you shared (`{file_list}`). \ + This can happen when the bot lacks the `files:read` OAuth scope, \ + the file format isn't supported (PNG/JPEG/GIF/WebP only), \ + or the file is too large." + ); + if let Err(e) = adapter.send_message(&warn_channel, &msg).await { + warn!(error = %e, "failed to send image validation warning to user"); + } + // push (not insert) is correct: pack_arrival_event places all Text extras before + // the typed prompt. The note lands after STT transcripts because STT uses + // insert(0, ...) at the front of extra_blocks — that convention must be preserved + // if the STT insertion point ever changes. + extra_blocks.push(ContentBlock::Text { + text: media::format_failed_attachment_note(&failed_image_files), + }); + } + // Resolve Slack display name (best-effort, fallback to user_id) let display_name = adapter .resolve_user_name(&user_id) @@ -1164,11 +1210,20 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } -/// Strip MIME parameters like `; charset=utf-8` so type-detection helpers see -/// the bare media type. Slack occasionally sends mimetypes like -/// `text/plain; charset=utf-8`; `media::is_text_file` expects the bare form. +/// Strip MIME parameters so type-detection helpers see the bare media type. +/// Delegates to media::strip_mime_params (single source of truth). +/// Needed because Slack occasionally sends `text/plain; charset=utf-8` and +/// `media::is_text_file` expects the bare form. fn strip_mime_params(mimetype: &str) -> &str { - mimetype.split(';').next().unwrap_or(mimetype).trim() + media::strip_mime_params(mimetype) +} + +/// Sanitize a filename for safe embedding in a Slack mrkdwn message. +/// +/// Delegates to `media::sanitize_attachment_filename` so the escaping rules +/// stay in one place for both Slack and Discord. +pub(crate) fn sanitize_slack_filename(s: &str) -> String { + media::sanitize_attachment_filename(s) } /// True only when a Slack non-bot event represents a real user message @@ -1338,6 +1393,35 @@ mod tests { assert_eq!(slack_file_download_url(&file), ""); } + // --- sanitize_slack_filename tests --- + + #[test] + fn sanitize_leaves_normal_filename_unchanged() { + assert_eq!(sanitize_slack_filename("photo.png"), "photo.png"); + assert_eq!(sanitize_slack_filename("my file (1).jpg"), "my file (1).jpg"); + } + + #[test] + fn sanitize_replaces_backtick() { + assert_eq!(sanitize_slack_filename("file`name.png"), "file'name.png"); + } + + #[test] + fn sanitize_replaces_angle_brackets() { + // Angle brackets are Slack mrkdwn delimiters; they must not pass through. + assert_eq!(sanitize_slack_filename("<@U123>"), "(@U123)"); + assert_eq!(sanitize_slack_filename(""), "(!here)"); + } + + #[test] + fn sanitize_combined_injection_attempt() { + // A filename constructed to inject a Slack @here ping. + assert_eq!( + sanitize_slack_filename("``"), + "'(!here)'" + ); + } + // --- strip_mime_params tests --- /// MIME with charset parameter strips to bare media type.