From b34be83247a1e7b38952e11107caa3ec76df5637 Mon Sep 17 00:00:00 2001 From: Alexander Vostres Date: Sat, 18 Apr 2026 19:52:52 +0300 Subject: [PATCH 1/2] Fix Windows shell tool crash on non-UTF-8 subprocess output --- crates/forge_infra/src/executor.rs | 124 ++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 2 deletions(-) diff --git a/crates/forge_infra/src/executor.rs b/crates/forge_infra/src/executor.rs index 34e1327648..9123829707 100644 --- a/crates/forge_infra/src/executor.rs +++ b/crates/forge_infra/src/executor.rs @@ -190,20 +190,67 @@ async fn stream( let mut output = Vec::new(); if let Some(io) = io.as_mut() { let mut buff = [0; 1024]; + // Carry incomplete trailing UTF-8 codepoint bytes across reads — Windows + // console stdio rejects even one byte of a split codepoint. + let mut pending = Vec::::new(); loop { let n = io.read(&mut buff).await?; if n == 0 { break; } - writer.write_all(buff.get(..n).unwrap_or(&[]))?; + let chunk = buff.get(..n).unwrap_or(&[]); + output.extend_from_slice(chunk); + + let mut working = std::mem::take(&mut pending); + working.extend_from_slice(chunk); + pending = write_lossy_utf8(&mut writer, &working)?; // note: flush is necessary else we get the cursor could not be found error. writer.flush()?; - output.extend_from_slice(buff.get(..n).unwrap_or(&[])); + } + // Flush dangling bytes from a stream that ended mid-codepoint. + if !pending.is_empty() { + writer.write_all(String::from_utf8_lossy(&pending).as_bytes())?; + writer.flush()?; } } Ok(output) } +/// Writes `buf` as valid UTF-8 (invalid bytes → `U+FFFD`) and returns any +/// incomplete trailing codepoint bytes for the caller to carry into the next +/// chunk. +fn write_lossy_utf8(writer: &mut W, buf: &[u8]) -> io::Result> { + let mut cursor = 0; + while cursor < buf.len() { + match std::str::from_utf8(buf.get(cursor..).unwrap_or(&[])) { + Ok(tail) => { + writer.write_all(tail.as_bytes())?; + return Ok(Vec::new()); + } + Err(e) => { + let valid_up_to = e.valid_up_to(); + let valid = buf + .get(cursor..cursor + valid_up_to) + .unwrap_or(&[]); + writer.write_all(valid)?; + match e.error_len() { + Some(len) => { + writer.write_all("\u{FFFD}".as_bytes())?; + cursor += valid_up_to + len; + } + None => { + return Ok(buf + .get(cursor + valid_up_to..) + .unwrap_or(&[]) + .to_vec()); + } + } + } + } + } + Ok(Vec::new()) +} + /// The implementation for CommandExecutorService #[async_trait::async_trait] impl CommandInfra for ForgeCommandExecutorService { @@ -429,4 +476,77 @@ mod tests { assert_eq!(actual.stderr, expected.stderr); assert_eq!(actual.success(), expected.success()); } + + mod write_lossy_utf8 { + use pretty_assertions::assert_eq; + + use super::super::write_lossy_utf8; + + fn run(buf: &[u8]) -> (Vec, Vec) { + let mut out = Vec::::new(); + let pending = write_lossy_utf8(&mut out, buf).unwrap(); + (out, pending) + } + + #[test] + fn valid_ascii_passes_through() { + let (out, pending) = run(b"hello"); + assert_eq!(out, b"hello"); + assert!(pending.is_empty()); + } + + #[test] + fn valid_multibyte_passes_through() { + // "héllo ✓" — mixed 2-byte and 3-byte codepoints. + let input = "héllo ✓".as_bytes(); + let (out, pending) = run(input); + assert_eq!(out, input); + assert!(pending.is_empty()); + } + + #[test] + fn incomplete_trailing_codepoint_is_buffered() { + // "é" is 0xC3 0xA9 — leading byte alone must be held back. + let (out, pending) = run(&[b'a', 0xC3]); + assert_eq!(out, b"a"); + assert_eq!(pending, vec![0xC3]); + } + + #[test] + fn multibyte_split_across_two_chunks_emits_once_whole() { + let mut out = Vec::::new(); + let pending = write_lossy_utf8(&mut out, &[b'a', 0xC3]).unwrap(); + assert_eq!(pending, vec![0xC3]); + assert_eq!(out, b"a"); + + let mut working = pending; + working.push(0xA9); + let pending = write_lossy_utf8(&mut out, &working).unwrap(); + assert!(pending.is_empty()); + assert_eq!(out, "aé".as_bytes()); + } + + #[test] + fn invalid_byte_in_middle_becomes_replacement() { + let (out, pending) = run(&[b'a', 0xFF, b'b']); + assert_eq!(out, "a\u{FFFD}b".as_bytes()); + assert!(pending.is_empty()); + } + + #[test] + fn lone_continuation_byte_becomes_replacement() { + let (out, pending) = run(&[b'a', 0x80, b'b']); + assert_eq!(out, "a\u{FFFD}b".as_bytes()); + assert!(pending.is_empty()); + } + + #[test] + fn windows_1252_smart_quote_becomes_replacement() { + // Regression: 0x91/0x92 land as bare continuation bytes and broke + // console stdio on Windows before this fix. + let (out, pending) = run(b"quote: \x91hi\x92"); + assert_eq!(out, "quote: \u{FFFD}hi\u{FFFD}".as_bytes()); + assert!(pending.is_empty()); + } + } } From 449a776e01e331459f161c9cd29c0d067beb97c6 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:09:22 +0000 Subject: [PATCH 2/2] [autofix.ci] apply automated fixes --- crates/forge_infra/src/executor.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crates/forge_infra/src/executor.rs b/crates/forge_infra/src/executor.rs index 9123829707..1cfc6483c5 100644 --- a/crates/forge_infra/src/executor.rs +++ b/crates/forge_infra/src/executor.rs @@ -229,9 +229,7 @@ fn write_lossy_utf8(writer: &mut W, buf: &[u8]) -> io::Result> } Err(e) => { let valid_up_to = e.valid_up_to(); - let valid = buf - .get(cursor..cursor + valid_up_to) - .unwrap_or(&[]); + let valid = buf.get(cursor..cursor + valid_up_to).unwrap_or(&[]); writer.write_all(valid)?; match e.error_len() { Some(len) => { @@ -239,10 +237,7 @@ fn write_lossy_utf8(writer: &mut W, buf: &[u8]) -> io::Result> cursor += valid_up_to + len; } None => { - return Ok(buf - .get(cursor + valid_up_to..) - .unwrap_or(&[]) - .to_vec()); + return Ok(buf.get(cursor + valid_up_to..).unwrap_or(&[]).to_vec()); } } }