diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs index 34b4d84..33eeeb5 100644 --- a/crates/synth-backend/src/arm_backend.rs +++ b/crates/synth-backend/src/arm_backend.rs @@ -175,7 +175,8 @@ fn compile_wasm_to_arm( // exactly the code that compiled it yesterday (bit-identity is structural, // not behavioural). let select_direct_attempt = |spill_on_exhaustion: bool, - param_backing_on_exhaustion: bool| + param_backing_on_exhaustion: bool, + local_promote: bool| -> Result, synth_core::Error> { let db = RuleDatabase::with_standard_rules(); let mut selector = @@ -219,37 +220,60 @@ fn compile_wasm_to_arm( // 2.00×→1.72× vs LLVM). Escape hatch: `SYNTH_NO_LOCAL_PROMOTE=1` restores // the frame-slot path. Leaf-only / i32-only / ARM-only (see // compute_local_promotion); the leaf-only lift + i64 locals are follow-ons. - selector.set_local_promote(std::env::var("SYNTH_NO_LOCAL_PROMOTE").is_err()); + // #474: `local_promote` is now a per-attempt parameter so the retry ladder + // can drop promotion as an exhaustion-recovery rung (promotion pins r4-r8, + // which on a dense function leaves the spill allocator with nothing to + // free → the frame-slot path is the escape that restores compilability). + selector.set_local_promote(local_promote); selector.select_with_stack(wasm_ops, num_params) }; let select_direct = || -> Result, String> { - // The two recoverable exhaustion classes. NOT retried: the i64 - // spill-slot-pool Err ("spill-slot pool exhausted") — the honest - // remaining bound of the 3b-lite allocator. 
const SINGLE_EXHAUSTION: &str = "all allocatable registers are live on the stack"; const PAIR_EXHAUSTION: &str = "no consecutive pair of free registers for i64"; - let mut attempt = select_direct_attempt(false, false); - // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion - // hard-fail is recoverable — retry with spill-on-exhaustion, which - // reserves the spill area and spills the deepest stack value when the - // pool is full. Only functions that FAILED the first pass ever reach - // this, so existing output is untouched by construction. - if let Err(e) = &attempt - && e.to_string().contains(SINGLE_EXHAUSTION) - { - attempt = select_direct_attempt(true, false); - } - // VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR - // exhaustion is recoverable too — but not by stack spilling (the pair - // allocator already spills stack values, #171): the blockers are the - // pinned param home registers. The final retry frame-backs the params - // (#204 machinery) so they stop pinning R0-R3, with spill-on-exhaustion - // kept on for the single-register pressure the reloads add. Reached - // only by functions that failed every earlier pass. - if let Err(e) = &attempt - && e.to_string().contains(PAIR_EXHAUSTION) + // The full exhaustion-recovery ladder, parameterized on whether local + // promotion is enabled. Each rung is reached only when the previous one + // returned a recoverable register-exhaustion Err, so a function that + // compiles on the first attempt is untouched by the later rungs. + let recovery_ladder = |promote: bool| -> Result, synth_core::Error> { + let mut attempt = select_direct_attempt(false, false, promote); + // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion + // hard-fail is recoverable — retry with spill-on-exhaustion, which + // reserves the spill area and spills the deepest stack value when the + // pool is full. 
+ if let Err(e) = &attempt + && e.to_string().contains(SINGLE_EXHAUSTION) + { + attempt = select_direct_attempt(true, false, promote); + } + // VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR + // exhaustion is recoverable too — not by stack spilling (the pair + // allocator already spills stack values, #171) but by frame-backing + // the params (#204) so they stop pinning R0-R3, with spill kept on. + if let Err(e) = &attempt + && e.to_string().contains(PAIR_EXHAUSTION) + { + attempt = select_direct_attempt(true, true, promote); + } + attempt + }; + // #474: local promotion (default-on since v0.14.0) is an OPTIMIZATION — it + // must never be the reason a function fails to compile. Run the full ladder + // with promotion first (so every function that compiles today is + // bit-identical), and if it still ends in register exhaustion, fall back to + // the promotion-off ladder (the v0.12.0 frame-slot lowering — exactly what + // the `SYNTH_NO_LOCAL_PROMOTE=1` workaround does, now automatic). Promotion + // pins r4-r8 for the locals; on a dense function that leaves the allocator + // with nothing to free, so dropping it restores compilability. The fallback + // is reached ONLY by functions that exhaust WITH promotion, so promotion-on + // output is untouched by construction (frozen byte gate stays green). 
+ let promote = std::env::var("SYNTH_NO_LOCAL_PROMOTE").is_err(); + let mut attempt = recovery_ladder(promote); + if promote + && let Err(e) = &attempt + && e.to_string().contains("register exhaustion") + && let Ok(rescued) = recovery_ladder(false) { - attempt = select_direct_attempt(true, true); + attempt = Ok(rescued); } attempt.map_err(|e| format!("instruction selection failed: {}", e)) }; diff --git a/crates/synth-cli/tests/promotion_exhaustion_fallback_474.rs b/crates/synth-cli/tests/promotion_exhaustion_fallback_474.rs new file mode 100644 index 0000000..0964406 --- /dev/null +++ b/crates/synth-cli/tests/promotion_exhaustion_fallback_474.rs @@ -0,0 +1,90 @@ +//! #474 regression guard — local promotion must never CAUSE a compile failure. +//! +//! v0.14.0 made i32 local promotion default-on. Promotion pins eligible locals +//! into callee-saved r4-r8, which halves the operand-stack temp pool. On a dense +//! function that tips register allocation past what it can recover, turning a +//! working compile into a hard "register exhaustion" skip — a regression from +//! v0.12.0 (observed on a real engine-control function; reproduced generically by +//! `scripts/repro/promotion_exhaustion_fallback.wat`). +//! +//! The fix (arm_backend.rs `select_direct`): run the promotion-on recovery ladder +//! first — so every function that compiles today is bit-identical (the frozen byte +//! gate proves it) — and, only if it still ends in register exhaustion, fall back +//! to the promotion-off ladder automatically (exactly what `SYNTH_NO_LOCAL_PROMOTE=1` +//! does by hand). Promotion is an optimization; it must never be the *reason* a +//! function fails to compile. +//! +//! This test is the executable proof the fallback is load-bearing AND lands on the +//! frame-slot path: the fixture compiles with promotion default-on, and its `.text` +//! is byte-identical to the `SYNTH_NO_LOCAL_PROMOTE=1` build. 
+
+use std::process::Command;
+
+use object::{Object, ObjectSection};
+
+/// Path of the `synth` binary Cargo built for this integration test.
+fn synth() -> &'static str {
+    env!("CARGO_BIN_EXE_synth")
+}
+
+/// Path of the dense WAT fixture, resolved relative to this crate's manifest dir.
+fn fixture() -> std::path::PathBuf {
+    std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("../..")
+        .join("scripts/repro/promotion_exhaustion_fallback.wat")
+}
+
+/// Compile the fixture (cortex-m4, relocatable) with promotion either default-on
+/// or forced off, and return the `.text` bytes. Panics if the compile fails — a
+/// FAILED promotion-on compile is exactly the #474 regression this guards.
+///
+/// The ELF goes to a per-process file under the OS temp dir (not a fixed `/tmp`
+/// path), so concurrent runs cannot clobber each other and non-Unix hosts work.
+fn text_bytes(promotion_off: bool) -> Vec<u8> {
+    let mode = if promotion_off { "off" } else { "on" };
+    let elf = std::env::temp_dir().join(format!("pef_474_{}_{mode}.elf", std::process::id()));
+    let mut cmd = Command::new(synth());
+    if promotion_off {
+        cmd.env("SYNTH_NO_LOCAL_PROMOTE", "1");
+    } else {
+        cmd.env_remove("SYNTH_NO_LOCAL_PROMOTE");
+    }
+    let out = cmd
+        .arg("compile")
+        .arg(fixture())
+        .arg("-o")
+        .arg(&elf)
+        .args(["--target", "cortex-m4", "--relocatable", "--all-exports"])
+        .output()
+        .expect("run synth");
+    assert!(
+        out.status.success(),
+        "#474 REGRESSION: promotion-{} compile FAILED — promotion turned a \
+         compilable function into a skipped one. stderr:\n{}",
+        mode,
+        String::from_utf8_lossy(&out.stderr)
+    );
+    let bytes = std::fs::read(&elf).expect("read elf");
+    // Best-effort cleanup: a leftover temp file must not fail the test.
+    let _ = std::fs::remove_file(&elf);
+    let obj = object::File::parse(&*bytes).expect("parse elf");
+    obj.section_by_name(".text")
+        .expect(".text section")
+        .data()
+        .expect("read .text")
+        .to_vec()
+}
+
+/// The dense fixture compiles with promotion DEFAULT-ON (the v0.14.0 setting that
+/// regressed it), and the rescued `.text` is byte-identical to the promotion-off
+/// build — proving the fallback fired and used the frame-slot lowering.
+#[test]
+fn promotion_never_causes_compile_failure_474() {
+    let promoted_text = text_bytes(false); // promotion left at its default (on)
+    let frame_slot_text = text_bytes(true); // SYNTH_NO_LOCAL_PROMOTE=1 build
+    assert_eq!(
+        promoted_text, frame_slot_text,
+        "#474: promotion-on .text should equal the promotion-off (frame-slot) \
+         lowering for an exhausting function — the fallback must land on that path"
+    );
+}
diff --git a/scripts/repro/promotion_exhaustion_fallback.wat b/scripts/repro/promotion_exhaustion_fallback.wat
new file mode 100644
index 0000000..8c2b95d
--- /dev/null
+++ b/scripts/repro/promotion_exhaustion_fallback.wat
@@ -0,0 +1,30 @@
+;; Regression guard (#474): local promotion (default-on v0.14.0) must never be the
+;; REASON a function fails to compile.
+;;
+;; Promotion pins eligible i32 locals into callee-saved r4-r8, halving the
+;; operand-stack temp pool (temps drop from r4-r8+r0-r3 to just r0-r3). On a dense
+;; function that tips register allocation past what it can recover, turning a
+;; working compile into a hard "register exhaustion" failure (skipped function) --
+;; observed on a real engine-control function: fine on v0.12.0, skipped on v0.14.0.
+;;
+;; This fixture reproduces it generically: 5 promotable i32 locals + a deep
+;; right-leaning chain that keeps many products simultaneously live.
+;; * promotion ON (v0.14.0 default) -- exhausts -> WAS skipped.
+;; * promotion OFF (SYNTH_NO_LOCAL_PROMOTE=1) -- compiles (frame-slot path).
+;; The #474 fix runs the promotion-on ladder first (every function that compiles
+;; today stays bit-identical) and, only if it still exhausts, falls back to the
+;; promotion-off ladder automatically. With the fix this compiles, .text
+;; byte-identical to the promotion-off build.
+;;
+;; Compile: --target cortex-m4 --relocatable --all-exports. Generic values.
+(module
+  (memory 1) ;; one page of linear memory; nothing in this module loads from or stores to it
+  (func (export "dense") (param $p i32) (result i32) ;; exported so --all-exports compiles it
+    (local $a i32)(local $b i32)(local $c i32)(local $d i32)(local $e i32) ;; the five i32 locals
+    (local.set $a (i32.add (local.get $p) (i32.const 16))) ;; a = p + 16
+    (local.set $b (i32.add (local.get $p) (i32.const 32))) ;; b = p + 32
+    (local.set $c (i32.add (local.get $p) (i32.const 48))) ;; c = p + 48
+    (local.set $d (i32.add (local.get $p) (i32.const 64))) ;; d = p + 64
+    (local.set $e (i32.add (local.get $p) (i32.const 80))) ;; e = p + 80
+    (i32.add (i32.mul (local.get $a) (local.get $c)) (i32.add (i32.mul (local.get $e) (local.get $b)) (i32.add (i32.mul (local.get $d) (local.get $a)) (i32.add (i32.mul (local.get $c) (local.get $e)) (i32.add (i32.mul (local.get $b) (local.get $d)) (i32.add (i32.mul (local.get $a) (local.get $c)) (i32.add (i32.mul (local.get $e) (local.get $b)) (i32.add (i32.mul (local.get $d) (local.get $a)) (i32.add (i32.mul (local.get $c) (local.get $e)) (i32.add (i32.mul (local.get $b) (local.get $d)) (i32.add (i32.mul (local.get $a) (local.get $c)) (i32.add (i32.mul (local.get $e) (local.get $b)) (i32.add (i32.mul (local.get $d) (local.get $a)) (i32.add (i32.mul (local.get $c) (local.get $e)) (i32.add (i32.mul (local.get $b) (local.get $d)) (local.get $a)))))))))))))))) ;; right-nested sum of 15 products plus $a: each product is pushed before the rest of the chain is evaluated, so all stay on the operand stack until the innermost add
+  )) ;; closes func and module