Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 50 additions & 26 deletions crates/synth-backend/src/arm_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ fn compile_wasm_to_arm(
// exactly the code that compiled it yesterday (bit-identity is structural,
// not behavioural).
let select_direct_attempt = |spill_on_exhaustion: bool,
param_backing_on_exhaustion: bool|
param_backing_on_exhaustion: bool,
local_promote: bool|
-> Result<Vec<ArmInstruction>, synth_core::Error> {
let db = RuleDatabase::with_standard_rules();
let mut selector =
Expand Down Expand Up @@ -219,37 +220,60 @@ fn compile_wasm_to_arm(
// 2.00×→1.72× vs LLVM). Escape hatch: `SYNTH_NO_LOCAL_PROMOTE=1` restores
// the frame-slot path. Leaf-only / i32-only / ARM-only (see
// compute_local_promotion); the leaf-only lift + i64 locals are follow-ons.
selector.set_local_promote(std::env::var("SYNTH_NO_LOCAL_PROMOTE").is_err());
// #474: `local_promote` is now a per-attempt parameter so the retry ladder
// can drop promotion as an exhaustion-recovery rung (promotion pins r4-r8,
// which on a dense function leaves the spill allocator with nothing to
// free → the frame-slot path is the escape that restores compilability).
selector.set_local_promote(local_promote);
selector.select_with_stack(wasm_ops, num_params)
};
let select_direct = || -> Result<Vec<ArmInstruction>, String> {
// The two recoverable exhaustion classes. NOT retried: the i64
// spill-slot-pool Err ("spill-slot pool exhausted") — the honest
// remaining bound of the 3b-lite allocator.
const SINGLE_EXHAUSTION: &str = "all allocatable registers are live on the stack";
const PAIR_EXHAUSTION: &str = "no consecutive pair of free registers for i64";
let mut attempt = select_direct_attempt(false, false);
// VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion
// hard-fail is recoverable — retry with spill-on-exhaustion, which
// reserves the spill area and spills the deepest stack value when the
// pool is full. Only functions that FAILED the first pass ever reach
// this, so existing output is untouched by construction.
if let Err(e) = &attempt
&& e.to_string().contains(SINGLE_EXHAUSTION)
{
attempt = select_direct_attempt(true, false);
}
// VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR
// exhaustion is recoverable too — but not by stack spilling (the pair
// allocator already spills stack values, #171): the blockers are the
// pinned param home registers. The final retry frame-backs the params
// (#204 machinery) so they stop pinning R0-R3, with spill-on-exhaustion
// kept on for the single-register pressure the reloads add. Reached
// only by functions that failed every earlier pass.
if let Err(e) = &attempt
&& e.to_string().contains(PAIR_EXHAUSTION)
// The full exhaustion-recovery ladder, parameterized on whether local
// promotion is enabled. Each rung is reached only when the previous one
// returned a recoverable register-exhaustion Err, so a function that
// compiles on the first attempt is untouched by the later rungs.
let recovery_ladder = |promote: bool| -> Result<Vec<ArmInstruction>, synth_core::Error> {
let mut attempt = select_direct_attempt(false, false, promote);
// VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion
// hard-fail is recoverable — retry with spill-on-exhaustion, which
// reserves the spill area and spills the deepest stack value when the
// pool is full.
if let Err(e) = &attempt
&& e.to_string().contains(SINGLE_EXHAUSTION)
{
attempt = select_direct_attempt(true, false, promote);
}
// VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR
// exhaustion is recoverable too — not by stack spilling (the pair
// allocator already spills stack values, #171) but by frame-backing
// the params (#204) so they stop pinning R0-R3, with spill kept on.
if let Err(e) = &attempt
&& e.to_string().contains(PAIR_EXHAUSTION)
{
attempt = select_direct_attempt(true, true, promote);
}
attempt
};
// #474: local promotion (default-on since v0.14.0) is an OPTIMIZATION — it
// must never be the reason a function fails to compile. Run the full ladder
// with promotion first (so every function that compiles today is
// bit-identical), and if it still ends in register exhaustion, fall back to
// the promotion-off ladder (the v0.12.0 frame-slot lowering — exactly what
// the `SYNTH_NO_LOCAL_PROMOTE=1` workaround does, now automatic). Promotion
// pins r4-r8 for the locals; on a dense function that leaves the allocator
// with nothing to free, so dropping it restores compilability. The fallback
// is reached ONLY by functions that exhaust WITH promotion, so promotion-on
// output is untouched by construction (frozen byte gate stays green).
let promote = std::env::var("SYNTH_NO_LOCAL_PROMOTE").is_err();
let mut attempt = recovery_ladder(promote);
if promote
&& let Err(e) = &attempt
&& e.to_string().contains("register exhaustion")
&& let Ok(rescued) = recovery_ladder(false)
{
attempt = select_direct_attempt(true, true);
attempt = Ok(rescued);
}
attempt.map_err(|e| format!("instruction selection failed: {}", e))
};
Expand Down
90 changes: 90 additions & 0 deletions crates/synth-cli/tests/promotion_exhaustion_fallback_474.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//! #474 regression guard — local promotion must never CAUSE a compile failure.
//!
//! v0.14.0 made i32 local promotion default-on. Promotion pins eligible locals
//! into callee-saved r4-r8, which halves the operand-stack temp pool. On a dense
//! function, this tips register allocation past what it can recover, turning a
//! working compile into a hard "register exhaustion" skip — a regression from
//! v0.12.0 (observed on a real engine-control function; reproduced generically by
//! `scripts/repro/promotion_exhaustion_fallback.wat`).
//!
//! The fix (arm_backend.rs `select_direct`): run the promotion-on recovery ladder
//! first — so every function that compiles today is bit-identical (the frozen byte
//! gate proves it) — and, only if it still ends in register exhaustion, fall back
//! to the promotion-off ladder automatically (exactly what `SYNTH_NO_LOCAL_PROMOTE=1`
//! does by hand). Promotion is an optimization; it must never be the *reason* a
//! function fails to compile.
//!
//! This test is the executable proof the fallback is load-bearing AND lands on the
//! frame-slot path: the fixture compiles with promotion default-on, and its `.text`
//! is byte-identical to the `SYNTH_NO_LOCAL_PROMOTE=1` build.

use std::process::Command;

use object::{Object, ObjectSection};

/// Path to the `synth` executable under test.
///
/// Read from the `CARGO_BIN_EXE_synth` environment variable at compile time,
/// so the returned string is baked into the test binary.
fn synth() -> &'static str {
    const SYNTH_BIN: &str = env!("CARGO_BIN_EXE_synth");
    SYNTH_BIN
}

/// Location of the #474 reproduction fixture
/// (`scripts/repro/promotion_exhaustion_fallback.wat`), resolved two levels up
/// from this crate's manifest directory.
fn fixture() -> std::path::PathBuf {
    let mut wat = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // Step up two directories from the crate, then down into the repro scripts.
    wat.push("../..");
    wat.push("scripts/repro/promotion_exhaustion_fallback.wat");
    wat
}

/// Compile the fixture (cortex-m4, relocatable) with promotion either default-on
/// or forced off, and return the `.text` bytes.
///
/// `promotion_off == true` sets `SYNTH_NO_LOCAL_PROMOTE=1` for the child
/// process; `false` removes the variable so an inherited value cannot mask the
/// default-on behaviour.
///
/// The ELF is written to a per-process file in the platform temp directory
/// (not a fixed `/tmp` path), so concurrent test processes cannot clobber each
/// other's output and the test is not tied to a Unix filesystem layout. The
/// file is removed once its bytes have been read.
///
/// # Panics
///
/// Panics if `synth` cannot be spawned, if the compile exits unsuccessfully
/// (a FAILED promotion-on compile is exactly the #474 regression this guards),
/// or if the output cannot be read or parsed, or has no `.text` section.
fn text_bytes(promotion_off: bool) -> Vec<u8> {
    let mode = if promotion_off { "off" } else { "on" };
    // The process id keeps the artifact unique per test process; `mode` keeps
    // the two builds made by one process apart.
    let elf = std::env::temp_dir().join(format!(
        "pef_474_{}_{}.elf",
        std::process::id(),
        mode
    ));

    let mut cmd = Command::new(synth());
    if promotion_off {
        cmd.env("SYNTH_NO_LOCAL_PROMOTE", "1");
    } else {
        cmd.env_remove("SYNTH_NO_LOCAL_PROMOTE");
    }
    // Paths are passed as OS strings, so a non-UTF-8 checkout or temp
    // directory does not panic the way `to_str().unwrap()` would.
    let out = cmd
        .arg("compile")
        .arg(fixture())
        .arg("-o")
        .arg(&elf)
        .args(["--target", "cortex-m4", "--relocatable", "--all-exports"])
        .output()
        .expect("run synth");
    assert!(
        out.status.success(),
        "#474 REGRESSION: promotion-{} compile FAILED — promotion turned a \
         compilable function into a skipped one. stderr:\n{}",
        mode,
        String::from_utf8_lossy(&out.stderr)
    );

    let bytes = std::fs::read(&elf).expect("read elf");
    // Best-effort cleanup: the bytes are already in memory, so a failed
    // removal must not fail the test.
    let _ = std::fs::remove_file(&elf);

    let obj = object::File::parse(&*bytes).expect("parse elf");
    obj.section_by_name(".text")
        .expect(".text section")
        .data()
        .expect("read .text")
        .to_vec()
}

/// Builds the dense fixture twice — first with promotion left at its default,
/// then with `SYNTH_NO_LOCAL_PROMOTE=1` — and requires the two `.text` sections
/// to match byte for byte. Either build failing panics inside `text_bytes`;
/// a mismatch means the default build did not end up on the same lowering as
/// the promotion-off build.
#[test]
fn promotion_never_causes_compile_failure_474() {
    // `map` visits the array in order, so the default-on build still runs first.
    let [default_on, forced_off] = [false, true].map(text_bytes);
    assert_eq!(
        default_on, forced_off,
        "#474: promotion-on .text should equal the promotion-off (frame-slot) \
         lowering for an exhausting function — the fallback must land on that path"
    );
}
30 changes: 30 additions & 0 deletions scripts/repro/promotion_exhaustion_fallback.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
;; Regression guard (#474): local promotion (default-on v0.14.0) must never be the
;; REASON a function fails to compile.
;;
;; Promotion pins eligible i32 locals into callee-saved r4-r8, halving the
;; operand-stack temp pool (temps drop from r4-r8+r0-r3 to just r0-r3). On a dense
;; function, this tips register allocation past what it can recover, turning a
;; working compile into a hard "register exhaustion" failure (skipped function) --
;; observed on a real engine-control function: fine on v0.12.0, skipped on v0.14.0.
;;
;; This fixture reproduces it generically: 5 promotable i32 locals + a deep
;; right-leaning chain that keeps many products simultaneously live.
;; * promotion ON (v0.14.0 default) -- exhausts -> WAS skipped.
;; * promotion OFF (SYNTH_NO_LOCAL_PROMOTE=1) -- compiles (frame-slot path).
;; The #474 fix runs the promotion-on ladder first (every function that compiles
;; today stays bit-identical) and, only if it still exhausts, falls back to the
;; promotion-off ladder automatically. With the fix this compiles, .text
;; byte-identical to the promotion-off build.
;;
;; Compile: --target cortex-m4 --relocatable --all-exports. Generic values.
(module
;; One page of linear memory is declared; the function body below performs no
;; loads or stores.
(memory 1)
;; Exported as "dense": one i32 parameter, one i32 result.
(func (export "dense") (param $p i32) (result i32)
;; Five i32 locals, each initialised to $p plus a distinct constant (16..80).
(local $a i32)(local $b i32)(local $c i32)(local $d i32)(local $e i32)
(local.set $a (i32.add (local.get $p) (i32.const 16)))
(local.set $b (i32.add (local.get $p) (i32.const 32)))
(local.set $c (i32.add (local.get $p) (i32.const 48)))
(local.set $d (i32.add (local.get $p) (i32.const 64)))
(local.set $e (i32.add (local.get $p) (i32.const 80)))
;; Result: a right-nested chain of i32.add. Each add's left operand is an
;; i32.mul of two locals (the pairs a*c, e*b, d*a, c*e, b*d repeated three
;; times, 15 products in all) and its right operand is the next add; the
;; innermost right operand is $a. Because every left operand is evaluated
;; before the nested add, each product is still pending on the operand stack
;; while the rest of the chain is computed.
(i32.add (i32.mul (local.get $a) (local.get $c)) (i32.add (i32.mul (local.get $e) (local.get $b)) (i32.add (i32.mul (local.get $d) (local.get $a)) (i32.add (i32.mul (local.get $c) (local.get $e)) (i32.add (i32.mul (local.get $b) (local.get $d)) (i32.add (i32.mul (local.get $a) (local.get $c)) (i32.add (i32.mul (local.get $e) (local.get $b)) (i32.add (i32.mul (local.get $d) (local.get $a)) (i32.add (i32.mul (local.get $c) (local.get $e)) (i32.add (i32.mul (local.get $b) (local.get $d)) (i32.add (i32.mul (local.get $a) (local.get $c)) (i32.add (i32.mul (local.get $e) (local.get $b)) (i32.add (i32.mul (local.get $d) (local.get $a)) (i32.add (i32.mul (local.get $c) (local.get $e)) (i32.add (i32.mul (local.get $b) (local.get $d)) (local.get $a))))))))))))))))
))
Loading