Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions crates/synth-backend/src/arm_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,18 @@ fn compile_wasm_to_arm(
stats.needs_spill
);
}
// VCR-RA-002 (#390, epic #242): eliminate a provably-dead stack frame
// (`sub sp,#N`/`add sp,#N` reserved by `compute_local_layout` for locals
// that promotion homed in registers, never accessed). Removing it saves
// the two instructions AND restores the SP-untouched precondition that
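// `shrink_callee_saved_saves` requires — so it must run FIRST. Off by
// default: opt in by setting `SYNTH_DEAD_FRAME_ELIM` (e.g. `=1`; only the
// variable's presence is checked, so ANY valid-Unicode value — even `0` or
// empty — enables it); unset ⇒ byte-identical. Default-on flip held for
// on-silicon validation, like the realloc/shrink levers.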
let out = if std::env::var("SYNTH_DEAD_FRAME_ELIM").is_ok() {
synth_synthesis::liveness::elide_dead_frame(&out).unwrap_or(out)
} else {
out
};
synth_synthesis::liveness::shrink_callee_saved_saves(&out).unwrap_or(out)
} else {
arm_instrs
Expand Down
285 changes: 285 additions & 0 deletions crates/synth-synthesis/src/liveness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2390,6 +2390,141 @@ pub fn shrink_callee_saved_saves(instrs: &[ArmInstruction]) -> Option<Vec<ArmIns
Some(out)
}

/// VCR-RA-002 (#390, epic #242): eliminate a provably-dead stack frame.
///
/// `compute_local_layout` reserves a frame slot — materialized as
/// `sub sp,#N` (prologue) / `add sp,#N` (epilogue) — for every non-param wasm
/// local it sees. Local promotion (v0.14.0) then homes eligible i32 locals in a
/// register instead. When promotion homes ALL of a function's locals and the
/// function neither spills, calls, nor touches i64 / stack-passed params, those
/// reserved frame bytes are never accessed: the `sub`/`add sp` pair is pure
/// overhead (~2-3 cyc on a small leaf), AND — because it writes SP —
/// [`shrink_callee_saved_saves`] declines on it (that pass bails on any SP
/// def/use, since a shrunk push would shift SP-relative offsets). Removing the
/// dead frame both saves the two instructions and restores the SP-untouched
/// precondition the shrink pass needs, so the two passes compose.
///
/// SOUNDNESS (safe-by-construction): fire ONLY when NO instruction in the body
/// reads or writes SP except the matched frame `sub`/`add` and the prologue
/// `Push` / epilogue `Pop` (which adjust SP symmetrically but never index the
/// frame region). For wasm locals that guard IS exact deadness — locals are not
/// addressable, so only `LocalGet/Set/Tee` (lowered to a promoted register or
/// `[sp,#off]`) touch them, and every OTHER SP consumer — register spills, #204
/// param-backing, the i64 pair-spill area, the #359 outgoing-arg region, and
/// incoming stack-passed params — manifests as an `[sp,#off]` access this guard
/// sees. Any such access (or any unmodeled op whose SP effect `reg_effect` can't
/// confirm absent) returns `None` and the bytes are unchanged. When it fires,
/// the removed region was provably unreferenced: SP stays balanced and no
/// surviving offset shifts. Removal-only — no instruction is added, rewritten,
/// or reordered, so the only count change is the dropped `sub`/`add` pair.
///
/// Bounded v1 scope (mirrors [`shrink_callee_saved_saves`]) — `None` unless:
/// - exactly one frame `sub sp,sp,#N`; every `add sp,sp,#M` restores the same
/// `N` (balanced epilogues; a mismatched `add sp` is some other SP
/// arithmetic → decline);
/// - no other SP def/use or `[sp]`-relative addressing anywhere in the body;
/// - every instruction is register-modeled (`reg_effect`) or pure label
/// control flow (`Label`/`B*`/`Bx {LR}`).
pub fn elide_dead_frame(instrs: &[ArmInstruction]) -> Option<Vec<ArmInstruction>> {
use ArmOp::*;

// Pass 1: locate the single frame allocation and its matching deallocs, and
// verify nothing else touches SP.
let mut frame_sub: Option<(usize, i32)> = None;
let mut frame_adds: Vec<usize> = Vec::new();
for (i, ins) in instrs.iter().enumerate() {
match &ins.op {
Sub {
rd: Reg::SP,
rn: Reg::SP,
op2: Operand2::Imm(n),
} => {
if frame_sub.is_some() {
return None; // more than one frame allocation — unmodeled
}
frame_sub = Some((i, *n));
}
Add {
rd: Reg::SP,
rn: Reg::SP,
op2: Operand2::Imm(_),
} => frame_adds.push(i),
_ => {}
}
}
let (sub_idx, frame_n) = frame_sub?;
if frame_adds.is_empty() {
return None;
}
// Every frame dealloc must restore exactly `frame_n` (balanced); a different
// immediate means this `add sp` is not the frame's counterpart → decline.
for &ai in &frame_adds {
if let Add {
op2: Operand2::Imm(n),
..
} = &instrs[ai].op
&& *n != frame_n
{
return None;
}
}

// Pass 2: prove the frame is dead — no SP access outside the frame sub/adds
// and the push/pop.
for (i, ins) in instrs.iter().enumerate() {
if i == sub_idx || frame_adds.contains(&i) {
continue;
}
match &ins.op {
// Push/Pop move SP but never index the frame region — allowed.
// Pure label control flow has no register operands.
Push { .. }
| Pop { .. }
| Label { .. }
| B { .. }
| BOffset { .. }
| BCondOffset { .. }
| Bhs { .. }
| Blo { .. }
| Bcc { .. } => {}
Bx { rm } if *rm == Reg::LR => {}
op => {
// Unmodeled op (call/BrTable/i64-pair/...): cannot prove SP-free.
let e = reg_effect(op)?;
if e.defs.iter().chain(e.uses.iter()).any(|r| *r == Reg::SP) {
return None;
}
// SP hides inside addressing modes too.
if let Ldr { addr, .. }
| Ldrb { addr, .. }
| Ldrsb { addr, .. }
| Ldrh { addr, .. }
| Ldrsh { addr, .. }
| Str { addr, .. }
| Strb { addr, .. }
| Strh { addr, .. } = op
&& (addr.base == Reg::SP || addr.offset_reg == Some(Reg::SP))
{
return None;
}
}
}
}

// Dead frame confirmed: drop the `sub sp` and every matching `add sp`.
let drop: BTreeSet<usize> = std::iter::once(sub_idx)
.chain(frame_adds.iter().copied())
.collect();
Some(
instrs
.iter()
.enumerate()
.filter(|(i, _)| !drop.contains(i))
.map(|(_, ins)| ins.clone())
.collect(),
)
}

/// Defense-in-depth: before accepting a segment's rewrite, every interference
/// edge is re-checked against the final assignment (independent of the
/// colourer), mirroring `verify_allocation`.
Expand Down Expand Up @@ -5462,6 +5597,156 @@ mod tests {
assert_eq!(out[4], seq[4]);
}

// ---- elide_dead_frame (VCR-RA-002, #390) ----

/// Test helper: the frame allocation `sub sp, sp, #n`.
fn frame_sub(n: i32) -> ArmInstruction {
    let alloc = ArmOp::Sub {
        rd: Reg::SP,
        rn: Reg::SP,
        op2: Operand2::Imm(n),
    };
    ins(alloc)
}
/// Test helper: the frame deallocation `add sp, sp, #n`.
fn frame_add(n: i32) -> ArmInstruction {
    let dealloc = ArmOp::Add {
        rd: Reg::SP,
        rn: Reg::SP,
        op2: Operand2::Imm(n),
    };
    ins(dealloc)
}

#[test]
fn elide_dead_frame_removes_unreferenced_frame() {
    // Promoted-leaf shape: `sub sp,#16` reserves slots for locals that all
    // ended up in registers, and the body never mentions SP — a dead frame.
    let body = vec![
        ins(ArmOp::Add {
            rd: Reg::R4,
            rn: Reg::R0,
            op2: Operand2::Imm(1),
        }),
        ins(ArmOp::Mov {
            rd: Reg::R0,
            op2: Operand2::Reg(Reg::R4),
        }),
    ];

    // Input: prologue, frame alloc, body, frame dealloc, epilogue.
    let mut seq = vec![prologue(), frame_sub(16)];
    seq.extend(body.iter().cloned());
    seq.push(frame_add(16));
    seq.push(epilogue());

    // Expected: the same stream minus the sub/add sp pair, order preserved.
    let mut expected = vec![prologue()];
    expected.extend(body);
    expected.push(epilogue());

    let out = elide_dead_frame(&seq).expect("dead frame elided");
    assert_eq!(out, expected, "only the sub/add sp pair is removed");
}

#[test]
fn elide_dead_frame_declines_on_sp_relative_access() {
    use crate::rules::MemAddr;
    // A store through `[sp,#4]` means something lives in the frame, so the
    // frame is LIVE and must be kept.
    let frame_slot = MemAddr {
        base: Reg::SP,
        offset: 4,
        offset_reg: None,
    };
    let store_to_frame = ins(ArmOp::Str {
        rd: Reg::R0,
        addr: frame_slot,
    });
    let seq = vec![
        prologue(),
        frame_sub(16),
        store_to_frame,
        frame_add(16),
        epilogue(),
    ];
    let result = elide_dead_frame(&seq);
    assert_eq!(result, None);
}

#[test]
fn elide_dead_frame_declines_on_unbalanced_add_sp() {
    // `add sp,#8` does not undo `sub sp,#16`, so it is treated as unrelated SP
    // arithmetic rather than the frame's counterpart — decline conservatively.
    let filler = ins(ArmOp::Mov {
        rd: Reg::R4,
        op2: Operand2::Imm(0),
    });
    let seq = vec![
        prologue(),
        frame_sub(16),
        filler,
        frame_add(8),
        epilogue(),
    ];
    let result = elide_dead_frame(&seq);
    assert_eq!(result, None);
}

#[test]
fn elide_dead_frame_declines_on_unmodeled_sp_effect() {
    // A call inside the frame: `reg_effect` declines `Bl` (its SP/clobber
    // effect is not modeled), so deadness cannot be proven → decline.
    let call = ins(ArmOp::Bl {
        label: "func_1".to_string(),
    });
    let seq = vec![
        prologue(),
        frame_sub(16),
        call,
        frame_add(16),
        epilogue(),
    ];
    let result = elide_dead_frame(&seq);
    assert_eq!(result, None);
}

#[test]
fn elide_dead_frame_noop_when_no_frame() {
    // With no `sub sp` present (frame_size == 0) there is nothing to elide.
    let only_body = ins(ArmOp::Add {
        rd: Reg::R4,
        rn: Reg::R0,
        op2: Operand2::Imm(1),
    });
    let seq = vec![prologue(), only_body, epilogue()];
    let result = elide_dead_frame(&seq);
    assert_eq!(result, None);
}

#[test]
fn elide_dead_frame_removes_across_multiple_epilogues() {
    // Two return paths, each with its own frame restore; both `add sp` must go.
    let jump_to_end = || {
        ins(ArmOp::B {
            label: "L_end".to_string(),
        })
    };
    let end_label = || {
        ins(ArmOp::Label {
            name: "L_end".to_string(),
        })
    };

    let seq = vec![
        prologue(),
        frame_sub(8),
        jump_to_end(),
        frame_add(8),
        epilogue(),
        end_label(),
        frame_add(8),
        epilogue(),
    ];
    let expected = vec![
        prologue(),
        jump_to_end(),
        epilogue(),
        end_label(),
        epilogue(),
    ];

    let out = elide_dead_frame(&seq).expect("dead frame elided");
    assert_eq!(out, expected,);
}

#[test]
fn precolored_node_keeps_its_colour_and_constrains_neighbours() {
// Triangle R0—R1—R2; pin R0 to colour 2. With k=3, R1/R2 must avoid 2
Expand Down
30 changes: 19 additions & 11 deletions scripts/repro/leaf_caller_saved.wat
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
;; perf repro (VCR-RA-002, #428, epic #242): leaf-function prologue shrink.
;; perf repro (VCR-RA-002, #390, epic #242): leaf-function prologue shrink —
;; dead-frame elimination.
;;
;; Local promotion (v0.14.0) homes eligible i32 locals in callee-saved r4..r8.
;; For a LEAF function that is the wrong pool: callee-saved regs must be
;; saved/restored (`push {r4-r8,lr}` / `pop {…,pc}` ~12 cyc of pure overhead),
;; whereas a leaf never calls, so caller-saved r1..r3 (minus params, minus r0
;; for the return value) are free homes that need NO prologue save. Promoting
;; into caller-saved first lets `shrink_callee_saved_saves` drop the callee-saved
;; push entirely.
;; `compute_local_layout` reserves a frame slot (`sub sp,#N` / `add sp,#N`) for
;; every non-param wasm local it sees. Local promotion (v0.14.0) then homes the
;; eligible i32 locals in registers, so those frame bytes are NEVER accessed: a
;; dead `sub`/`add sp` pair (~2-3 cyc on a small leaf) that also touches SP and
;; thereby blocks `shrink_callee_saved_saves` (which declines on any SP def/use).
;; `elide_dead_frame` (SYNTH_DEAD_FRAME_ELIM=1) removes it when the body provably
;; never touches SP — saving the two instructions and restoring the SP-untouched
;; precondition the shrink pass needs.
;;
;; This fixture: 1 param + 3 promotable i32 locals (each written-before-read,
;; depth-0, >=2 reads), minimal operand-stack temp pressure. Flag-off homes
;; a,b,c -> r4,r5,r6 (push {r4-r6,lr}); flag-on (SYNTH_LEAF_CALLER_SAVED=1) homes
;; them -> r1,r2,r3 (no callee-saved push). Same result either way.
;; depth-0, >=2 reads), minimal operand-stack temp pressure. Promotion homes
;; a,b,c -> r4,r5,r6 and the layout reserves a dead 16-byte frame. Flag-off keeps
;; it (`sub sp,#16` ... `add sp,#16`, 36 B); flag-on elides it (28 B, -8 B = the
;; two 4-byte wide insns), byte-identical otherwise. leaf3(p) = 4*p + 10.
;;
;; NOTE: the push stays `{r4-r8,lr}` either way here — a,b,c are in callee-saved
;; r4,r5,r6 + scratch r7 = 4 saved regs, which `shrink_callee_saved_saves` pads
;; back up to the even-count `{r4-r8,lr}`. Trimming the push needs the locals OUT
;; of callee-saved (caller-saved leaf homing), tracked separately as #390.
;;
;; Generic — neutral values, tied to nothing real.
(module
Expand Down
Loading
Loading