Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion llvm/lib/CodeGen/LiveRangeEdit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Modifications (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its
// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its
// affiliates
//
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -81,6 +81,57 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
return true;
}

/// Pre-filter a defining instruction before it is considered for
/// rematerialization.
///
/// The check has two steps:
///  1. Find a virtual-register def operand of \p DefMI whose original register
///     (as reported by \p VRM) is \p Original. If there is none, reject.
///  2. If that def operand carries no sub-register index, compare the register
///     class required by the instruction descriptor for that operand with the
///     register class of \p Original. Reject if Original's class is not a
///     sub-class of (or equal to) the descriptor's class.
///
/// \param DefMI    Instruction to inspect; its defs() are scanned.
/// \param Original Register that one of the defs must trace back to.
/// \param VRM      Maps each virtual def register to its original register.
///                 Dereferenced unconditionally, so it must not be null.
/// \param MRI      Supplies the register class of \p Original and the target
///                 register info.
/// \param TII      Supplies the descriptor register class of the def operand.
/// \returns false if the instruction must be skipped, true if it passed both
///          checks above.
static bool isRematerializableDefInstr(const MachineInstr *DefMI,
Register Original, const VirtRegMap *VRM,
const MachineRegisterInfo &MRI,
const TargetInstrInfo &TII) {
// Find the operand that defines a register with Original as its ancestor
const MachineOperand *DefMO = nullptr;
for (const MachineOperand &MO : DefMI->defs()) {
if (MO.isReg() && MO.getReg().isVirtual()) {
// Check if this def's original register is our Original
if (VRM->getOriginal(MO.getReg()) == Original) {
DefMO = &MO;
// Only the first matching def operand is examined.
break;
}
}
}

// No def of this instruction maps back to Original: report it in debug
// builds and reject the instruction.
if (!DefMO) {
LLVM_DEBUG({
dbgs() << " No def operand traces back to "
<< printReg(Original, MRI.getTargetRegisterInfo(), 0, &MRI)
<< " (stale VNInfo after rewriting)\n";
});
return false;
}

// The instruction at OrigVNI->def may no longer define a register
// compatible with Original. This can happen when the SplitEditor
// transfers a def to a split product, and that product is later
// rewritten in-place (e.g., by a super-register rewriter that strips
// sub-register indices, changing the register class). Check the
// instruction descriptor's operand constraint rather than the current
// virtual register, and skip if Original's class is not contained in
// the instruction's def class.
//
// However, if the def operand has a subreg index, this indicates a
// partial def of the original register, which may be valid for
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sounds like a gamble. Perhaps we can strengthen the check by checking that all subregindices connect to the same class.

// rematerialization regardless of register class constraints, depending on
// the remaining analysis.
if (!DefMO->getSubReg()) {
const TargetRegisterClass *DefRC =
TII.getRegClass(DefMI->getDesc(), DefMO->getOperandNo(),
MRI.getTargetRegisterInfo(), *DefMI->getMF());
// A null DefRC is not treated as a mismatch; only a non-null class that
// does not contain Original's class rejects the instruction.
if (DefRC && !DefRC->hasSubClassEq(MRI.getRegClass(Original)))
return false;
}

return true;
}

void LiveRangeEdit::scanRemattable() {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a table, or is this abbreviating 'rematerializable'? In both cases, the spelling is horrible.

for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
Expand All @@ -93,6 +144,10 @@ void LiveRangeEdit::scanRemattable() {
MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;

if (!isRematerializableDefInstr(DefMI, Original, VRM, MRI, TII))
continue;

checkRematerializable(OrigVNI, DefMI);
}
ScannedRemattable = true;
Expand Down
10 changes: 0 additions & 10 deletions llvm/lib/Target/AIE/AIESuperRegUtils.cpp
Comment thread
mludevid marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -376,16 +376,6 @@ void rewriteSuperReg(Register Reg, std::optional<Register> AssignedPhysReg,
// Step 4: Remove the original register's live interval
LIS.removeInterval(Reg);

// Step 4b: Clear stale ancestor live intervals. The operand rewrite in
// step 3 modified instructions in-place (e.g., stripping sub-register
// indices). Any ancestor register in the VRM split chain still has VNInfos
// pointing to those instruction slots. If a later Greedy pass traces back
// via VRM.getOriginal(), it would find a stale instruction and could produce
// an invalid rematerialization. Clearing the ancestor interval prevents this.
Register Original = VRM.getOriginal(Reg);
if (Original != Reg && LIS.hasInterval(Original))
LIS.getInterval(Original).clear();

// Step 5: Filter out empty subregisters
markEffectiveEmptyCopiesDead(SubRegToVReg, MRI, TRI, LIS);

Expand Down
Comment thread
mludevid marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
; RUN: llc -mtriple=aie2ps -stop-before=virtregrewriter %s -o - | FileCheck %s

; This test verifies that rematerialization validation correctly handles
; register class compatibility. Without the fix, this would crash during
; register allocation when attempting to rematerialize an instruction with
; incompatible register classes.
; CHECK: name: remat_cross_class_test

; Reproducer body: two nested loops whose i32 loop-carried values are truncated
; to i20, fed through chains of @llvm.aie2ps.add.3d calls, and zero-extended
; back to i32. The function contains no return; the outer loop branches back
; to its header unconditionally.
define void @remat_cross_class_test() {
entry:
br label %for.body.i

; Outer loop header: each phi merges the initial 0 from %entry with a value
; carried around from %for.cond.cleanup51.i.
for.body.i: ; preds = %for.cond.cleanup51.i, %entry
%dims_in_L1.sroa.12.0308.i = phi i32 [ 0, %entry ], [ %11, %for.cond.cleanup51.i ]
%dims_out_L1.sroa.15.0307.i = phi i32 [ 0, %entry ], [ %8, %for.cond.cleanup51.i ]
%dims_in_L2_01.sroa.12.0303.i = phi i32 [ 0, %entry ], [ %22, %for.cond.cleanup51.i ]
%dims_out_L2_23.sroa.15.0296.i = phi i32 [ 0, %entry ], [ %26, %for.cond.cleanup51.i ]
br label %for.cond54.preheader.i

; Inner loop: a single-block self-loop. %8 and %11 are carried around both
; this loop and the outer loop. The back edge is taken while
; @llvm.loop.decrement returns true.
for.cond54.preheader.i: ; preds = %for.cond54.preheader.i, %for.body.i
%dims_in_L1.sroa.12.1251.i = phi i32 [ %dims_in_L1.sroa.12.0308.i, %for.body.i ], [ %11, %for.cond54.preheader.i ]
%dims_out_L1.sroa.15.1250.i = phi i32 [ %dims_out_L1.sroa.15.0307.i, %for.body.i ], [ %8, %for.cond54.preheader.i ]
%0 = trunc i32 %dims_out_L1.sroa.15.1250.i to i20
%1 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %0)
%2 = extractvalue { ptr, i20, i20 } %1, 1
%3 = trunc i32 %dims_in_L1.sroa.12.1251.i to i20
%4 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 1, i20 %3, i20 0, i20 0)
%5 = extractvalue { ptr, i20, i20 } %4, 1
%6 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %2, i20 1, i20 0)
%7 = extractvalue { ptr, i20, i20 } %6, 2
%8 = zext i20 %7 to i32
%9 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 1, i20 %5, i20 0, i20 0)
%10 = extractvalue { ptr, i20, i20 } %9, 1
%11 = zext i20 %10 to i32
%12 = call i1 @llvm.loop.decrement.i32(i32 0)
br i1 %12, label %for.cond54.preheader.i, label %for.cond.cleanup51.i

; Inner loop exit and outer loop latch: computes %22 and %26, which feed the
; outer-loop phis, then branches back to %for.body.i unconditionally.
for.cond.cleanup51.i: ; preds = %for.cond54.preheader.i
%13 = trunc i32 %dims_in_L2_01.sroa.12.0303.i to i20
%14 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, i20 1)
%15 = extractvalue { ptr, i20, i20 } %14, 1
%16 = extractvalue { ptr, i20, i20 } %14, 2
%17 = trunc i32 %dims_out_L2_23.sroa.15.0296.i to i20
%18 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %17)
%19 = extractvalue { ptr, i20, i20 } %18, 1
%20 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 %16)
%21 = extractvalue { ptr, i20, i20 } %20, 1
%22 = zext i20 %21 to i32
%23 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %19, i20 0, i20 0)
; %24 repeats the extract that produced %19 and has no users in this function.
%24 = extractvalue { ptr, i20, i20 } %18, 1
%25 = extractvalue { ptr, i20, i20 } %23, 2
%26 = zext i20 %25 to i32
br label %for.body.i

; uselistorder directives
uselistorder i32 %8, { 1, 0 }
uselistorder i32 %11, { 1, 0 }
}

declare { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20)

; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
declare i1 @llvm.loop.decrement.i32(i32) #0

; uselistorder directives
uselistorder ptr @llvm.aie2ps.add.3d, { 7, 6, 5, 4, 3, 2, 1, 0 }

attributes #0 = { nocallback noduplicate nofree nosync nounwind willreturn }
Loading