diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index d7421c548a2d..182f5108f975 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Modifications (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its +// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its // affiliates // //===----------------------------------------------------------------------===// @@ -81,6 +81,57 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, return true; } +/// Return true if DefMI still has a def tracing back to Original (via VRM) +/// and a compatible def operand class. This is only a pre-filter for remat. +static bool isRematerializableDefInstr(const MachineInstr *DefMI, + Register Original, const VirtRegMap *VRM, + const MachineRegisterInfo &MRI, + const TargetInstrInfo &TII) { + // Find the operand that defines a register with Original as its ancestor + const MachineOperand *DefMO = nullptr; + for (const MachineOperand &MO : DefMI->defs()) { + if (MO.isReg() && MO.getReg().isVirtual()) { + // Check if this def's original register is our Original + if (VRM->getOriginal(MO.getReg()) == Original) { + DefMO = &MO; + break; + } + } + } + + if (!DefMO) { + LLVM_DEBUG({ + dbgs() << " No def operand traces back to " + << printReg(Original, MRI.getTargetRegisterInfo(), 0, &MRI) + << " (stale VNInfo after rewriting)\n"; + }); + return false; + } + + // DefMI (the caller's OrigVNI->def) may no longer define a register + // compatible with Original. This can happen when the SplitEditor + // transfers a def to a split product, and that product is later + // rewritten in-place (e.g., by a super-register rewriter that strips + // sub-register indices, changing the register class). 
Check the + // instruction descriptor's operand constraint rather than the current + // virtual register, and skip if Original's class is not contained in + // the instruction's def class. + // + // However, if the def operand has a subreg index, this indicates a + // partial def of the original register, which may be valid for + // rematerialization regardless of register class constraints, depending on + // the remaining analysis. + if (!DefMO->getSubReg()) { + const TargetRegisterClass *DefRC = + TII.getRegClass(DefMI->getDesc(), DefMO->getOperandNo(), + MRI.getTargetRegisterInfo(), *DefMI->getMF()); + if (DefRC && !DefRC->hasSubClassEq(MRI.getRegClass(Original))) + return false; + } + + return true; +} + void LiveRangeEdit::scanRemattable() { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) @@ -93,6 +144,10 @@ void LiveRangeEdit::scanRemattable() { MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; + + if (!isRematerializableDefInstr(DefMI, Original, VRM, MRI, TII)) + continue; + checkRematerializable(OrigVNI, DefMI); } ScannedRemattable = true; diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp index 0d9269ce0694..acdf5995ab24 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp @@ -376,16 +376,6 @@ void rewriteSuperReg(Register Reg, std::optional AssignedPhysReg, // Step 4: Remove the original register's live interval LIS.removeInterval(Reg); - // Step 4b: Clear stale ancestor live intervals. The operand rewrite in - // step 3 modified instructions in-place (e.g., stripping sub-register - // indices). Any ancestor register in the VRM split chain still has VNInfos - // pointing to those instruction slots. If a later Greedy pass traces back - // via VRM.getOriginal(), it would find a stale instruction and could produce - // an invalid rematerialization. Clearing the ancestor interval prevents this. 
- Register Original = VRM.getOriginal(Reg); - if (Original != Reg && LIS.hasInterval(Original)) - LIS.getInterval(Original).clear(); - // Step 5: Filter out empty subregisters markEffectiveEmptyCopiesDead(SubRegToVReg, MRI, TRI, LIS); diff --git a/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll b/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll new file mode 100644 index 000000000000..6a31cf816c13 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll @@ -0,0 +1,73 @@ +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates +; RUN: llc -mtriple=aie2ps -stop-before=virtregrewriter %s -o - | FileCheck %s + +; This test verifies that rematerialization validation correctly handles +; register class compatibility. Without the fix, this would crash during +; register allocation when attempting to rematerialize an instruction with +; incompatible register classes. 
+; CHECK: name: remat_cross_class_test + +define void @remat_cross_class_test() { +entry: + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup51.i, %entry + %dims_in_L1.sroa.12.0308.i = phi i32 [ 0, %entry ], [ %11, %for.cond.cleanup51.i ] + %dims_out_L1.sroa.15.0307.i = phi i32 [ 0, %entry ], [ %8, %for.cond.cleanup51.i ] + %dims_in_L2_01.sroa.12.0303.i = phi i32 [ 0, %entry ], [ %22, %for.cond.cleanup51.i ] + %dims_out_L2_23.sroa.15.0296.i = phi i32 [ 0, %entry ], [ %26, %for.cond.cleanup51.i ] + br label %for.cond54.preheader.i + +for.cond54.preheader.i: ; preds = %for.cond54.preheader.i, %for.body.i + %dims_in_L1.sroa.12.1251.i = phi i32 [ %dims_in_L1.sroa.12.0308.i, %for.body.i ], [ %11, %for.cond54.preheader.i ] + %dims_out_L1.sroa.15.1250.i = phi i32 [ %dims_out_L1.sroa.15.0307.i, %for.body.i ], [ %8, %for.cond54.preheader.i ] + %0 = trunc i32 %dims_out_L1.sroa.15.1250.i to i20 + %1 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %0) + %2 = extractvalue { ptr, i20, i20 } %1, 1 + %3 = trunc i32 %dims_in_L1.sroa.12.1251.i to i20 + %4 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 1, i20 %3, i20 0, i20 0) + %5 = extractvalue { ptr, i20, i20 } %4, 1 + %6 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %2, i20 1, i20 0) + %7 = extractvalue { ptr, i20, i20 } %6, 2 + %8 = zext i20 %7 to i32 + %9 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 1, i20 %5, i20 0, i20 0) + %10 = extractvalue { ptr, i20, i20 } %9, 1 + %11 = zext i20 %10 to i32 + %12 = call i1 @llvm.loop.decrement.i32(i32 0) + br i1 %12, label %for.cond54.preheader.i, label %for.cond.cleanup51.i + +for.cond.cleanup51.i: ; preds = %for.cond54.preheader.i + %13 = trunc i32 %dims_in_L2_01.sroa.12.0303.i to i20 + %14 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, 
i20 1) + %15 = extractvalue { ptr, i20, i20 } %14, 1 + %16 = extractvalue { ptr, i20, i20 } %14, 2 + %17 = trunc i32 %dims_out_L2_23.sroa.15.0296.i to i20 + %18 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %17) + %19 = extractvalue { ptr, i20, i20 } %18, 1 + %20 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 %16) + %21 = extractvalue { ptr, i20, i20 } %20, 1 + %22 = zext i20 %21 to i32 + %23 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %19, i20 0, i20 0) + %24 = extractvalue { ptr, i20, i20 } %18, 1 + %25 = extractvalue { ptr, i20, i20 } %23, 2 + %26 = zext i20 %25 to i32 + br label %for.body.i + +; uselistorder directives + uselistorder i32 %8, { 1, 0 } + uselistorder i32 %11, { 1, 0 } +} + +declare { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) + +; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn +declare i1 @llvm.loop.decrement.i32(i32) #0 + +; uselistorder directives +uselistorder ptr @llvm.aie2ps.add.3d, { 7, 6, 5, 4, 3, 2, 1, 0 } + +attributes #0 = { nocallback noduplicate nofree nosync nounwind willreturn }