From 9bb32875d7f3b9e50c70e12f6cce3d63d02de0f7 Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Thu, 26 Mar 2026 05:25:56 -0600 Subject: [PATCH 1/3] [AIEX][AIE2PS] Add base test with a cross regclass remat trial --- .../liverangeedit-avoid-cross-class-remat.ll | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll diff --git a/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll b/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll new file mode 100644 index 000000000000..ed4b7ea964fb --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll @@ -0,0 +1,71 @@ +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates +; RUN: not --crash llc -mtriple=aie2ps -stop-before=virtregrewriter %s -o - 2>&1 | FileCheck %s + +; Without the fix, this test crashes during register allocation when attempting +; to rematerialize an instruction with incompatible register classes. 
+; CHECK: Running pass 'Greedy Register Allocator' + +define void @remat_cross_class_test() { +entry: + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup51.i, %entry + %dims_in_L1.sroa.12.0308.i = phi i32 [ 0, %entry ], [ %11, %for.cond.cleanup51.i ] + %dims_out_L1.sroa.15.0307.i = phi i32 [ 0, %entry ], [ %8, %for.cond.cleanup51.i ] + %dims_in_L2_01.sroa.12.0303.i = phi i32 [ 0, %entry ], [ %22, %for.cond.cleanup51.i ] + %dims_out_L2_23.sroa.15.0296.i = phi i32 [ 0, %entry ], [ %26, %for.cond.cleanup51.i ] + br label %for.cond54.preheader.i + +for.cond54.preheader.i: ; preds = %for.cond54.preheader.i, %for.body.i + %dims_in_L1.sroa.12.1251.i = phi i32 [ %dims_in_L1.sroa.12.0308.i, %for.body.i ], [ %11, %for.cond54.preheader.i ] + %dims_out_L1.sroa.15.1250.i = phi i32 [ %dims_out_L1.sroa.15.0307.i, %for.body.i ], [ %8, %for.cond54.preheader.i ] + %0 = trunc i32 %dims_out_L1.sroa.15.1250.i to i20 + %1 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %0) + %2 = extractvalue { ptr, i20, i20 } %1, 1 + %3 = trunc i32 %dims_in_L1.sroa.12.1251.i to i20 + %4 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 1, i20 %3, i20 0, i20 0) + %5 = extractvalue { ptr, i20, i20 } %4, 1 + %6 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %2, i20 1, i20 0) + %7 = extractvalue { ptr, i20, i20 } %6, 2 + %8 = zext i20 %7 to i32 + %9 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 1, i20 %5, i20 0, i20 0) + %10 = extractvalue { ptr, i20, i20 } %9, 1 + %11 = zext i20 %10 to i32 + %12 = call i1 @llvm.loop.decrement.i32(i32 0) + br i1 %12, label %for.cond54.preheader.i, label %for.cond.cleanup51.i + +for.cond.cleanup51.i: ; preds = %for.cond54.preheader.i + %13 = trunc i32 %dims_in_L2_01.sroa.12.0303.i to i20 + %14 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 
%13, i20 0, i20 1) + %15 = extractvalue { ptr, i20, i20 } %14, 1 + %16 = extractvalue { ptr, i20, i20 } %14, 2 + %17 = trunc i32 %dims_out_L2_23.sroa.15.0296.i to i20 + %18 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %17) + %19 = extractvalue { ptr, i20, i20 } %18, 1 + %20 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 %16) + %21 = extractvalue { ptr, i20, i20 } %20, 1 + %22 = zext i20 %21 to i32 + %23 = tail call { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %19, i20 0, i20 0) + %24 = extractvalue { ptr, i20, i20 } %18, 1 + %25 = extractvalue { ptr, i20, i20 } %23, 2 + %26 = zext i20 %25 to i32 + br label %for.body.i + +; uselistorder directives + uselistorder i32 %8, { 1, 0 } + uselistorder i32 %11, { 1, 0 } +} + +declare { ptr, i20, i20 } @llvm.aie2ps.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) + +; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn +declare i1 @llvm.loop.decrement.i32(i32) #0 + +; uselistorder directives +uselistorder ptr @llvm.aie2ps.add.3d, { 7, 6, 5, 4, 3, 2, 1, 0 } + +attributes #0 = { nocallback noduplicate nofree nosync nounwind willreturn } From 2ae96566dc99bcd43293074b50db945b998a098e Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Thu, 26 Mar 2026 05:18:21 -0600 Subject: [PATCH 2/3] [AIEX] Manually revert "Clear stale ancestor live intervals after super-reg rewrite" We are not reverting the original commit because it would remove one important lit test. 
--- llvm/lib/Target/AIE/AIESuperRegUtils.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp index 0d9269ce0694..acdf5995ab24 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp @@ -376,16 +376,6 @@ void rewriteSuperReg(Register Reg, std::optional AssignedPhysReg, // Step 4: Remove the original register's live interval LIS.removeInterval(Reg); - // Step 4b: Clear stale ancestor live intervals. The operand rewrite in - // step 3 modified instructions in-place (e.g., stripping sub-register - // indices). Any ancestor register in the VRM split chain still has VNInfos - // pointing to those instruction slots. If a later Greedy pass traces back - // via VRM.getOriginal(), it would find a stale instruction and could produce - // an invalid rematerialization. Clearing the ancestor interval prevents this. - Register Original = VRM.getOriginal(Reg); - if (Original != Reg && LIS.hasInterval(Original)) - LIS.getInterval(Original).clear(); - // Step 5: Filter out empty subregisters markEffectiveEmptyCopiesDead(SubRegToVReg, MRI, TRI, LIS); From 048fa16fd01f07b1bb9c58b70e0141d7b8dc26a0 Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Thu, 26 Mar 2026 05:54:41 -0600 Subject: [PATCH 3/3] [LiveRangeEdit] Improve rematerialization validation for register class compatibility During register allocation, instructions may be rewritten in ways that change the register class of their operands. For example, the SplitEditor may transfer a def to a split product, and target-specific passes (like super-register rewriters) may later modify the instruction, changing register classes. This patch adds validation to scanRemattable() to prevent rematerialization when the defining instruction's register class requirements are incompatible with the target register: 1. 
Finds the correct def operand by tracing through VirtRegMap to identify which operand corresponds to the original register being rematerialized 2. Validates register class compatibility: checks that the instruction's required register class for the def operand is compatible with the original register's class 3. Handles subreg defs correctly: when a def operand has a subreg index (indicating a partial register definition), register class validation is skipped since the partial def's class may legitimately differ from the full register's class 4. Refactors validation logic into isRematerializableDefInstr() helper function for better code organization This prevents machine verifier failures and incorrect code generation on targets with complex register hierarchies like AIE, while maintaining correct behavior for subreg rematerialization on all targets. --- llvm/lib/CodeGen/LiveRangeEdit.cpp | 57 ++++++++++++++++++- .../liverangeedit-avoid-cross-class-remat.ll | 10 ++-- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index d7421c548a2d..182f5108f975 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Modifications (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its +// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its // affiliates // //===----------------------------------------------------------------------===// @@ -81,6 +81,57 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, return true; } +/// Screen DefMI before scanRemattable() hands it to checkRematerializable(). +/// Returns false when DefMI has no def that can stand in for Original. 
+static bool isRematerializableDefInstr(const MachineInstr *DefMI, + Register Original, const VirtRegMap *VRM, + const MachineRegisterInfo &MRI, + const TargetInstrInfo &TII) { + // Pick the first virtual-register def whose VirtRegMap original is Original + const MachineOperand *DefMO = nullptr; + for (const MachineOperand &MO : DefMI->defs()) { + if (MO.isReg() && MO.getReg().isVirtual()) { + // Check if this def's original register is our Original + if (VRM->getOriginal(MO.getReg()) == Original) { + DefMO = &MO; + break; + } + } + } + + if (!DefMO) { + LLVM_DEBUG({ + dbgs() << " No def operand traces back to " + << printReg(Original, MRI.getTargetRegisterInfo(), 0, &MRI) + << " (stale VNInfo after rewriting)\n"; + }); + return false; + } + + // DefMI (the instruction the caller found at OrigVNI->def) may no longer + // define a register compatible with Original. This can happen when the + // SplitEditor transfers a def to a split product, and that product is + // later rewritten in-place (e.g., by a super-register rewriter that strips + // sub-register indices, changing the register class). Check the + // instruction descriptor's operand constraint rather than the current + // virtual register, and reject if Original's class is not contained in + // the instruction's def class (a null def class is not rejected). + // + // However, if the def operand has a subreg index, this indicates a + // partial def of the original register, so the class check is skipped + // and the decision is left to the caller's remaining analysis + // (checkRematerializable()). 
+ if (!DefMO->getSubReg()) { + const TargetRegisterClass *DefRC = + TII.getRegClass(DefMI->getDesc(), DefMO->getOperandNo(), + MRI.getTargetRegisterInfo(), *DefMI->getMF()); + if (DefRC && !DefRC->hasSubClassEq(MRI.getRegClass(Original))) + return false; + } + + return true; +} + void LiveRangeEdit::scanRemattable() { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) @@ -93,6 +144,10 @@ void LiveRangeEdit::scanRemattable() { MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; + + if (!isRematerializableDefInstr(DefMI, Original, VRM, MRI, TII)) + continue; + checkRematerializable(OrigVNI, DefMI); } ScannedRemattable = true; diff --git a/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll b/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll index ed4b7ea964fb..6a31cf816c13 100644 --- a/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll +++ b/llvm/test/CodeGen/AIE/aie2ps/ra/liverangeedit-avoid-cross-class-remat.ll @@ -3,11 +3,13 @@ ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ; ; (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates -; RUN: not --crash llc -mtriple=aie2ps -stop-before=virtregrewriter %s -o - 2>&1 | FileCheck %s +; RUN: llc -mtriple=aie2ps -stop-before=virtregrewriter %s -o - | FileCheck %s -; Without the fix, this test crashes during register allocation when attempting -; to rematerialize an instruction with incompatible register classes. -; CHECK: Running pass 'Greedy Register Allocator' +; This test verifies that rematerialization validation correctly handles +; register class compatibility. Without the fix, this would crash during +; register allocation when attempting to rematerialize an instruction with +; incompatible register classes. +; CHECK: name: remat_cross_class_test define void @remat_cross_class_test() { entry: