Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,9 @@ void AIE2PSInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
regClassMatches(AIE2PS::spill_eS_to_eRRegClass, RC, SrcReg)) {
// Can't spill these directly. Need to bounce through a GPR.
return bounceViaRegClass(&AIE2PS::eRRegClass);
} else if (regClassMatches(AIE2PS::spill_vec512_to_compositeRegClass, RC,
SrcReg)) {
Opcode = AIE2PS::VST_512_COMPOSED_REG_SPILL;
} else {
LLVM_DEBUG(I->dump());
llvm_unreachable("Can't store this register to stack slot: is it virtual?");
Expand Down Expand Up @@ -1080,6 +1083,9 @@ void AIE2PSInstrInfo::loadRegFromStackSlot(
regClassMatches(AIE2PS::spill_eS_to_eRRegClass, RC, DstReg)) {
// Can't spill these directly. Need to bounce through a GPR.
return bounceViaRegClass(&AIE2PS::eRRegClass);
} else if (regClassMatches(AIE2PS::spill_vec512_to_compositeRegClass, RC,
DstReg)) {
Opcode = AIE2PS::VLDA_512_COMPOSED_REG_SPILL;
} else {
LLVM_DEBUG(I->dump());
llvm_unreachable(
Expand Down Expand Up @@ -1545,6 +1551,38 @@ bool AIE2PSInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
return true;
}
case AIE2PS::VLDA_512_COMPOSED_REG_SPILL: {
  // Retarget the composite 512-bit reload pseudo to the native spill load
  // matching the physical register found in operand 0.
  const Register LoadedReg = MI.getOperand(0).getReg();
  const bool IsVec512 = AIE2PS::VEC512RegClass.contains(LoadedReg);
  if (!IsVec512 && !AIE2PS::ACC512RegClass.contains(LoadedReg)) {
    // FIFO512 is part of the composite RC for parity with AIE2P, but
    // AIE2PS has no native FIFO spill opcode. The allocator should not
    // assign a FIFO physreg to a composite-class vreg in practice; if
    // it does, fail loudly so the assumption can be revisited.
    report_fatal_error("VLDA_512_COMPOSED_REG_SPILL: no native AIE2PS "
                       "spill opcode for non-VEC/non-ACC physreg");
  }
  const unsigned int NativeOpcode = IsVec512 ? AIE2PS::VLDA_dmx_lda_x_spill
                                             : AIE2PS::VLDA_dmx_lda_bm_spill;
  // Only the instruction descriptor is replaced; the operands stay as-is.
  MI.setDesc(get(NativeOpcode));
  // NOTE(review): false is returned although MI was modified in place;
  // confirm this is the intended result for a descriptor-only swap.
  return false;
}
case AIE2PS::VST_512_COMPOSED_REG_SPILL: {
  // Retarget the composite 512-bit spill-store pseudo to the native spill
  // store matching the physical register found in operand 0.
  const Register StoredReg = MI.getOperand(0).getReg();
  const bool IsVec512 = AIE2PS::VEC512RegClass.contains(StoredReg);
  if (!IsVec512 && !AIE2PS::ACC512RegClass.contains(StoredReg)) {
    // Neither a VEC512 nor an ACC512 register: there is no native opcode
    // to select here, so abort instead of emitting a wrong store.
    report_fatal_error("VST_512_COMPOSED_REG_SPILL: no native AIE2PS "
                       "spill opcode for non-VEC/non-ACC physreg");
  }
  const unsigned int NativeOpcode = IsVec512 ? AIE2PS::VST_dmx_sts_x_spill
                                             : AIE2PS::VST_dmx_sts_bm_spill;
  // Only the instruction descriptor is replaced; the operands stay as-is.
  MI.setDesc(get(NativeOpcode));
  // NOTE(review): false is returned although MI was modified in place;
  // confirm this is the intended result for a descriptor-only swap.
  return false;
}
}
return false;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/aie2ps/AIE2PSInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def VST_Y_SPILL : Pseudo<(outs ), (ins VEC1024:$src, c17n_step128:$imm), "vst_y_
def VST_BM_SPILL : Pseudo<(outs ), (ins ACC512:$src, c16n_step64:$imm), "vst_bm_spill", "$src, [sp, $imm]">;
def VST_CM_SPILL : Pseudo<(outs ), (ins ACC1024:$src, c17n_step128:$imm), "vst_cm_spill", "$src, [sp, $imm]">;
def VST_DM_SPILL : Pseudo<(outs ), (ins ACC2048:$src, c17n_step128:$imm), "vst_dm_spill", "$src, [sp, $imm]">;
// Spill-store pseudo for the composite 512-bit class. expandPostRAPseudo
// rewrites it to VST_dmx_sts_x_spill or VST_dmx_sts_bm_spill depending on
// whether $src is a VEC512 or an ACC512 physical register.
def VST_512_COMPOSED_REG_SPILL : Pseudo<(outs ), (ins spill_vec512_to_composite:$src, c16n_step64:$imm), "vst_512_composed_reg_spill", "${src}, [sp, $imm]">;

def VST_E_SPILL : Pseudo<(outs ), (ins mEs:$src, c12n_step4:$imm), "vst_e_spill", "$src, [sp, $imm]">;
def VST_EE_SPILL : Pseudo<(outs ), (ins mEEs:$src, c13n_step8:$imm), "vst_ee_spill", "$src, [sp, $imm]">;
Expand Down Expand Up @@ -193,6 +194,7 @@ def VLDA_Y_SPILL : Pseudo<(outs VEC1024:$dst), (ins c17n_step128:$imm), "vlda_y_
def VLDA_BM_SPILL : Pseudo<(outs ACC512:$dst), (ins c16n_step64:$imm), "vlda_bm_spill", "${dst}, [sp, $imm]">;
def VLDA_CM_SPILL : Pseudo<(outs ACC1024:$dst), (ins c17n_step128:$imm), "vlda_cm_spill", "${dst}, [sp, $imm]">;
def VLDA_DM_SPILL : Pseudo<(outs ACC2048:$dst), (ins c17n_step128:$imm), "vlda_dm_spill", "${dst}, [sp, $imm]">;
// Reload pseudo for the composite 512-bit class. expandPostRAPseudo
// rewrites it to VLDA_dmx_lda_x_spill or VLDA_dmx_lda_bm_spill depending on
// whether $dst is a VEC512 or an ACC512 physical register.
def VLDA_512_COMPOSED_REG_SPILL : Pseudo<(outs spill_vec512_to_composite:$dst), (ins c16n_step64:$imm), "vlda_512_composed_reg_spill", "${dst}, [sp, $imm]">;

def VLDA_E_SPILL : Pseudo<(outs mEs:$dst), (ins c12n_step4:$imm), "vlda_e_spill", "$dst, [sp, $imm]">;
def VLDA_EE_SPILL : Pseudo<(outs mEEs:$dst), (ins c13n_step8:$imm), "vlda_ee_spill", "$dst, [sp, $imm]">;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AIE/aie2ps/AIE2PSRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ bool AIE2PSRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case AIE2PS::VST_dmw_sts_w_spill:
case AIE2PS::VST_dmx_sts_bm_spill:
case AIE2PS::VST_dmx_sts_x_spill:
case AIE2PS::VLDA_512_COMPOSED_REG_SPILL:
case AIE2PS::VST_512_COMPOSED_REG_SPILL:
MI.getOperand(FIOperandNum).ChangeToImmediate(Offset);
return false;
case AIE2PS::LDA_R_SPILL:
Expand Down Expand Up @@ -395,6 +397,10 @@ AIE2PSRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
return &AIE2PS::spill_eDC_to_eRRegClass;
if (AIE2PS::eSRegClass.hasSubClassEq(RC))
return &AIE2PS::spill_eS_to_eRRegClass;
if (RC == &AIE2PS::ACC512RegClass || RC == &AIE2PS::VEC512RegClass)
// Compare by identity on purpose: using hasSubClassEq leads to register
// coalescer changes (spill_vec512_to_composite is used more frequently)
// and thus changes machine scheduling.
return &AIE2PS::spill_vec512_to_compositeRegClass;

return RC;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/aie2ps/AIE2PSRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,10 @@ def spill_eDN_to_eR : AIE2PSScalarRegisterClass<(add eDN, eR)>;
def spill_eDJ_to_eR : AIE2PSScalarRegisterClass<(add eDJ, eR, eDN)>;
def spill_eDC_to_eR : AIE2PSScalarRegisterClass<(add eDC, eR)>;

// Composite 512-bit register class combining mXm, mBMm and the listed
// lf*/sf* registers (presumably the FIFO registers -- confirm).
// AIE2PSRegisterInfo::getLargestLegalSuperClass returns this class for
// VEC512 and ACC512, and the *_512_COMPOSED_REG_SPILL pseudos use it as
// their register operand class.
def spill_vec512_to_composite : AIE2PSVector512RegisterClass<(add mXm, mBMm, lfh0, lfh1, lfl0, lfl1, sfl, sfh, lfe)> {
let ConsiderInPreRAScheduling = false;
}

class AIE2PVector1076FifoRegisterClass<dag reglist> :
AIE2PSRegisterClass<1088, 512, [i32], reglist>;
def sub_fifo : SubRegIndex<1024, 0>;
Expand Down
50 changes: 50 additions & 0 deletions llvm/test/CodeGen/AIE/aie2ps/ra/spill-vec-acc-postra-expand.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates

# RUN: llc -mtriple=aie2ps -verify-machineinstrs \
# RUN: -run-pass=prologepilog -run-pass=postrapseudos %s -o - \
# RUN: | FileCheck %s

# The combined spill_vec512_to_composite register class lets the
# allocator place a 512-bit value in either an X (VEC512) or a BM
# (ACC512) physreg, sharing one stack slot for either bank. After
# allocation, eliminateFrameIndex resolves the FI to an SP-relative
# immediate and expandPostRAPseudo swaps the composite pseudo for the
# native opcode that matches the actual physical register chosen by
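# the allocator. This test exercises the two supported branches of that
# swap (X and BM); the fatal-error path taken for any other physical
# register is not covered here.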

---
name: composite_spill_lowers_to_x_native
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 64, alignment: 64 }
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: composite_spill_lowers_to_x_native
; CHECK: VST_dmx_sts_x_spill renamable $x0,
; CHECK: renamable $x0 = VLDA_dmx_lda_x_spill
VST_512_COMPOSED_REG_SPILL renamable $x0, %stack.0, implicit $sp :: (store (s512) into %stack.0)
renamable $x0 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0)
PseudoRET implicit $lr, implicit killed $x0
...

---
name: composite_spill_lowers_to_bm_native
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 64, alignment: 64 }
body: |
bb.0:
liveins: $bmll0
; CHECK-LABEL: name: composite_spill_lowers_to_bm_native
; CHECK: VST_dmx_sts_bm_spill renamable $bmll0,
; CHECK: renamable $bmll0 = VLDA_dmx_lda_bm_spill
VST_512_COMPOSED_REG_SPILL renamable $bmll0, %stack.0, implicit $sp :: (store (s512) into %stack.0)
renamable $bmll0 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0)
PseudoRET implicit $lr, implicit killed $bmll0
...
125 changes: 125 additions & 0 deletions llvm/test/CodeGen/AIE/aie2ps/ra/spill-vec-acc.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates

# RUN: llc -mtriple=aie2ps -verify-machineinstrs --start-before=greedy --stop-after=virtregrewriter %s -o - | FileCheck %s

# Register-pressure scenario where an ACC1024 value is built in BB0 and
# consumed across a tight self-loop in BB1, while many VEC512 broadcasts
# are also live across the loop edge. Without a combined acc/vec spill
# class, the allocator must spill at least one of these 512-bit values
# to the stack. The aie2p target solves this with the
# spill_vec512_to_composite class; this test mirrors that scenario for
# aie2ps so that enabling the same combined class can be measured by a
# CHECK-line diff (memory spill -> cross-bank allocation).

---
name: spill-vec-acc
alignment: 16
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: spill-vec-acc
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $r16 = MOV_RLC_imm11_pseudo 0
; CHECK-NEXT: renamable $x5 = VBCST_32 renamable $r16
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19312
; CHECK-NEXT: renamable $x0 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $bmll2 = COPY killed renamable $x0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19360
; CHECK-NEXT: renamable $x2 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg -19424
; CHECK-NEXT: renamable $x4 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 32704
; CHECK-NEXT: renamable $x6 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 16256
; CHECK-NEXT: renamable $x8 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46208
; CHECK-NEXT: renamable $x10 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46144
; CHECK-NEXT: renamable $x1 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 46080
; CHECK-NEXT: renamable $x3 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOVXM_lng_cg 45952
; CHECK-NEXT: renamable $bmll0 = COPY killed renamable $x5
; CHECK-NEXT: renamable $x5 = VBCST_16 killed renamable $r0
; CHECK-NEXT: renamable $r0 = MOV_RLC_imm11_pseudo 1
; CHECK-NEXT: renamable $x7 = VBCST_16 renamable $r16
; CHECK-NEXT: renamable $bmlh0 = COPY renamable $bmll0
; CHECK-NEXT: renamable $x9 = VBCST_16 killed renamable $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $bmll2, $cml0:0x000000000000000C, $r16, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x11 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 renamable $cml0, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd
; CHECK-NEXT: renamable $x0 = COPY renamable $bmll2
; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, killed renamable $x0, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $x0 = VSEL_16 renamable $x7, renamable $x10, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $r18 = VGE_bf16 renamable $x11, renamable $x2, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $r20 = VGE_bf16 renamable $x11, renamable $x4, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $r22 = VGE_bf16 renamable $x11, renamable $x6, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $r24 = VGE_bf16 killed renamable $x11, renamable $x8, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x1, killed renamable $r18, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x3, killed renamable $r20, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x5, killed renamable $r22, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x7, killed renamable $r24, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $x0 = VSEL_16 killed renamable $x0, renamable $x9, renamable $r16, implicit $crbf8conf, implicit $crfp8conf
; CHECK-NEXT: renamable $r8 = MOV_RLC_imm11_pseudo 0
; CHECK-NEXT: dead $dm1 = VMUL_f_vmul_bf_vmul_bf_core_X_X $x7, killed $x0, killed $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask
; CHECK-NEXT: PseudoJ_jump_imm %bb.1
bb.0:
successors: %bb.1(0x80000000)

%36:mrs16m = MOV_RLC_imm11_pseudo 0
%40:vec512 = VBCST_32 %36
%58:mrm = MOVXM_lng_cg -19312
%4:vec512 = VBCST_16 %58
%57:mrm = MOVXM_lng_cg -19360
%7:vec512 = VBCST_16 %57
%56:mrm = MOVXM_lng_cg -19424
%10:vec512 = VBCST_16 %56
%55:mrm = MOVXM_lng_cg 32704
%13:vec512 = VBCST_16 %55
%54:mrm = MOVXM_lng_cg 16256
%16:vec512 = VBCST_16 %54
%47:mrm = MOVXM_lng_cg 46208
%21:vec512 = VBCST_16 %47
%48:mrm = MOVXM_lng_cg 46144
%24:vec512 = VBCST_16 %48
%49:mrm = MOVXM_lng_cg 46080
%27:vec512 = VBCST_16 %49
%50:mrm = MOVXM_lng_cg 45952
undef %1.sub_512_acc_lo:acc1024 = COPY %40
%30:vec512 = VBCST_16 %50
%51:mrm = MOV_RLC_imm11_pseudo 1
%19:vec512 = VBCST_16 %36
%1.sub_512_acc_hi:acc1024 = COPY %1.sub_512_acc_lo
%34:vec512 = VBCST_16 %51

bb.1:
successors: %bb.1(0x80000000)

%0:vec512 = VCONV_bf16_fp32_mv_conv_mv_x_srs_bf16 %1, implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crfpconvsat, implicit $crrnd
%3:mrs16m = VGE_bf16 %0, %4, implicit $crbf8conf, implicit $crfp8conf
%18:vec512 = VSEL_16 %19, %21, %3, implicit $crbf8conf, implicit $crfp8conf
%6:mrs16m = VGE_bf16 %0, %7, implicit $crbf8conf, implicit $crfp8conf
%9:mrs16m = VGE_bf16 %0, %10, implicit $crbf8conf, implicit $crfp8conf
%12:mrs16m = VGE_bf16 %0, %13, implicit $crbf8conf, implicit $crfp8conf
%15:mrs16m = VGE_bf16 %0, %16, implicit $crbf8conf, implicit $crfp8conf
%23:vec512 = VSEL_16 %18, %24, %6, implicit $crbf8conf, implicit $crfp8conf
%26:vec512 = VSEL_16 %23, %27, %9, implicit $crbf8conf, implicit $crfp8conf
%29:vec512 = VSEL_16 %26, %30, %12, implicit $crbf8conf, implicit $crfp8conf
%32:vec512 = VSEL_16 %29, %19, %15, implicit $crbf8conf, implicit $crfp8conf
%33:vec512 = VSEL_16 %32, %34, %36, implicit $crbf8conf, implicit $crfp8conf
%62:mrv8 = MOV_RLC_imm11_pseudo 0
dead %61:edm = VMUL_f_vmul_bf_vmul_bf_core_X_X %19, %33, %62, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask
PseudoJ_jump_imm %bb.1

...
Loading