Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/aie2ps/AIE2PSCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def CC_AIE2PS : CallingConv<[
]>;

def CSR_AIE2PS
: CalleeSavedRegs<(add lr, r8, r9, r10, r11, r12, r13, r14, r15, p6, p7)>;
: CalleeSavedRegs<(add lr, l4, l5, l6, l7, r8, r9, r10, r11, r12, r13, r14, r15, p6, p7)>;

def CSR_AIE2PS_Vec
: CalleeSavedRegs<(add lr, r8, r9, r10, r11, r12, r13, r14, r15, p6, p7,
Expand Down
82 changes: 77 additions & 5 deletions llvm/lib/Target/AIE/aie2ps/AIE2PSFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,85 @@ void AIE2PSFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
Register FPReg = STI.getRegisterInfo()->getFrameRegister(MF);
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo();

// If there is a frame pointer (dynamic stack allocation), p7 will be used as
// a frame pointer. The register allocator will not be able to see the
// When both L registers and their sub-GPRs are in the CSR list, we need to
// decide whether to save as L register or individual GPRs.
//
// Strategy:
// - If only one GPR of the pair is used: save just that GPR
// - If both GPRs are used AND function has calls: use L register save
// (stack spill is required, 1 L spill is more efficient than 2 GPR spills)
// - If both GPRs are used AND no calls: use individual GPR saves
// (allows GPR-to-GPR spilling via scratch registers)
// Build the list of callee-saved L registers from the callee-saved regs
// provided by CSR list.
SmallVector<MCPhysReg, 4> CalleeSavedLRegs;
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned I = 0; CSRegs[I]; ++I) {
MCPhysReg Reg = CSRegs[I];
if (AIE2PS::eLRegClass.contains(Reg))
CalleeSavedLRegs.push_back(Reg);
}

for (MCPhysReg LReg : CalleeSavedLRegs) {
// Get the two GPR subregisters of this L register
MCPhysReg EvenGPR = TRI->getSubReg(LReg, AIE2PS::sub_l_even);
MCPhysReg OddGPR = TRI->getSubReg(LReg, AIE2PS::sub_l_odd);

// Check what's marked for saving by the base determineCalleeSaves.
// This already reflects which registers are actually clobbered.
const bool LRegMarked = SavedRegs.test(LReg);
const bool EvenMarked = SavedRegs.test(EvenGPR);
const bool OddMarked = SavedRegs.test(OddGPR);

if (!LRegMarked && !EvenMarked && !OddMarked)
continue;

Comment thread
khallouh marked this conversation as resolved.
SavedRegs.reset(EvenGPR);
SavedRegs.reset(OddGPR);
SavedRegs.reset(LReg);

assert((!(EvenMarked || OddMarked) || LRegMarked) &&
"sub-reg mark without L pair mark violates invariant");

// Determine if both subregisters actually need saving.
// LRegMarked alone doesn't mean both - check individual GPR marks.
const bool BothNeeded =
(EvenMarked && OddMarked) || (LRegMarked && !EvenMarked && !OddMarked);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have both L and R at the same time? Can we assert?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check !EvenMarked && !OddMarked is meant for the case where only one GPR is touched. Since its super-register is in the CSR list, the generic determineCalleeSaves will also mark that L register for saving, but we don't want to save the entire L register, just the GPR. This is then handled in the else case, where we reset the L register and mark only the single GPR that is actually needed.

    } else {
      // Only one GPR needs saving - clear L and keep only the needed GPR.
      SavedRegs.reset(LReg);
      if (EvenMarked || (LRegMarked && !OddMarked))
        SavedRegs.set(EvenGPR);
      else
        SavedRegs.reset(EvenGPR);
      if (OddMarked || (LRegMarked && !EvenMarked))
        SavedRegs.set(OddGPR);
      else
        SavedRegs.reset(OddGPR);
    }

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means there could be cases where an L register and its corresponding R register are marked at the same time, right?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly, but only one of them. This is a big limitation of the generic determineCalleeSaves, as it will happily mark all super-registers for saving even if only one of their subregisters is actually used. So far this has not been an issue, as our CSR list only contained atomic registers.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: You could add an assert assert((!EvenMarked || LRegMarked) && (!OddMarked || LRegMarked) && "sub-reg mark without L pair mark violates invariant");


// When there are calls we mark the L register so that we get a single
// spill instead of 2. When there are no calls, we prefer marking the
// subregisters since they can be copied to non CSR registers instead of
// spilled to memory (There is no move instruction between L registers).
// For the call case we have no choice but to spill anyway since we don't
// know which registers the callee is going to use.
if (BothNeeded) {
// Both subregisters need saving.
if (MFI.hasCalls()) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully understand why we have to do special handling when there is a call in the function, since we are working on callee-saved registers. It would be nice to include a comment for future reference.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, when there are calls we mark the L register so that we get a single spill instead of 2. When there are no calls, we prefer marking the subregisters, since they can be copied to non-CSR registers instead of being spilled to memory (there is no move instruction between L registers). For the call case we have no choice but to spill anyway, since we don't know which registers the callee is going to use. I'll add this as a comment.

// Use L register save. Stack spill is required (scratch regs
// clobbered by calls), so 1 L spill is more efficient than 2 GPR
// spills.
SavedRegs.set(LReg);
Comment thread
khallouh marked this conversation as resolved.
} else {
// No calls: use individual GPRs for GPR-to-GPR copy.
SavedRegs.set(EvenGPR);
SavedRegs.set(OddGPR);
}
} else if (EvenMarked) {
SavedRegs.set(EvenGPR);
} else {
assert(OddMarked);
SavedRegs.set(OddGPR);
}
}
// If there is a frame pointer (dynamic stack allocation), p7 will be used
// as a frame pointer. The register allocator will not be able to see the
// redefinition of p7 as the prologue and the epilogue are emitted after the
// register allocation. Thus, we make sure to spill p7 at the beginning of the
// function body and restore it at the end by adding it in SavedRegs.
// register allocation. Thus, we make sure to spill p7 at the beginning of
// the function body and restore it at the end by adding it in SavedRegs.
const Register FPReg = TRI->getFrameRegister(MF);
if (hasFP(MF))
SavedRegs.set(FPReg);
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AIE/aie2ps/AIE2PSRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class AIE2PSScalarRegisterClass<dag reglist, RegAltNameIndex idx = NoRegAltName>
idx>;

class AIE2PSScalar64RegisterClass<dag reglist, RegAltNameIndex idx = NoRegAltName>
: AIE2PSRegisterClass<64, 32, [i64, v8i8, v4i16, v4bf16, v4f16,v2i32, v2f32, v64i1], reglist,
: AIE2PSRegisterClass<64, 64, [i64, v8i8, v4i16, v4bf16, v4f16,v2i32, v2f32, v64i1], reglist,
idx>;

class AIE2PSVector128RegisterClass<dag reglist> :
Expand Down
32 changes: 14 additions & 18 deletions llvm/test/CodeGen/AIE/GlobalISel/legalize-dyn-stackalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -243,23 +243,21 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
;
; AIE2PS-LABEL: test_loop_dyn_alloca:
; AIE2PS: // %bb.0: // %entry
; AIE2PS-NEXT: paddxm [sp], #64; nopb ; nopxm
; AIE2PS-NEXT: st p7, [sp, #-32] // 4-byte Folded Spill
; AIE2PS-NEXT: paddxm [sp], #64
; AIE2PS-NEXT: st p7, [sp, #-28] // 4-byte Folded Spill
; AIE2PS-NEXT: mov p7, sp
; AIE2PS-NEXT: st r8, [sp, #-60] // 4-byte Folded Spill
; AIE2PS-NEXT: st r9:r8, [sp, #-56] // 8-byte Folded Spill
; AIE2PS-NEXT: mova r8, #1
; AIE2PS-NEXT: st r10, [sp, #-52] // 4-byte Folded Spill
; AIE2PS-NEXT: st r11:r10, [sp, #-48] // 8-byte Folded Spill
; AIE2PS-NEXT: mova r10, #0
; AIE2PS-NEXT: st r12, [sp, #-44] // 4-byte Folded Spill
; AIE2PS-NEXT: st r12, [sp, #-40] // 4-byte Folded Spill
; AIE2PS-NEXT: mova r12, #10
; AIE2PS-NEXT: st r14, [sp, #-40] // 4-byte Folded Spill
; AIE2PS-NEXT: st r14, [sp, #-36] // 4-byte Folded Spill
; AIE2PS-NEXT: mova r14, #2
; AIE2PS-NEXT: st r9, [sp, #-56] // 4-byte Folded Spill
; AIE2PS-NEXT: mova r9, #-64
; AIE2PS-NEXT: st r11, [sp, #-48] // 4-byte Folded Spill
; AIE2PS-NEXT: mova r11, #0
; AIE2PS-NEXT: st lr, [sp, #-64] // 4-byte Folded Spill
; AIE2PS-NEXT: st p6, [sp, #-36] // 4-byte Folded Spill
; AIE2PS-NEXT: st p6, [sp, #-32] // 4-byte Folded Spill
; AIE2PS-NEXT: padda [p7], #-64
; AIE2PS-NEXT: .LBB1_1: // %for.body
; AIE2PS-NEXT: // =>This Inner Loop Header: Depth=1
Expand All @@ -273,7 +271,7 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; AIE2PS-NEXT: mov m0, r0 // Delay Slot 3
; AIE2PS-NEXT: padda [p1], m0 // Delay Slot 2
; AIE2PS-NEXT: mov sp, p1 // Delay Slot 1
; AIE2PS-NEXT: nopa ; add r10, r10, #1
; AIE2PS-NEXT: nopa ; nopb ; add r10, r10, #1; nopm ; nops
; AIE2PS-NEXT: ltu r0, r10, r8
; AIE2PS-NEXT: add r11, r11, r0
; AIE2PS-NEXT: xor r0, r10, r12
Expand All @@ -286,15 +284,13 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; AIE2PS-NEXT: mov sp, p6 // Delay Slot 1
; AIE2PS-NEXT: // %bb.2: // %for.cond.cleanup
; AIE2PS-NEXT: mov sp, p7
; AIE2PS-NEXT: lda p7, [sp, #-32] // 4-byte Folded Reload
; AIE2PS-NEXT: lda p6, [sp, #-36] // 4-byte Folded Reload
; AIE2PS-NEXT: lda lr, [sp, #-64] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r14, [sp, #-40] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r12, [sp, #-44] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r11, [sp, #-48] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r10, [sp, #-52] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r9, [sp, #-56] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r8, [sp, #-60] // 4-byte Folded Reload
; AIE2PS-NEXT: lda p7, [sp, #-28] // 4-byte Folded Reload
; AIE2PS-NEXT: lda p6, [sp, #-32] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r14, [sp, #-36] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r12, [sp, #-40] // 4-byte Folded Reload
; AIE2PS-NEXT: lda r11:r10, [sp, #-48] // 8-byte Folded Reload
; AIE2PS-NEXT: lda r9:r8, [sp, #-56] // 8-byte Folded Reload
; AIE2PS-NEXT: ret lr
; AIE2PS-NEXT: nop // Delay Slot 5
; AIE2PS-NEXT: nop // Delay Slot 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,12 @@ declare <16 x i32> @llvm.aie2ps.vshuffle(<16 x i32>, <16 x i32>, i32) #1
define weak_odr dso_local void @_Z9avgpool2dILh1E8bfloat16Qsr5mllib5utilsE11is_one_of_vIT0_ahS0_7float16EEvPS1_S3_R25avgpool2d_internal_paramsIS1_E(ptr noalias %ifm_ptr, ptr noalias %ofm_ptr, ptr nonnull align 64 dereferenceable(64) %avgpool2d_params) local_unnamed_addr #2 comdat {
; CHECK-LABEL: _Z9avgpool2dILh1E8bfloat16Qsr5mllib5utilsE11is_one_of_vIT0_ahS0_7float16EEvPS1_S3_R25avgpool2d_internal_paramsIS1_E:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: paddxm [sp], #64
; CHECK-NEXT: st r10, [sp, #-52] // 4-byte Folded Spill
; CHECK-NEXT: lda.u8 r0, [p2, #0]; st p6, [sp, #-44]; mov m0, #46 // 4-byte Folded Spill
; CHECK-NEXT: mova m0, #-42; paddb [p2], m0; st p7, [sp, #-40] // 4-byte Folded Spill
; CHECK-NEXT: lda.s16 r2, [p2], m0; st r8, [sp, #-60] // 4-byte Folded Spill
; CHECK-NEXT: st lr, [sp, #-64] // 4-byte Folded Spill
; CHECK-NEXT: st r9, [sp, #-56] // 4-byte Folded Spill
; CHECK-NEXT: paddxm [sp], #64; nopb ; nopx
; CHECK-NEXT: lda.u8 r0, [p2, #0]; st r10, [sp, #-52]; mov m0, #46 // 4-byte Folded Spill
; CHECK-NEXT: mova m0, #-42; paddb [p2], m0; st p6, [sp, #-44] // 4-byte Folded Spill
; CHECK-NEXT: lda.s16 r2, [p2], m0; st p7, [sp, #-40] // 4-byte Folded Spill
; CHECK-NEXT: st r9:r8, [sp, #-64] // 8-byte Folded Spill
; CHECK-NEXT: st lr, [sp, #-56] // 4-byte Folded Spill
; CHECK-NEXT: st r12, [sp, #-48]; jl #__floatsisf // 4-byte Folded Spill
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
Expand Down Expand Up @@ -127,12 +126,12 @@ define weak_odr dso_local void @_Z9avgpool2dILh1E8bfloat16Qsr5mllib5utilsE11is_o
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vshift x8, x5, x9, r18; nopv
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
; CHECK-NEXT: lda lr, [sp, #-64]; nopb ; nops ; nopx ; vshuffle x11, x7, x10, r4; nopv // 4-byte Folded Reload
; CHECK-NEXT: lda p6, [sp, #-44]; nopb ; nopx ; vshift x1, x6, x5, r0; vmac.f cml7, cml7, x6, x2, r8 // 4-byte Folded Reload
; CHECK-NEXT: lda lr, [sp, #-56]; vshuffle x11, x7, x10, r4 // 4-byte Folded Reload
; CHECK-NEXT: lda p6, [sp, #-44]; vshift x1, x6, x5, r0; vmac.f cml7, cml7, x6, x2, r8 // 4-byte Folded Reload
; CHECK-NEXT: lda r12, [sp, #-48]; vshift x7, x5, x9, r0; vmac.f cml6, cml6, x8, x2, r8 // 4-byte Folded Reload
; CHECK-NEXT: lda r10, [sp, #-52]; vshift x7, x7, x11, r20 // 4-byte Folded Reload
; CHECK-NEXT: lda r9, [sp, #-56]; vshift x3, x6, x5, r6; vmac.f cml7, cml7, x1, x2, r8 // 4-byte Folded Reload
; CHECK-NEXT: lda r8, [sp, #-60]; vshift x10, x5, x9, r16; vmac.f cml6, cml6, x7, x2, r8 // 4-byte Folded Reload
; CHECK-NEXT: lda r9:r8, [sp, #-64]; vshift x3, x6, x5, r6; vmac.f cml7, cml7, x1, x2, r8 // 8-byte Folded Reload
; CHECK-NEXT: vshift x10, x5, x9, r16; vmac.f cml6, cml6, x7, x2, r8
; CHECK-NEXT: vshift x10, x10, x4, r24
; CHECK-NEXT: vshift x3, x3, x9, r22
; CHECK-NEXT: vmac.f cml6, cml6, x10, x2, r8
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AIE/aie2ps/spill/spill-reload-efg.mir
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple=aie2ps --verify-machineinstrs -run-pass=greedy -run-pass=virtregrewriter %s -o - | FileCheck %s

# There is only one register available, it will then be used for the %0
Expand Down Expand Up @@ -75,10 +75,10 @@ body: |
; CHECK-LABEL: name: test_ee_scarce_regs
; CHECK: liveins: $e0, $e1, $e2, $e3, $e4, $e5, $e6, $e7, $e8, $e9, $e10, $e11, $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: VST_EE_SPILL $e0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_EE_SPILL $e0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: $e0 = COPY $e1
; CHECK-NEXT: ST_dms_sts_scalar_st_idx_imm $r0, $p0, 0, implicit $e0, implicit $e1, implicit $e2, implicit $e3, implicit $e4, implicit $e5, implicit $e6, implicit $e7, implicit $e8, implicit $e9, implicit $e10, implicit $e11
; CHECK-NEXT: renamable $e0 = VLDA_EE_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $e0 = VLDA_EE_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $e0
%0:mees = COPY $e0 ; -> only $e0 is available

Expand All @@ -102,9 +102,9 @@ body: |
; CHECK-LABEL: name: test_ee_caller_saved
; CHECK: liveins: $e0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: VST_EE_SPILL $e0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_EE_SPILL $e0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: PseudoJL 32, csr_aie2ps, implicit-def $lr
; CHECK-NEXT: renamable $e0 = VLDA_EE_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $e0 = VLDA_EE_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $e0
%0:mees = COPY $e0
PseudoJL 32, csr_aie2ps, implicit-def $lr
Expand All @@ -125,10 +125,10 @@ body: |
; CHECK-LABEL: name: test_f_scarce_regs
; CHECK: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $fl0, $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: VST_F_SPILL $fl0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_F_SPILL $fl0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: $f0 = COPY $f1
; CHECK-NEXT: ST_dms_sts_scalar_st_idx_imm $r0, $p0, 0, implicit $f0, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11
; CHECK-NEXT: renamable $fl0 = VLDA_F_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $fl0 = VLDA_F_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $fl0
%0:mfs = COPY $fl0 ; -> only $f0 is available

Expand All @@ -153,9 +153,9 @@ body: |
; CHECK: liveins: $f0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $fl0 = COPY $f0
; CHECK-NEXT: VST_F_SPILL killed renamable $fl0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_F_SPILL killed renamable $fl0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: PseudoJL 32, csr_aie2ps, implicit-def $lr
; CHECK-NEXT: renamable $fl0 = VLDA_F_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $fl0 = VLDA_F_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $fl0
%0:mfs = COPY $f0
PseudoJL 32, csr_aie2ps, implicit-def $lr
Expand Down Expand Up @@ -277,10 +277,10 @@ body: |
; CHECK-LABEL: name: test_gg_scarce_regs
; CHECK: liveins: $g0, $g1, $g2, $g3, $g4, $g5, $g6, $g7, $g8, $g9, $g10, $g11, $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: VST_GG_SPILL $g0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_GG_SPILL $g0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: $g0 = COPY $g1
; CHECK-NEXT: ST_dms_sts_scalar_st_idx_imm $r0, $p0, 0, implicit $g0, implicit $g1, implicit $g2, implicit $g3, implicit $g4, implicit $g5, implicit $g6, implicit $g7, implicit $g8, implicit $g9, implicit $g10, implicit $g11
; CHECK-NEXT: renamable $g0 = VLDA_GG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $g0 = VLDA_GG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $g0
%0:mggs = COPY $g0 ; -> only $g0 is available

Expand All @@ -304,9 +304,9 @@ body: |
; CHECK-LABEL: name: test_gg_caller_saved
; CHECK: liveins: $g0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: VST_GG_SPILL $g0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_GG_SPILL $g0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: PseudoJL 32, csr_aie2ps, implicit-def $lr
; CHECK-NEXT: renamable $g0 = VLDA_GG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $g0 = VLDA_GG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $g0
%0:mggs = COPY $g0
PseudoJL 32, csr_aie2ps, implicit-def $lr
Expand All @@ -327,10 +327,10 @@ body: |
; CHECK-LABEL: name: test_eg_scarce_regs
; CHECK: liveins: $eg1, $eg2, $eg3, $eg4, $eg5, $eg6, $eg7, $eg8, $eg9, $eg10, $eg11, $egl0, $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: VST_EG_SPILL $egl0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_EG_SPILL $egl0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: $eg0 = COPY $eg1
; CHECK-NEXT: ST_dms_sts_scalar_st_idx_imm $r0, $p0, 0, implicit $eg0, implicit $eg1, implicit $eg2, implicit $eg3, implicit $eg4, implicit $eg5, implicit $eg6, implicit $eg7, implicit $eg8, implicit $eg9, implicit $eg10, implicit $eg11
; CHECK-NEXT: renamable $egl0 = VLDA_EG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $egl0 = VLDA_EG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $egl0
%0:megs = COPY $egl0 ; -> only $eg0 is available

Expand All @@ -355,9 +355,9 @@ body: |
; CHECK: liveins: $eg0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $egl0 = COPY $eg0
; CHECK-NEXT: VST_EG_SPILL killed renamable $egl0, %stack.0, implicit $sp :: (store (s64) into %stack.0, align 4)
; CHECK-NEXT: VST_EG_SPILL killed renamable $egl0, %stack.0, implicit $sp :: (store (s64) into %stack.0)
; CHECK-NEXT: PseudoJL 32, csr_aie2ps, implicit-def $lr
; CHECK-NEXT: renamable $egl0 = VLDA_EG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0, align 4)
; CHECK-NEXT: renamable $egl0 = VLDA_EG_SPILL %stack.0, implicit $sp :: (load (s64) from %stack.0)
; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $egl0
%0:megs = COPY $eg0
PseudoJL 32, csr_aie2ps, implicit-def $lr
Expand Down
Loading
Loading