From fd620907d23bedd5392d5e80bbeb59fa69bcf039 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Tue, 5 May 2026 08:36:30 -0600 Subject: [PATCH 1/3] [AIEX] Revert: clear ancestor LiveInterval in rewriteSuperReg LiveRange::clear() destructively mutates a shared LiveInterval; any cached reference held by greedy (spill weights, queue entries, hint reconciliation) is left dangling. --- llvm/lib/Target/AIE/AIESuperRegUtils.cpp | 10 - .../AIE/aie2p/ra/staged-ra-stale-remat.mir | 250 ++++++++++++++++-- 2 files changed, 228 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp index 0d9269ce0694..acdf5995ab24 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp @@ -376,16 +376,6 @@ void rewriteSuperReg(Register Reg, std::optional AssignedPhysReg, // Step 4: Remove the original register's live interval LIS.removeInterval(Reg); - // Step 4b: Clear stale ancestor live intervals. The operand rewrite in - // step 3 modified instructions in-place (e.g., stripping sub-register - // indices). Any ancestor register in the VRM split chain still has VNInfos - // pointing to those instruction slots. If a later Greedy pass traces back - // via VRM.getOriginal(), it would find a stale instruction and could produce - // an invalid rematerialization. Clearing the ancestor interval prevents this. 
- Register Original = VRM.getOriginal(Reg); - if (Original != Reg && LIS.hasInterval(Original)) - LIS.getInterval(Original).clear(); - // Step 5: Filter out empty subregisters markEffectiveEmptyCopiesDead(SubRegToVReg, MRI, TRI, LIS); diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir b/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir index 580d3cf4e361..a488dce8e010 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir @@ -1,11 +1,11 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # This file is licensed under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates - -# RUN: llc -mtriple=aie2p -start-before=greedy %s -o /dev/null --filetype=obj - +# RUN: : not --crash llc -mtriple=aie2p -start-before=greedy %s -o /dev/null --filetype=obj +# RUN: llc -mtriple=aie2p -start-before=greedy -stop-before=virtregrewriter %s -o - | FileCheck %s # Verify that staged register allocation does not produce an invalid # rematerialization of a MOV_PD_imm11_pseudo into a composed eds (3D) # register. 
The bug occurs when rewriteSuperReg (in AIESuperRegUtils) rewrites @@ -43,10 +43,216 @@ frameInfo: savePoint: '' restorePoint: '' body: | + ; CHECK-LABEL: name: stale_remat_test + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: [[VBCST_32_:%[0-9]+]]:vec512 = VBCST_32 [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: VST_dmx_sts_x_spill [[VBCST_32_]], %stack.1, implicit $sp :: (store (s512) into %stack.1) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: dead [[VBCST_32_1:%[0-9]+]]:vec512 = VBCST_32 [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[VBCST_32_2:%[0-9]+]]:vec512 = VBCST_32 [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: VST_dmx_sts_x_spill [[VBCST_32_2]], %stack.0, implicit $sp :: (store (s512) into %stack.0) + ; CHECK-NEXT: $p0 = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: PseudoJL_IND [[MOV_PD_imm11_pseudo]], csr_aie2p, implicit-def $lr, implicit $p0 + ; CHECK-NEXT: [[VLDA_512_COMPOSED_REG_SPILL:%[0-9]+]]:vec512 = VLDA_512_COMPOSED_REG_SPILL %stack.1, implicit $sp :: (load (s512) from %stack.1) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edn_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:magusrc_and_magudst_and_spill_edj_to_er = MOV_PD_imm11_pseudo 128 + ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub_dim_stride:eds = COPY [[MOV_PD_imm11_pseudo2]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spill_em_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:spill_edj_to_er = COPY 
[[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:spill_edj_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:spill_edc_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:spill_edc_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[XOR:%[0-9]+]]:er = XOR [[COPY]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[AND:%[0-9]+]]:er = AND [[XOR]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:edcl = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ednl = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:edjl = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:em_as_32bit = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:edch = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:ednh = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:edjh = COPY [[COPY4]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, dead [[COPY9:%[0-9]+]]:edcl, dead [[COPY13:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY12]], [[COPY10]], [[COPY11]], [[COPY9]], undef %48:em_as_32bit, [[COPY14]], [[COPY15]], [[COPY13]] + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL]], [[MOV_PD_imm11_pseudo3]], 0 :: (store (<64 x s16>)) + ; CHECK-NEXT: PseudoJNZ [[AND]], %bb.1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:er = AND [[COPY]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:eds = 
MOV_PD_imm11_pseudo 128 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]].sub_mod:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:spill_edc_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:edjl = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:edjh = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edcl = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:spill_edc_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:magusrc_and_magudst_and_spill_em_to_er = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo3:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: [[VLDA_512_COMPOSED_REG_SPILL1:%[0-9]+]]:vec512 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJNZ [[AND1]], %bb.5 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; 
CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY28:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[COPY29:%[0-9]+]].sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY29]].sub_dim_stride:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_stride + ; CHECK-NEXT: internal [[COPY29]].sub_mod:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_mod + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:spill_em_to_er = COPY [[COPY29]].sub_mod + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:spill_edc_to_er = COPY [[COPY24]] + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:edcl = COPY [[COPY18]] + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:ednl = COPY [[COPY31]] + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:edjl = COPY [[COPY16]] + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:em_as_32bit = COPY [[COPY30]] + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:edch = COPY [[COPY33]] + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:ednh = COPY [[COPY32]] + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:edjh = COPY [[COPY17]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY34:%[0-9]+]]:edcl, dead [[COPY38:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY37]], [[COPY35]], [[COPY36]], [[COPY34]], undef %64:em_as_32bit, [[COPY39]], [[COPY40]], [[COPY38]] + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:em_as_32bit = COPY [[COPY29]].sub_mod + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:ednl = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:ednh = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit, [[COPY26:%[0-9]+]]:edcl, dead [[COPY25:%[0-9]+]]:edch = 
PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY41]], [[COPY42]], [[COPY19]], [[COPY26]], undef %56:em_as_32bit, [[COPY43]], [[COPY20]], [[COPY25]] + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:spill_edc_to_er = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:spill_em_to_er = COPY [[COPY29]].sub_mod + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub_dim_size:eds = COPY [[COPY29]].sub_dim_size + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_dim_stride:eds = COPY [[COPY29]].sub_dim_stride + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_mod:eds = COPY [[COPY29]].sub_mod + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:edcl = COPY [[COPY27]] + ; CHECK-NEXT: [[COPY50:%[0-9]+]]:ednl = COPY [[COPY46]] + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:edjl = COPY [[COPY21]] + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:em_as_32bit = COPY [[COPY45]] + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:edch = COPY [[COPY44]] + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:ednh = COPY [[COPY47]] + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:edjh = COPY [[COPY22]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, [[COPY49:%[0-9]+]]:edcl, dead [[COPY53:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY52]], [[COPY50]], [[COPY51]], [[COPY49]], undef %72:em_as_32bit, [[COPY54]], [[COPY55]], [[COPY53]] + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:edjh = COPY [[COPY48]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY57:%[0-9]+]]:em_as_32bit = COPY [[COPY48]].sub_mod + ; CHECK-NEXT: [[COPY58:%[0-9]+]]:ednl = COPY [[COPY48]].sub_dim_size + ; CHECK-NEXT: [[COPY59:%[0-9]+]]:edcl = COPY [[COPY34]] + ; CHECK-NEXT: [[COPY60:%[0-9]+]]:ednh = COPY [[COPY48]].sub_dim_size + ; CHECK-NEXT: [[COPY61:%[0-9]+]]:edch = COPY [[COPY44]] + ; CHECK-NEXT: 
[[COPY62:%[0-9]+]]:edj_as_32bit = COPY [[COPY23]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, dead [[COPY59:%[0-9]+]]:edcl, [[COPY61:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY57]], [[COPY58]], [[COPY62]], [[COPY59]], undef %94:em_as_32bit, [[COPY60]], [[COPY56]], [[COPY61]] + ; CHECK-NEXT: [[COPY63:%[0-9]+]]:spill_edj_to_er = COPY [[COPY62]] + ; CHECK-NEXT: [[COPY64:%[0-9]+]]:spill_edc_to_er = COPY [[COPY61]] + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL1]], [[MOV_PD_imm11_pseudo6]], 0 :: (store (<32 x s16>)) + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY48]].sub_dim_size + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_dim_count:eds = COPY [[COPY26]] + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY48]].sub_dim_size + ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY44]] + ; CHECK-NEXT: undef [[COPY65:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY48]].sub_lo_dim + ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY48]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY48]].sub_hi_dim_then_sub_dim_size + ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY48]].sub_hi_dim_then_sub_dim_stride + ; CHECK-NEXT: PseudoJNZ [[AND1]], %bb.7 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY66:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:er = XOR [[COPY]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub_lo_dim:eds 
= COPY [[COPY65]].sub_lo_dim + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY65]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY65]].sub_hi_dim_then_sub_dim_size + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY65]].sub_hi_dim_then_sub_dim_stride + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit, [[COPY67:%[0-9]+]].sub_dim_count:eds, [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY67]].sub_mod, [[COPY67]].sub_dim_size, [[COPY67]].sub_dim_stride, [[COPY67]].sub_dim_count, undef [[COPY67]].sub_hi_dim_then_sub_mod, [[COPY67]].sub_hi_dim_then_sub_dim_size, [[COPY67]].sub_hi_dim_then_sub_dim_stride, [[COPY67]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo11:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:er = AND [[XOR1]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: [[COPY68:%[0-9]+]]:edjh = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[COPY69:%[0-9]+]]:edch = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[COPY70:%[0-9]+]]:em_as_32bit = COPY [[COPY67]].sub_mod + ; CHECK-NEXT: [[COPY71:%[0-9]+]]:ednl = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[COPY72:%[0-9]+]]:edjl = COPY [[COPY67]].sub_dim_stride + ; CHECK-NEXT: [[COPY73:%[0-9]+]]:ednh = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo11:%[0-9]+]]:ep, [[COPY49:%[0-9]+]]:edcl, dead [[COPY69:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo11]], [[COPY70]], [[COPY71]], [[COPY72]], [[COPY49]], undef %72:em_as_32bit, [[COPY73]], [[COPY68]], [[COPY69]] + ; CHECK-NEXT: [[COPY74:%[0-9]+]]:edcl = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[COPY75:%[0-9]+]]:edch = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY76:%[0-9]+]]:em_as_32bit = COPY 
[[COPY67]].sub_mod + ; CHECK-NEXT: [[COPY77:%[0-9]+]]:ednl = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[COPY78:%[0-9]+]]:edjl = COPY [[COPY68]] + ; CHECK-NEXT: [[COPY79:%[0-9]+]]:ednh = COPY [[COPY67]].sub_dim_size + ; CHECK-NEXT: [[COPY80:%[0-9]+]]:edjh = COPY [[COPY68]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep_as_32bit, dead [[COPY74:%[0-9]+]]:edcl, dead [[COPY75:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo6]], [[COPY76]], [[COPY77]], [[COPY78]], [[COPY74]], undef %86:em_as_32bit, [[COPY79]], [[COPY80]], [[COPY75]] + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:spill_edc_to_er = COPY [[COPY49]] + ; CHECK-NEXT: undef [[MOV_PD_imm11_pseudo4:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY67]].sub_lo_dim + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo7]] + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edcl = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_count + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:spill_edj_to_er = COPY [[COPY63]] + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:edch = COPY [[COPY64]] + ; CHECK-NEXT: PseudoJNZ [[AND2]], %bb.3 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY81:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo7]] + ; CHECK-NEXT: [[COPY82:%[0-9]+]]:spill_edc_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY83:%[0-9]+]]:spill_edn_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY84:%[0-9]+]]:edc = COPY [[COPY82]] + ; CHECK-NEXT: [[COPY85:%[0-9]+]]:edn = COPY [[COPY83]] + ; CHECK-NEXT: [[COPY86:%[0-9]+]]:edj = COPY [[COPY81]] + ; CHECK-NEXT: [[COPY87:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo7]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:ep_as_32bit, dead [[COPY84:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo]], [[COPY87]], [[COPY85]], 
[[COPY86]], [[COPY84]] + ; CHECK-NEXT: PseudoJ_jump_imm %bb.9 bb.0: successors: %bb.1(0x80000000) liveins: $r0 - + undef %103.sub_dim_size:eds = MOV_PD_imm11_pseudo 0 %18:erf2 = MOV_RLC_imm11_pseudo 0 %103.sub_dim_stride:eds = MOV_PD_imm11_pseudo 128 @@ -68,10 +274,10 @@ body: | %127.sub_hi_dim_then_sub_dim_count:eds = COPY %103.sub_dim_size %127.sub_dim_size:eds = COPY %103.sub_dim_size %127.sub_hi_dim_then_sub_dim_size:eds = COPY %103.sub_dim_size - + bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - + %84:er = XOR %1, %85 %3:ep = MOV_PD_imm11_pseudo 0 %81:er = AND %84, %85 @@ -80,10 +286,10 @@ body: | VST_dmx_sts_x_idx_imm %7, %3, 0 :: (store (<64 x s16>)) PseudoJNZ %81, %bb.1 PseudoJ_jump_imm %bb.2 - + bb.2: successors: %bb.3(0x80000000) - + %87:er = MOV_RLC_imm11_pseudo 1 %80:er = AND %1, %87 %103.sub_mod:eds = COPY %103.sub_dim_size @@ -101,21 +307,21 @@ body: | %117.sub_hi_dim_then_sub_dim_count:eds = COPY %103.sub_dim_size %117.sub_dim_count:eds = COPY %103.sub_dim_size %112.sub_dim_count:eds = COPY %103.sub_dim_size - + bb.3: successors: %bb.5(0x40000000), %bb.4(0x40000000) - + PseudoJNZ %80, %bb.5 PseudoJ_jump_imm %bb.4 - + bb.4: successors: %bb.5(0x80000000) - + %126:erf2 = COPY %18 - + bb.5: successors: %bb.7(0x40000000), %bb.6(0x40000000) - + %121.sub_mod:eds = COPY %103.sub_mod %20:ep = MOV_PD_imm11_pseudo 0 %121.sub_dim_size:eds = COPY %103.sub_dim_size @@ -149,15 +355,15 @@ body: | %103.sub_hi_dim_then_sub_dim_count:eds = COPY %112.sub_hi_dim_then_sub_dim_count PseudoJNZ %80, %bb.7 PseudoJ_jump_imm %bb.6 - + bb.6: successors: %bb.7(0x80000000) - + %102:erf2 = COPY %18 - + bb.7: successors: %bb.8(0x04000000), %bb.3(0x7c000000) - + %133:ep_as_32bit = MOV_PD_imm11_pseudo 0 %77:er = XOR %1, %87 %133:ep_as_32bit, %103.sub_dim_count:eds, %103.sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split %133, %103.sub_mod, %103.sub_dim_size, %103.sub_dim_stride, %103.sub_dim_count, undef %103.sub_hi_dim_then_sub_mod, %103.sub_hi_dim_then_sub_dim_size, 
%103.sub_hi_dim_then_sub_dim_stride, %103.sub_hi_dim_then_sub_dim_count @@ -184,17 +390,17 @@ body: | %117.sub_dim_count:eds = COPY %103.sub_dim_count PseudoJNZ %74, %bb.3 PseudoJ_jump_imm %bb.8 - + bb.8: successors: %bb.9(0x80000000) - + %89.sub_dim_stride:ed = COPY %89.sub_mod %89.sub_dim_count:ed = COPY %103.sub_dim_size %89.sub_dim_size:ed = COPY %103.sub_dim_size - + bb.9: successors: %bb.9(0x80000000) - + %148:ed = COPY %89 %139:ep_as_32bit, dead %148.sub_dim_count:ed = PADD_2D_pseudo_split %139, %148.sub_mod, %148.sub_dim_size, %148.sub_dim_stride, %148.sub_dim_count PseudoJ_jump_imm %bb.9 From 506478c0afb7a1515c2f64cfb49c1d9eea7d41b8 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Thu, 7 May 2026 04:41:29 -0600 Subject: [PATCH 2/3] [AIEX] Sever stale VRM split-from chain after super-reg rewrites AIESuperRegRewriter and AIEUnallocatedSuperRegRewriter rewrite or unbundle instructions in place but leave the LiveInterval of the VRM "Original" ancestor untouched. Its VNInfos still point at slots whose MIs have changed, so a later Greedy split that consults OrigLI via SplitEditor::defFromParent can rematerialize the wrong instruction (e.g. a sublane-only MOV becomes a full-composite def). Add AIESuperRegUtils::clearStaleSplitFromMappings and call it from both passes. It clears the VRM split-from mapping for every descendant of a touched Original via the new VirtRegMap::clearSplitFromReg API, restoring each descendant to the canonical "no split parent" state of a freshly created vreg. SplitKit then reads the descendant's own repaired LI rather than the stale ancestor's, and predicates that examine Virt2SplitMap directly (e.g. isAssignedReg) no longer treat the descendant as a split product. OrigLI is left untouched. 
--- llvm/include/llvm/CodeGen/VirtRegMap.h | 10 +- llvm/lib/Target/AIE/AIESuperRegRewriter.cpp | 10 +- llvm/lib/Target/AIE/AIESuperRegUtils.cpp | 25 ++ llvm/lib/Target/AIE/AIESuperRegUtils.h | 17 ++ .../AIE/AIEUnallocatedSuperRegRewriter.cpp | 10 + .../AIE/aie2p/ra/staged-ra-stale-remat.mir | 243 +++++++++--------- 6 files changed, 194 insertions(+), 121 deletions(-) diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index 2e7545312c87..dfef831d40e8 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Modifications (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its +// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its // affiliates // //===----------------------------------------------------------------------===// @@ -176,6 +176,14 @@ class VirtRegMap { } } + /// clearSplitFromReg - Remove the split-from mapping for virtReg, + /// making it its own original. This restores the register to the + /// same canonical state as a freshly created vreg (no split parent). + void clearSplitFromReg(Register virtReg) { + assert(virtReg.isVirtual()); + Virt2SplitMap[virtReg] = Register(); + } + /// returns the live interval virtReg is split from. Register getPreSplitReg(Register virtReg) const { return Virt2SplitMap[virtReg]; diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 44e84037df91..1b1fd0ef9e8d 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. 
or its affiliates +// (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// @@ -114,6 +114,11 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { } } + // Snapshot Originals whose LI is about to go stale. + SmallSet TaintedOriginals; + for (auto &[VReg, _] : AssignedPhysRegs) + TaintedOriginals.insert(VRM.getOriginal(VReg)); + // Re-write all the collected VRegs for (auto &[VReg, PhysRegAndSubRegs] : AssignedPhysRegs) { const Register PhysReg = PhysRegAndSubRegs.first; @@ -122,6 +127,9 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { LRM, LIS, Indexes, DebugVars); } + // Prevent SplitKit from rematerializing through stale ancestor LIs. + AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MRI, VRM); + LLVM_DEBUG(VRM.dump()); return !AssignedPhysRegs.empty(); } diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp index acdf5995ab24..c0fea76575e7 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "aie-ra" @@ -403,6 +404,30 @@ bool isRegUsedBy2DOr3DInstruction(const MachineRegisterInfo &MRI, }); } +void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, + MachineRegisterInfo &MRI, VirtRegMap &VRM) { + if (TaintedOriginals.empty()) + return; + + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + const Register V = Register::index2VirtReg(I); + if (MRI.reg_nodbg_empty(V)) + continue; + const Register Orig = VRM.getPreSplitReg(V); + if (!Orig || !TaintedOriginals.count(Orig)) + continue; + + LLVM_DEBUG({ + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + 
dbgs() << " Clearing stale split-from for " << printReg(V, TRI, 0, &MRI) + << " (was split from " << printReg(Orig, TRI, 0, &MRI) << ")\n"; + }); + // Restore V to the canonical "no split parent" state so getOriginal(V)==V + // and isAssignedReg() does not treat V as a split product. + VRM.clearSplitFromReg(V); + } +} + void repairLiveIntervals(SmallSet &RegistersToRepair, VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS) { diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.h b/llvm/lib/Target/AIE/AIESuperRegUtils.h index 74d592f66c95..03c2263ba7db 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.h +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.h @@ -84,6 +84,23 @@ void repairLiveIntervals(SmallSet &RegistersToRepair, VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS); +/// Sever VRM split-from chain for descendants of \p TaintedOriginals so that +/// SplitEditor::defFromParent consults the descendant's own (repaired) LI, not +/// the stale ancestor LI which may still hold VNs at slots whose MIs were +/// rewritten/unbundled by an AIE register-rewriter pass. Each affected +/// descendant is restored via VRM.clearSplitFromReg() to the canonical +/// "no split parent" state of a freshly created vreg. +/// +/// before: after: +/// %0 (stale LI) %0 (stale LI, ignored) +/// | split-from x (chain cut) +/// %35 ----. 
%35 (no split parent) +/// | split-from +/// %141..%144 (future split greedy splits will use %35's LI +/// would consult %0's LI) instead of %0's +void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, + MachineRegisterInfo &MRI, VirtRegMap &VRM); + } // namespace llvm::AIESuperRegUtils #endif diff --git a/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp index 28be72b3963a..4c14ef24605c 100644 --- a/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp @@ -229,6 +229,13 @@ bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { return false; } + // Snapshot Originals whose LI is about to go stale. + SmallSet TaintedOriginals; + for (auto &P : Info.ExpandableRegs) + TaintedOriginals.insert(VRM.getOriginal(P.first)); + for (auto &P : Info.RewritableRegs) + TaintedOriginals.insert(VRM.getOriginal(P.first)); + LLVM_DEBUG(dbgs() << "Expanding copy bundles...\n"); expandCopyBundles(Info.ExpandableRegs, MRI, Indexes, LIS, VRM, LRM); @@ -236,6 +243,9 @@ bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { rewriteCandidates(Info.RewritableRegs, MRI, TRI, VRM, LRM, LIS, Indexes, DebugVars); + // Prevent SplitKit from rematerializing through stale ancestor LIs. + AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MRI, VRM); + LLVM_DEBUG(dbgs() << "Successfully rewrote " << Info.RewritableRegs.size() << " register(s)\n"); diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir b/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir index a488dce8e010..08e9748db639 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2026 Advanced Micro Devices, Inc. 
or its affiliates -# RUN: : not --crash llc -mtriple=aie2p -start-before=greedy %s -o /dev/null --filetype=obj +# RUN: llc -mtriple=aie2p -start-before=greedy %s -o /dev/null --filetype=obj # RUN: llc -mtriple=aie2p -start-before=greedy -stop-before=virtregrewriter %s -o - | FileCheck %s # Verify that staged register allocation does not produce an invalid # rematerialization of a MOV_PD_imm11_pseudo into a composed eds (3D) @@ -72,7 +72,10 @@ body: | ; CHECK-NEXT: [[COPY5:%[0-9]+]]:spill_edc_to_er = COPY [[COPY1]].sub_dim_size ; CHECK-NEXT: [[COPY6:%[0-9]+]]:spill_edc_to_er = COPY [[COPY1]].sub_dim_size ; CHECK-NEXT: [[COPY7:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: undef [[COPY8:%[0-9]+]].sub_dim_size:eds = COPY [[COPY1]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY8]].sub_dim_stride:eds = COPY [[COPY1]].sub_dim_stride + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) @@ -80,14 +83,14 @@ body: | ; CHECK-NEXT: [[XOR:%[0-9]+]]:er = XOR [[COPY]], [[MOV_RLC_imm11_pseudo1]] ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 ; CHECK-NEXT: [[AND:%[0-9]+]]:er = AND [[XOR]], [[MOV_RLC_imm11_pseudo1]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:edcl = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ednl = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:edjl = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:em_as_32bit = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:edch = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:ednh = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:edjh = COPY [[COPY4]] - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, dead [[COPY9:%[0-9]+]]:edcl, dead [[COPY13:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY12]], [[COPY10]], [[COPY11]], 
[[COPY9]], undef %48:em_as_32bit, [[COPY14]], [[COPY15]], [[COPY13]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:edcl = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ednl = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:edjl = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:em_as_32bit = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:edch = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:ednh = COPY [[COPY9]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:edjh = COPY [[COPY4]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, dead [[COPY10:%[0-9]+]]:edcl, dead [[COPY14:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo3]], [[COPY13]], [[COPY11]], [[COPY12]], [[COPY10]], undef %48:em_as_32bit, [[COPY15]], [[COPY16]], [[COPY14]] ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL]], [[MOV_PD_imm11_pseudo3]], 0 :: (store (<64 x s16>)) ; CHECK-NEXT: PseudoJNZ [[AND]], %bb.1 ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 @@ -97,23 +100,25 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 1 ; CHECK-NEXT: [[AND1:%[0-9]+]]:er = AND [[COPY]], [[MOV_RLC_imm11_pseudo2]] - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:eds = MOV_PD_imm11_pseudo 128 - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]].sub_mod:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub_dim_size:eds = COPY [[COPY8]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY17]].sub_dim_stride:eds = COPY [[COPY8]].sub_dim_stride + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub_mod:eds = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:spill_edc_to_er = 
COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:edjl = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:edjh = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:spill_edc_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:edjl = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:edjh = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edcl = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:spill_edc_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:magusrc_and_magudst_and_spill_em_to_er = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edch = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:edch = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:edcl = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:spill_edc_to_er = COPY [[COPY17]].sub_dim_size + ; 
CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:magusrc_and_magudst_and_spill_em_to_er = MOV_PD_imm11_pseudo 1 ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo3:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 ; CHECK-NEXT: [[VLDA_512_COMPOSED_REG_SPILL1:%[0-9]+]]:vec512 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0) ; CHECK-NEXT: {{ $}} @@ -126,128 +131,128 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead [[COPY28:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] + ; CHECK-NEXT: dead [[COPY30:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[COPY29:%[0-9]+]].sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size { - ; CHECK-NEXT: internal [[COPY29]].sub_dim_stride:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_stride - ; CHECK-NEXT: internal [[COPY29]].sub_mod:eds = COPY [[MOV_PD_imm11_pseudo4]].sub_mod + ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub_dim_size:eds = COPY [[COPY17]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY31]].sub_dim_stride:eds = COPY [[COPY17]].sub_dim_stride + ; CHECK-NEXT: internal [[COPY31]].sub_mod:eds = COPY [[COPY17]].sub_mod ; CHECK-NEXT: } - ; CHECK-NEXT: [[COPY30:%[0-9]+]]:spill_em_to_er = COPY [[COPY29]].sub_mod + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:spill_em_to_er = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:spill_edc_to_er = COPY [[COPY26]] + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:edcl = COPY [[COPY20]] + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:ednl = COPY [[COPY33]] + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:edjl = COPY [[COPY18]] + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:em_as_32bit = 
COPY [[COPY32]] + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:edch = COPY [[COPY35]] + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:ednh = COPY [[COPY34]] + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:edjh = COPY [[COPY19]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep, [[COPY36:%[0-9]+]]:edcl, dead [[COPY40:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo7]], [[COPY39]], [[COPY37]], [[COPY38]], [[COPY36]], undef %64:em_as_32bit, [[COPY41]], [[COPY42]], [[COPY40]] + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:em_as_32bit = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:ednl = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:ednh = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit, [[COPY28:%[0-9]+]]:edcl, dead [[COPY27:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY43]], [[COPY44]], [[COPY21]], [[COPY28]], undef %56:em_as_32bit, [[COPY45]], [[COPY22]], [[COPY27]] + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:spill_edc_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:spill_em_to_er = COPY [[COPY31]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:spill_edc_to_er = COPY [[COPY24]] - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:edcl = COPY [[COPY18]] - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:ednl = COPY [[COPY31]] - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:edjl = COPY [[COPY16]] - ; CHECK-NEXT: [[COPY37:%[0-9]+]]:em_as_32bit = COPY [[COPY30]] - ; CHECK-NEXT: [[COPY38:%[0-9]+]]:edch = COPY [[COPY33]] - ; CHECK-NEXT: [[COPY39:%[0-9]+]]:ednh = COPY [[COPY32]] - ; CHECK-NEXT: [[COPY40:%[0-9]+]]:edjh = COPY [[COPY17]] - ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY34:%[0-9]+]]:edcl, dead [[COPY38:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY37]], 
[[COPY35]], [[COPY36]], [[COPY34]], undef %64:em_as_32bit, [[COPY39]], [[COPY40]], [[COPY38]] - ; CHECK-NEXT: [[COPY41:%[0-9]+]]:em_as_32bit = COPY [[COPY29]].sub_mod - ; CHECK-NEXT: [[COPY42:%[0-9]+]]:ednl = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: [[COPY43:%[0-9]+]]:ednh = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit, [[COPY26:%[0-9]+]]:edcl, dead [[COPY25:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY41]], [[COPY42]], [[COPY19]], [[COPY26]], undef %56:em_as_32bit, [[COPY43]], [[COPY20]], [[COPY25]] - ; CHECK-NEXT: [[COPY44:%[0-9]+]]:spill_edc_to_er = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: [[COPY45:%[0-9]+]]:spill_em_to_er = COPY [[COPY29]].sub_mod + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub_dim_size:eds = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_dim_stride:eds = COPY [[COPY31]].sub_dim_stride + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_mod:eds = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:edcl = COPY [[COPY29]] + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:ednl = COPY [[COPY48]] + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:edjl = COPY [[COPY23]] + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:em_as_32bit = COPY [[COPY47]] + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:edch = COPY [[COPY46]] + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:ednh = COPY [[COPY49]] + ; CHECK-NEXT: [[COPY57:%[0-9]+]]:edjh = COPY [[COPY24]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY51:%[0-9]+]]:edcl, dead [[COPY55:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY54]], [[COPY52]], [[COPY53]], [[COPY51]], undef %72:em_as_32bit, [[COPY56]], [[COPY57]], [[COPY55]] + ; CHECK-NEXT: [[COPY58:%[0-9]+]]:edjh = COPY [[COPY50]].sub_dim_size ; CHECK-NEXT: [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 - ; CHECK-NEXT: 
[[COPY46:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: [[COPY47:%[0-9]+]]:spill_edn_to_er = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub_dim_size:eds = COPY [[COPY29]].sub_dim_size - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_dim_stride:eds = COPY [[COPY29]].sub_dim_stride - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_mod:eds = COPY [[COPY29]].sub_mod - ; CHECK-NEXT: [[COPY49:%[0-9]+]]:edcl = COPY [[COPY27]] - ; CHECK-NEXT: [[COPY50:%[0-9]+]]:ednl = COPY [[COPY46]] - ; CHECK-NEXT: [[COPY51:%[0-9]+]]:edjl = COPY [[COPY21]] - ; CHECK-NEXT: [[COPY52:%[0-9]+]]:em_as_32bit = COPY [[COPY45]] - ; CHECK-NEXT: [[COPY53:%[0-9]+]]:edch = COPY [[COPY44]] - ; CHECK-NEXT: [[COPY54:%[0-9]+]]:ednh = COPY [[COPY47]] - ; CHECK-NEXT: [[COPY55:%[0-9]+]]:edjh = COPY [[COPY22]] - ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, [[COPY49:%[0-9]+]]:edcl, dead [[COPY53:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY52]], [[COPY50]], [[COPY51]], [[COPY49]], undef %72:em_as_32bit, [[COPY54]], [[COPY55]], [[COPY53]] - ; CHECK-NEXT: [[COPY56:%[0-9]+]]:edjh = COPY [[COPY48]].sub_dim_size - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 - ; CHECK-NEXT: [[COPY57:%[0-9]+]]:em_as_32bit = COPY [[COPY48]].sub_mod - ; CHECK-NEXT: [[COPY58:%[0-9]+]]:ednl = COPY [[COPY48]].sub_dim_size - ; CHECK-NEXT: [[COPY59:%[0-9]+]]:edcl = COPY [[COPY34]] - ; CHECK-NEXT: [[COPY60:%[0-9]+]]:ednh = COPY [[COPY48]].sub_dim_size - ; CHECK-NEXT: [[COPY61:%[0-9]+]]:edch = COPY [[COPY44]] - ; CHECK-NEXT: [[COPY62:%[0-9]+]]:edj_as_32bit = COPY [[COPY23]] - ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, dead [[COPY59:%[0-9]+]]:edcl, [[COPY61:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY57]], [[COPY58]], [[COPY62]], [[COPY59]], undef %94:em_as_32bit, [[COPY60]], [[COPY56]], [[COPY61]] - ; CHECK-NEXT: [[COPY63:%[0-9]+]]:spill_edj_to_er = COPY [[COPY62]] - ; CHECK-NEXT: 
[[COPY64:%[0-9]+]]:spill_edc_to_er = COPY [[COPY61]] - ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL1]], [[MOV_PD_imm11_pseudo6]], 0 :: (store (<32 x s16>)) - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY48]].sub_dim_size - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_dim_count:eds = COPY [[COPY26]] - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY48]].sub_dim_size - ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY44]] - ; CHECK-NEXT: undef [[COPY65:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY48]].sub_lo_dim - ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY48]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY48]].sub_hi_dim_then_sub_dim_size - ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY48]].sub_hi_dim_then_sub_dim_stride + ; CHECK-NEXT: [[COPY59:%[0-9]+]]:em_as_32bit = COPY [[COPY50]].sub_mod + ; CHECK-NEXT: [[COPY60:%[0-9]+]]:ednl = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY61:%[0-9]+]]:edcl = COPY [[COPY36]] + ; CHECK-NEXT: [[COPY62:%[0-9]+]]:ednh = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY63:%[0-9]+]]:edch = COPY [[COPY46]] + ; CHECK-NEXT: [[COPY64:%[0-9]+]]:edj_as_32bit = COPY [[COPY25]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, dead [[COPY61:%[0-9]+]]:edcl, [[COPY63:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY59]], [[COPY60]], [[COPY64]], [[COPY61]], undef %94:em_as_32bit, [[COPY62]], [[COPY58]], [[COPY63]] + ; CHECK-NEXT: [[COPY65:%[0-9]+]]:spill_edj_to_er = COPY [[COPY64]] + ; CHECK-NEXT: [[COPY66:%[0-9]+]]:spill_edc_to_er = COPY [[COPY63]] + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL1]], [[MOV_PD_imm11_pseudo5]], 0 :: (store (<32 x s16>)) + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY50]].sub_dim_size + 
; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_dim_count:eds = COPY [[COPY28]] + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY46]] + ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY50]].sub_lo_dim + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY50]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY50]].sub_hi_dim_then_sub_dim_size + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY50]].sub_hi_dim_then_sub_dim_stride ; CHECK-NEXT: PseudoJNZ [[AND1]], %bb.7 ; CHECK-NEXT: PseudoJ_jump_imm %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead [[COPY66:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] + ; CHECK-NEXT: dead [[COPY68:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 ; CHECK-NEXT: [[XOR1:%[0-9]+]]:er = XOR [[COPY]], [[MOV_RLC_imm11_pseudo2]] - ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY65]].sub_lo_dim - ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY65]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY65]].sub_hi_dim_then_sub_dim_size - ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY65]].sub_hi_dim_then_sub_dim_stride - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit, [[COPY67:%[0-9]+]].sub_dim_count:eds, [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = 
PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY67]].sub_mod, [[COPY67]].sub_dim_size, [[COPY67]].sub_dim_stride, [[COPY67]].sub_dim_count, undef [[COPY67]].sub_hi_dim_then_sub_mod, [[COPY67]].sub_hi_dim_then_sub_dim_size, [[COPY67]].sub_hi_dim_then_sub_dim_stride, [[COPY67]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo11:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: undef [[COPY69:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY67]].sub_lo_dim + ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY67]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY67]].sub_hi_dim_then_sub_dim_size + ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY67]].sub_hi_dim_then_sub_dim_stride + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit, [[COPY69:%[0-9]+]].sub_dim_count:eds, [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY69]].sub_mod, [[COPY69]].sub_dim_size, [[COPY69]].sub_dim_stride, [[COPY69]].sub_dim_count, undef [[COPY69]].sub_hi_dim_then_sub_mod, [[COPY69]].sub_hi_dim_then_sub_dim_size, [[COPY69]].sub_hi_dim_then_sub_dim_stride, [[COPY69]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 ; CHECK-NEXT: [[AND2:%[0-9]+]]:er = AND [[XOR1]], [[MOV_RLC_imm11_pseudo2]] - ; CHECK-NEXT: [[COPY68:%[0-9]+]]:edjh = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[COPY69:%[0-9]+]]:edch = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[COPY70:%[0-9]+]]:em_as_32bit = COPY [[COPY67]].sub_mod - ; CHECK-NEXT: [[COPY71:%[0-9]+]]:ednl = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[COPY72:%[0-9]+]]:edjl = COPY [[COPY67]].sub_dim_stride - ; CHECK-NEXT: [[COPY73:%[0-9]+]]:ednh = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo11:%[0-9]+]]:ep, [[COPY49:%[0-9]+]]:edcl, dead [[COPY69:%[0-9]+]]:edch = 
PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo11]], [[COPY70]], [[COPY71]], [[COPY72]], [[COPY49]], undef %72:em_as_32bit, [[COPY73]], [[COPY68]], [[COPY69]] - ; CHECK-NEXT: [[COPY74:%[0-9]+]]:edcl = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[COPY75:%[0-9]+]]:edch = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 - ; CHECK-NEXT: [[COPY76:%[0-9]+]]:em_as_32bit = COPY [[COPY67]].sub_mod - ; CHECK-NEXT: [[COPY77:%[0-9]+]]:ednl = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[COPY78:%[0-9]+]]:edjl = COPY [[COPY68]] - ; CHECK-NEXT: [[COPY79:%[0-9]+]]:ednh = COPY [[COPY67]].sub_dim_size - ; CHECK-NEXT: [[COPY80:%[0-9]+]]:edjh = COPY [[COPY68]] - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:ep_as_32bit, dead [[COPY74:%[0-9]+]]:edcl, dead [[COPY75:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo6]], [[COPY76]], [[COPY77]], [[COPY78]], [[COPY74]], undef %86:em_as_32bit, [[COPY79]], [[COPY80]], [[COPY75]] - ; CHECK-NEXT: [[COPY27:%[0-9]+]]:spill_edc_to_er = COPY [[COPY49]] - ; CHECK-NEXT: undef [[MOV_PD_imm11_pseudo4:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY67]].sub_lo_dim - ; CHECK-NEXT: [[COPY25:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo7]] - ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edcl = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_count - ; CHECK-NEXT: [[COPY23:%[0-9]+]]:spill_edj_to_er = COPY [[COPY63]] - ; CHECK-NEXT: [[COPY24:%[0-9]+]]:edch = COPY [[COPY64]] + ; CHECK-NEXT: [[COPY70:%[0-9]+]]:edjh = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY71:%[0-9]+]]:edch = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY72:%[0-9]+]]:em_as_32bit = COPY [[COPY69]].sub_mod + ; CHECK-NEXT: [[COPY73:%[0-9]+]]:ednl = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY74:%[0-9]+]]:edjl = COPY [[COPY69]].sub_dim_stride + ; CHECK-NEXT: [[COPY75:%[0-9]+]]:ednh = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, [[COPY51:%[0-9]+]]:edcl, dead [[COPY71:%[0-9]+]]:edch = 
PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY72]], [[COPY73]], [[COPY74]], [[COPY51]], undef %72:em_as_32bit, [[COPY75]], [[COPY70]], [[COPY71]] + ; CHECK-NEXT: [[COPY76:%[0-9]+]]:edcl = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY77:%[0-9]+]]:edch = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY78:%[0-9]+]]:em_as_32bit = COPY [[COPY69]].sub_mod + ; CHECK-NEXT: [[COPY79:%[0-9]+]]:ednl = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY80:%[0-9]+]]:edjl = COPY [[COPY70]] + ; CHECK-NEXT: [[COPY81:%[0-9]+]]:ednh = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY82:%[0-9]+]]:edjh = COPY [[COPY70]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit, dead [[COPY76:%[0-9]+]]:edcl, dead [[COPY77:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY78]], [[COPY79]], [[COPY80]], [[COPY76]], undef %86:em_as_32bit, [[COPY81]], [[COPY82]], [[COPY77]] + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:spill_edc_to_er = COPY [[COPY51]] + ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY69]].sub_lo_dim + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo6]] + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:edcl = COPY [[COPY17]].sub_dim_count + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:spill_edj_to_er = COPY [[COPY65]] + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edch = COPY [[COPY66]] ; CHECK-NEXT: PseudoJNZ [[AND2]], %bb.3 ; CHECK-NEXT: PseudoJ_jump_imm %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY81:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo7]] - ; CHECK-NEXT: [[COPY82:%[0-9]+]]:spill_edc_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size - ; CHECK-NEXT: [[COPY83:%[0-9]+]]:spill_edn_to_er = COPY [[MOV_PD_imm11_pseudo4]].sub_dim_size + ; CHECK-NEXT: [[COPY83:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo6]] + ; CHECK-NEXT: 
[[COPY84:%[0-9]+]]:spill_edc_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY85:%[0-9]+]]:spill_edn_to_er = COPY [[COPY17]].sub_dim_size ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY84:%[0-9]+]]:edc = COPY [[COPY82]] - ; CHECK-NEXT: [[COPY85:%[0-9]+]]:edn = COPY [[COPY83]] - ; CHECK-NEXT: [[COPY86:%[0-9]+]]:edj = COPY [[COPY81]] - ; CHECK-NEXT: [[COPY87:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo7]] - ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:ep_as_32bit, dead [[COPY84:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo]], [[COPY87]], [[COPY85]], [[COPY86]], [[COPY84]] + ; CHECK-NEXT: [[COPY86:%[0-9]+]]:edc = COPY [[COPY84]] + ; CHECK-NEXT: [[COPY87:%[0-9]+]]:edn = COPY [[COPY85]] + ; CHECK-NEXT: [[COPY88:%[0-9]+]]:edj = COPY [[COPY83]] + ; CHECK-NEXT: [[COPY89:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo6]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:ep_as_32bit, dead [[COPY86:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo]], [[COPY89]], [[COPY87]], [[COPY88]], [[COPY86]] ; CHECK-NEXT: PseudoJ_jump_imm %bb.9 bb.0: successors: %bb.1(0x80000000) From 23b474e50278f32ae1dc8ab95965769cedaeb299 Mon Sep 17 00:00:00 2001 From: Niwin Anto Date: Tue, 12 May 2026 10:40:53 -0600 Subject: [PATCH 3/3] [AIEX] Restore spill-slot sharing after clearing stale split-from chain The earlier "Sever stale VRM split-from chain after super-reg rewrites" commit calls VRM.clearSplitFromReg() on descendants whose ancestor LiveInterval has gone stale, so SplitKit::defFromParent no longer rematerializes through it. A side effect is that InlineSpiller can no longer recognise sibling vregs as belonging to the same logical group (VRM.getOriginal() now returns the descendant itself), so each sibling is assigned its own stack slot instead of sharing one. 
Add a target hook TargetSubtargetInfo::getSpillGroupOriginal that lets a target advertise a "logical group original" for stack-slot sharing in InlineSpiller, independent of the VRM split-from chain. Defaults to no override. Plumb it through AIE: - AIEMachineFunctionInfo gains a side map (descendant -> pre-severance Original) with record/get accessors. - AIESuperRegUtils::clearStaleSplitFromMappings records the chain into that side map before calling VRM.clearSplitFromReg(). - AIEBaseSubtarget overrides getSpillGroupOriginal to forward to AIEMachineFunctionInfo. - Callers (AIE[Unallocated]SuperRegRewriter) updated to pass MF. --- .../llvm/CodeGen/TargetSubtargetInfo.h | 22 ++++++++++++- llvm/lib/CodeGen/InlineSpiller.cpp | 13 ++++++++ llvm/lib/Target/AIE/AIEBaseSubtarget.cpp | 12 +++++++ llvm/lib/Target/AIE/AIEBaseSubtarget.h | 7 ++++ llvm/lib/Target/AIE/AIEMachineFunctionInfo.h | 33 ++++++++++++++++++- llvm/lib/Target/AIE/AIESuperRegRewriter.cpp | 2 +- llvm/lib/Target/AIE/AIESuperRegUtils.cpp | 19 +++++++++-- llvm/lib/Target/AIE/AIESuperRegUtils.h | 9 ++++- .../AIE/AIEUnallocatedSuperRegRewriter.cpp | 2 +- 9 files changed, 111 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 9c36a354770c..57849133c753 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Modifications (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its +// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. 
or its // affiliates // //===----------------------------------------------------------------------===// @@ -21,11 +21,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MacroFusion.h" #include "llvm/CodeGen/PBQPRAConstraint.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" #include <memory> +#include <optional> #include <vector> namespace llvm { @@ -141,6 +143,24 @@ class TargetSubtargetInfo : public MCSubtargetInfo { return nullptr; } + /// Optional target hook used by InlineSpiller to recover a "logical group + /// original" for stack-slot sharing when a target pass has deliberately + /// severed the VirtRegMap split-from chain (via clearSplitFromReg) for + /// correctness reasons (e.g. to stop SplitKit::defFromParent from + /// rematerializing through a stale ancestor LiveInterval). + /// + /// If the target returns a valid Register \p R, InlineSpiller will use + /// \p R as the "Original" for stack-slot sharing (HoistSpillHelper / + /// MergeableSpills) instead of VRM.getOriginal(VirtReg). The returned + /// register must still have a valid LiveInterval; otherwise the override + /// is ignored. + /// + /// Default: no override. + virtual std::optional<Register> + getSpillGroupOriginal(const MachineFunction &MF, Register VirtReg) const { + return std::nullopt; + } + /// Resolve a SchedClass at runtime, where SchedClass identifies an /// MCSchedClassDesc with the isVariant property. This may return the ID of /// another variant SchedClass, but repeated invocation must quickly terminate diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 302dd37ff3d6..bcd69451a465 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -4,6 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // +// Modifications (c) Copyright 2026 Advanced Micro Devices, Inc. or its +// affiliates +// //===----------------------------------------------------------------------===// // // The inline spiller modifies the machine function directly instead of @@ -1289,6 +1292,16 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { "Trying to spill a stack slot."); // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); + // Allow the target to redirect this lookup. Some target passes deliberately + // sever the VirtRegMap split-from chain (clearSplitFromReg) for correctness + // (e.g. to stop SplitKit from rematerializing through a stale ancestor LI), + // but still want spills of the descendants to share a stack slot with the + // logical group's original. The hook returns that "logical group original" + // when it should be used here. + if (auto SyntheticOrig = + MF.getSubtarget().getSpillGroupOriginal(MF, Original)) + if (LIS.hasInterval(*SyntheticOrig)) + Original = *SyntheticOrig; StackSlot = VRM.getStackSlot(Original); StackInt = nullptr; diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp index a1f1d86ae80f..bf08fdbf2ff4 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp @@ -16,6 +16,7 @@ #include "AIE.h" #include "AIEBaseRegisterInfo.h" #include "AIEInterBlockScheduling.h" +#include "AIEMachineFunctionInfo.h" #include "AIEMachineScheduler.h" #include "AIEMaxLatencyFinder.h" #include "AIERegMemEventTracker.h" @@ -158,6 +159,17 @@ const AIEBaseSubtarget &AIEBaseSubtarget::get(const MachineFunction &MF) { return static_cast<const AIEBaseSubtarget &>(MF.getSubtarget()); } +std::optional<Register> +AIEBaseSubtarget::getSpillGroupOriginal(const MachineFunction &MF, + Register VirtReg) const { + // The MFI is created lazily; if for some reason the MF has not allocated one + // (e.g.
very early in pipeline), there is nothing to look up. + const auto *MFI = MF.getInfo<AIEMachineFunctionInfo>(); + if (!MFI) + return std::nullopt; + return MFI->getSpillGroupOriginal(VirtReg); +} + namespace { // Set latency and declare height/depth dirty if it changes diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.h b/llvm/lib/Target/AIE/AIEBaseSubtarget.h index 22f4d8f3c385..9eaa4b29d920 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.h +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.h @@ -96,6 +96,13 @@ class AIEBaseSubtarget : public TargetSubtargetInfo { // All AIE targets need post scheduling for correct instruction timing bool forcePostRAScheduling() const override { return true; } + + /// See TargetSubtargetInfo::getSpillGroupOriginal. Forwards to the side map + /// in AIEMachineFunctionInfo populated by AIE register-rewriter passes when + /// they sever the VRM split-from chain for correctness. + std::optional<Register> + getSpillGroupOriginal(const MachineFunction &MF, + Register VirtReg) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h b/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h index cb7acc3f1520..9a1144043679 100644 --- a/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h +++ b/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2026 Advanced Micro Devices, Inc.
or its affiliates // //===----------------------------------------------------------------------===// // @@ -15,9 +15,11 @@ #ifndef LLVM_LIB_TARGET_AIE_AIEMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_AIE_AIEMACHINEFUNCTIONINFO_H +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Register.h" #include namespace llvm { @@ -74,6 +76,17 @@ class AIEMachineFunctionInfo : public MachineFunctionInfo { const TileMemoryPSV TileMemory; + /// Side map: descendant vreg -> "logical group original" (the VRM Original + /// before the split-from chain was deliberately severed by an AIE + /// register-rewriter pass). + /// + /// The chain is severed for correctness so that SplitKit::defFromParent + /// no longer rematerializes through the now-stale ancestor LiveInterval. + /// However, InlineSpiller still wants to share a stack slot among all + /// descendants of that group. This map remembers the pre-severance original + /// so getSpillGroupOriginal() can answer that lookup for InlineSpiller. + DenseMap SpillGroupOriginal; + public: // AIEMachineFunctionInfo() = default; @@ -89,6 +102,24 @@ class AIEMachineFunctionInfo : public MachineFunctionInfo { unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; } + /// Record that \p Descendant logically belongs to the spill group whose + /// "original" is \p OldOriginal. Called by AIE register-rewriter passes + /// just before they sever the VRM split-from chain for \p Descendant. + void recordSpillGroupOriginal(Register Descendant, Register OldOriginal) { + SpillGroupOriginal[Descendant] = OldOriginal; + } + + /// Return the "logical group original" recorded for \p V, if any. 
Used by + /// the AIEBaseSubtarget override of + /// TargetSubtargetInfo::getSpillGroupOriginal which InlineSpiller consults + /// for stack-slot sharing. + std::optional getSpillGroupOriginal(Register V) const { + auto It = SpillGroupOriginal.find(V); + if (It == SpillGroupOriginal.end()) + return std::nullopt; + return It->second; + } + MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap &Src2DstMBB) diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 1b1fd0ef9e8d..b510b4c210d4 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -128,7 +128,7 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { } // Prevent SplitKit from rematerializing through stale ancestor LIs. - AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MRI, VRM); + AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MF, MRI, VRM); LLVM_DEBUG(VRM.dump()); return !AssignedPhysRegs.empty(); diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp index c0fea76575e7..e5da7e4caebf 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp @@ -10,6 +10,7 @@ #include "AIESuperRegUtils.h" #include "AIEBaseInstrInfo.h" #include "AIEBaseRegisterInfo.h" +#include "AIEMachineFunctionInfo.h" #include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" @@ -405,10 +406,17 @@ bool isRegUsedBy2DOr3DInstruction(const MachineRegisterInfo &MRI, } void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, - MachineRegisterInfo &MRI, VirtRegMap &VRM) { + MachineFunction &MF, MachineRegisterInfo &MRI, + VirtRegMap &VRM) { if (TaintedOriginals.empty()) return; + // Record the pre-severance "logical group original" in the target-side + // side map so that InlineSpiller 
(via TargetSubtargetInfo:: + // getSpillGroupOriginal) can still merge sibling spills onto a shared + // stack slot after we cut the VRM split-from chain below. + auto *MFI = MF.getInfo(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { const Register V = Register::index2VirtReg(I); if (MRI.reg_nodbg_empty(V)) @@ -420,10 +428,15 @@ void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, LLVM_DEBUG({ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); dbgs() << " Clearing stale split-from for " << printReg(V, TRI, 0, &MRI) - << " (was split from " << printReg(Orig, TRI, 0, &MRI) << ")\n"; + << " (was split from " << printReg(Orig, TRI, 0, &MRI) + << "); recorded for spill-group sharing\n"; }); + // Remember the chain so InlineSpiller can still group V's spills with + // the rest of Orig's descendants on a shared stack slot. + if (MFI) + MFI->recordSpillGroupOriginal(V, Orig); // Restore V to the canonical "no split parent" state so getOriginal(V)==V - // and isAssignedReg() does not treat V as a split product. + // and SplitKit::defFromParent stops consulting the (stale) ancestor LI. 
VRM.clearSplitFromReg(V); } } diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.h b/llvm/lib/Target/AIE/AIESuperRegUtils.h index 03c2263ba7db..b1e6d00ef78d 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.h +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.h @@ -19,6 +19,7 @@ namespace llvm { class Register; +class MachineFunction; class MachineRegisterInfo; struct AIEBaseRegisterInfo; class MachineInstr; @@ -98,8 +99,14 @@ void repairLiveIntervals(SmallSet &RegistersToRepair, /// | split-from /// %141..%144 (future split greedy splits will use %35's LI /// would consult %0's LI) instead of %0's +/// +/// Before severing, the pre-severance Original is recorded in +/// AIEMachineFunctionInfo's spill-group side map so that InlineSpiller (via +/// TargetSubtargetInfo::getSpillGroupOriginal) can still merge sibling spills +/// of these descendants onto a shared stack slot. void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, - MachineRegisterInfo &MRI, VirtRegMap &VRM); + MachineFunction &MF, MachineRegisterInfo &MRI, + VirtRegMap &VRM); } // namespace llvm::AIESuperRegUtils diff --git a/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp index 4c14ef24605c..66f973b745b1 100644 --- a/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp @@ -244,7 +244,7 @@ bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { DebugVars); // Prevent SplitKit from rematerializing through stale ancestor LIs. - AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MRI, VRM); + AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MF, MRI, VRM); LLVM_DEBUG(dbgs() << "Successfully rewrote " << Info.RewritableRegs.size() << " register(s)\n");