diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 9c36a354770c..57849133c753 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Modifications (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its +// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its // affiliates // //===----------------------------------------------------------------------===// @@ -21,11 +21,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MacroFusion.h" #include "llvm/CodeGen/PBQPRAConstraint.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" #include +#include #include namespace llvm { @@ -141,6 +143,24 @@ class TargetSubtargetInfo : public MCSubtargetInfo { return nullptr; } + /// Optional target hook used by InlineSpiller to recover a "logical group + /// original" for stack-slot sharing when a target pass has deliberately + /// severed the VirtRegMap split-from chain (via clearSplitFromReg) for + /// correctness reasons (e.g. to stop SplitKit::defFromParent from + /// rematerializing through a stale ancestor LiveInterval). + /// + /// \p VirtReg is the VRM original of the register being spilled. If the + /// target returns a Register, InlineSpiller uses it as the "Original" + /// for stack-slot sharing (HoistSpillHelper / MergeableSpills) in place + /// of \p VirtReg. The returned register must still have a LiveInterval; + /// otherwise the override is ignored. + /// + /// Default: no override. 
+ virtual std::optional + getSpillGroupOriginal(const MachineFunction &MF, Register VirtReg) const { + return std::nullopt; + } + /// Resolve a SchedClass at runtime, where SchedClass identifies an /// MCSchedClassDesc with the isVariant property. This may return the ID of /// another variant SchedClass, but repeated invocation must quickly terminate diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index 2e7545312c87..dfef831d40e8 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// Modifications (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its +// Modifications (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its // affiliates // //===----------------------------------------------------------------------===// @@ -176,6 +176,14 @@ class VirtRegMap { } } + /// clearSplitFromReg - Reset the split-from mapping for virtReg to the + /// null Register, making it its own original. This is the same canonical + /// state as a freshly created vreg (no split parent). + void clearSplitFromReg(Register virtReg) { + assert(virtReg.isVirtual()); + Virt2SplitMap[virtReg] = Register(); + } + /// returns the live interval virtReg is split from. Register getPreSplitReg(Register virtReg) const { return Virt2SplitMap[virtReg]; diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 302dd37ff3d6..bcd69451a465 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -4,6 +4,9 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // +// Modifications (c) Copyright 2026 Advanced Micro Devices, Inc. 
or its +// affiliates +// //===----------------------------------------------------------------------===// // // The inline spiller modifies the machine function directly instead of @@ -1289,6 +1292,16 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { "Trying to spill a stack slot."); // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); + // Allow the target to redirect this lookup. Some target passes deliberately + // sever the VirtRegMap split-from chain (clearSplitFromReg) for correctness + // but still want the descendants' spills to share the logical group's stack + // slot. The hook's answer is only honoured when it names a virtual register + // that still has a live interval. NOTE(review): isSibling() compares against + // VRM.getOriginal(), so confirm siblings still match after a redirect. + if (auto SyntheticOrig = + MF.getSubtarget().getSpillGroupOriginal(MF, Original)) + if (SyntheticOrig->isVirtual() && LIS.hasInterval(*SyntheticOrig)) + Original = *SyntheticOrig; StackSlot = VRM.getStackSlot(Original); StackInt = nullptr; diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp index a1f1d86ae80f..bf08fdbf2ff4 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.cpp @@ -16,6 +16,7 @@ #include "AIE.h" #include "AIEBaseRegisterInfo.h" #include "AIEInterBlockScheduling.h" +#include "AIEMachineFunctionInfo.h" #include "AIEMachineScheduler.h" #include "AIEMaxLatencyFinder.h" #include "AIERegMemEventTracker.h" @@ -158,6 +159,17 @@ const AIEBaseSubtarget &AIEBaseSubtarget::get(const MachineFunction &MF) { return static_cast(MF.getSubtarget()); } +std::optional +AIEBaseSubtarget::getSpillGroupOriginal(const MachineFunction &MF, + Register VirtReg) const { + // The MFI is created lazily; if for some reason the MF has not allocated one + // (e.g. very early in pipeline), there is nothing to look up. 
+ const auto *MFI = MF.getInfo(); + if (!MFI) + return std::nullopt; + return MFI->getSpillGroupOriginal(VirtReg); +} + namespace { // Set latency and declare height/depth dirty if it changes diff --git a/llvm/lib/Target/AIE/AIEBaseSubtarget.h b/llvm/lib/Target/AIE/AIEBaseSubtarget.h index 22f4d8f3c385..9eaa4b29d920 100644 --- a/llvm/lib/Target/AIE/AIEBaseSubtarget.h +++ b/llvm/lib/Target/AIE/AIEBaseSubtarget.h @@ -96,6 +96,13 @@ class AIEBaseSubtarget : public TargetSubtargetInfo { // All AIE targets need post scheduling for correct instruction timing bool forcePostRAScheduling() const override { return true; } + + /// See TargetSubtargetInfo::getSpillGroupOriginal. Forwards to the side map + /// in AIEMachineFunctionInfo populated by AIE register-rewriter passes when + /// they sever the VRM split-from chain for correctness. + std::optional + getSpillGroupOriginal(const MachineFunction &MF, + Register VirtReg) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h b/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h index cb7acc3f1520..9a1144043679 100644 --- a/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h +++ b/llvm/lib/Target/AIE/AIEMachineFunctionInfo.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2026 Advanced Micro Devices, Inc. 
or its affiliates // //===----------------------------------------------------------------------===// // @@ -15,9 +15,11 @@ #ifndef LLVM_LIB_TARGET_AIE_AIEMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_AIE_AIEMACHINEFUNCTIONINFO_H +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Register.h" #include namespace llvm { @@ -74,6 +76,17 @@ class AIEMachineFunctionInfo : public MachineFunctionInfo { const TileMemoryPSV TileMemory; + /// Side map: descendant vreg -> "logical group original" (the VRM Original + /// before the split-from chain was deliberately severed by an AIE + /// register-rewriter pass). + /// + /// The chain is severed for correctness so that SplitKit::defFromParent + /// no longer rematerializes through the now-stale ancestor LiveInterval. + /// However, InlineSpiller still wants to share a stack slot among all + /// descendants of that group. This map remembers the pre-severance original + /// so getSpillGroupOriginal() can answer that lookup for InlineSpiller. + DenseMap SpillGroupOriginal; + public: // AIEMachineFunctionInfo() = default; @@ -89,6 +102,24 @@ class AIEMachineFunctionInfo : public MachineFunctionInfo { unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; } + /// Record \p OldOriginal as the spill-group "original" of \p Descendant, + /// replacing any earlier entry. Called by AIE register-rewriter passes + /// just before they sever the VRM split-from chain for \p Descendant. + void recordSpillGroupOriginal(Register Descendant, Register OldOriginal) { + SpillGroupOriginal[Descendant] = OldOriginal; + } + + /// Return the "logical group original" recorded for \p V, if any. 
Used by + /// the AIEBaseSubtarget override of + /// TargetSubtargetInfo::getSpillGroupOriginal which InlineSpiller consults + /// for stack-slot sharing. + std::optional getSpillGroupOriginal(Register V) const { + auto It = SpillGroupOriginal.find(V); + if (It == SpillGroupOriginal.end()) + return std::nullopt; + return It->second; + } + MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap &Src2DstMBB) diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 44e84037df91..b510b4c210d4 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2026 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// @@ -114,6 +114,11 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { } } + // Snapshot the originals of the vregs rewritten below; their LIs go stale. + SmallSet TaintedOriginals; + for (auto &[VReg, _] : AssignedPhysRegs) + TaintedOriginals.insert(VRM.getOriginal(VReg)); + // Re-write all the collected VRegs for (auto &[VReg, PhysRegAndSubRegs] : AssignedPhysRegs) { const Register PhysReg = PhysRegAndSubRegs.first; @@ -122,6 +127,9 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { LRM, LIS, Indexes, DebugVars); } + // Prevent SplitKit from rematerializing through stale ancestor LIs. 
+ AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MF, MRI, VRM); + LLVM_DEBUG(VRM.dump()); return !AssignedPhysRegs.empty(); } diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp index 0d9269ce0694..e5da7e4caebf 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.cpp @@ -10,6 +10,7 @@ #include "AIESuperRegUtils.h" #include "AIEBaseInstrInfo.h" #include "AIEBaseRegisterInfo.h" +#include "AIEMachineFunctionInfo.h" #include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" @@ -19,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "aie-ra" @@ -376,16 +378,6 @@ void rewriteSuperReg(Register Reg, std::optional AssignedPhysReg, // Step 4: Remove the original register's live interval LIS.removeInterval(Reg); - // Step 4b: Clear stale ancestor live intervals. The operand rewrite in - // step 3 modified instructions in-place (e.g., stripping sub-register - // indices). Any ancestor register in the VRM split chain still has VNInfos - // pointing to those instruction slots. If a later Greedy pass traces back - // via VRM.getOriginal(), it would find a stale instruction and could produce - // an invalid rematerialization. Clearing the ancestor interval prevents this. 
- Register Original = VRM.getOriginal(Reg); - if (Original != Reg && LIS.hasInterval(Original)) - LIS.getInterval(Original).clear(); - // Step 5: Filter out empty subregisters markEffectiveEmptyCopiesDead(SubRegToVReg, MRI, TRI, LIS); @@ -413,6 +405,42 @@ bool isRegUsedBy2DOr3DInstruction(const MachineRegisterInfo &MRI, }); } +void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, + MachineFunction &MF, MachineRegisterInfo &MRI, + VirtRegMap &VRM) { + if (TaintedOriginals.empty()) + return; + + // Record the pre-severance "logical group original" in the target-side + // side map so that InlineSpiller (via TargetSubtargetInfo:: + // getSpillGroupOriginal) can still merge sibling spills onto a shared + // stack slot after we cut the VRM split-from chain below. + auto *MFI = MF.getInfo(); + + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + const Register V = Register::index2VirtReg(I); + if (MRI.reg_nodbg_empty(V)) + continue; + const Register Orig = VRM.getPreSplitReg(V); + if (!Orig || !TaintedOriginals.count(Orig)) + continue; + + LLVM_DEBUG({ + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + dbgs() << " Clearing stale split-from for " << printReg(V, TRI, 0, &MRI) + << " (was split from " << printReg(Orig, TRI, 0, &MRI) + << "); recorded for spill-group sharing\n"; + }); + // Remember the chain so InlineSpiller can still group V's spills with + // Orig's other descendants; with no MFI the chain is cut unrecorded. + if (MFI) + MFI->recordSpillGroupOriginal(V, Orig); + // Restore V to the canonical "no split parent" state so getOriginal(V)==V + // and SplitKit::defFromParent stops consulting the (stale) ancestor LI. 
+ VRM.clearSplitFromReg(V); + } +} + void repairLiveIntervals(SmallSet &RegistersToRepair, VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS) { diff --git a/llvm/lib/Target/AIE/AIESuperRegUtils.h b/llvm/lib/Target/AIE/AIESuperRegUtils.h index 74d592f66c95..b1e6d00ef78d 100644 --- a/llvm/lib/Target/AIE/AIESuperRegUtils.h +++ b/llvm/lib/Target/AIE/AIESuperRegUtils.h @@ -19,6 +19,7 @@ namespace llvm { class Register; +class MachineFunction; class MachineRegisterInfo; struct AIEBaseRegisterInfo; class MachineInstr; @@ -84,6 +85,29 @@ void repairLiveIntervals(SmallSet &RegistersToRepair, VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS); +/// Sever VRM split-from chain for descendants of \p TaintedOriginals so that +/// SplitKit::defFromParent consults the descendant's own (repaired) LI, not +/// the stale ancestor LI which may still hold VNs at slots whose MIs were +/// rewritten/unbundled by an AIE register-rewriter pass. Each affected +/// descendant is restored via VRM.clearSplitFromReg() to the canonical +/// "no split parent" state of a freshly created vreg. +/// +/// before: after: +/// %0 (stale LI) %0 (stale LI, ignored) +/// | split-from x (chain cut) +/// %35 ----. %35 (no split parent) +/// | split-from +/// %141..%144 (future split greedy splits will use %35's LI +/// would consult %0's LI) instead of %0's +/// +/// Vregs with no non-debug operands are skipped. Before severing, the old +/// Original is recorded in AIEMachineFunctionInfo's spill-group side map so +/// InlineSpiller (via TargetSubtargetInfo::getSpillGroupOriginal) can still +/// merge sibling spills of these descendants onto a shared stack slot. 
+void clearStaleSplitFromMappings(const SmallSet &TaintedOriginals, + MachineFunction &MF, MachineRegisterInfo &MRI, + VirtRegMap &VRM); + } // namespace llvm::AIESuperRegUtils #endif diff --git a/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp index 28be72b3963a..66f973b745b1 100644 --- a/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIEUnallocatedSuperRegRewriter.cpp @@ -229,6 +229,13 @@ bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { return false; } + // Snapshot the originals of the vregs handled below; their LIs go stale. + SmallSet TaintedOriginals; + for (auto &P : Info.ExpandableRegs) + TaintedOriginals.insert(VRM.getOriginal(P.first)); + for (auto &P : Info.RewritableRegs) + TaintedOriginals.insert(VRM.getOriginal(P.first)); + LLVM_DEBUG(dbgs() << "Expanding copy bundles...\n"); expandCopyBundles(Info.ExpandableRegs, MRI, Indexes, LIS, VRM, LRM); @@ -236,6 +243,9 @@ bool AIEUnallocatedSuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { rewriteCandidates(Info.RewritableRegs, MRI, TRI, VRM, LRM, LIS, Indexes, DebugVars); + // Prevent SplitKit from rematerializing through stale ancestor LIs. + AIESuperRegUtils::clearStaleSplitFromMappings(TaintedOriginals, MF, MRI, VRM); + LLVM_DEBUG(dbgs() << "Successfully rewrote " << Info.RewritableRegs.size() << " register(s)\n"); diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir b/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir index 580d3cf4e361..08e9748db639 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/staged-ra-stale-remat.mir @@ -1,11 +1,11 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # This file is licensed under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates - # RUN: llc -mtriple=aie2p -start-before=greedy %s -o /dev/null --filetype=obj - +# RUN: llc -mtriple=aie2p -start-before=greedy -stop-before=virtregrewriter %s -o - | FileCheck %s # Verify that staged register allocation does not produce an invalid # rematerialization of a MOV_PD_imm11_pseudo into a composed eds (3D) # register. The bug occurs when rewriteSuperReg (in AIESuperRegUtils) rewrites @@ -43,10 +43,221 @@ frameInfo: savePoint: '' restorePoint: '' body: | + ; CHECK-LABEL: name: stale_remat_test + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: [[VBCST_32_:%[0-9]+]]:vec512 = VBCST_32 [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: VST_dmx_sts_x_spill [[VBCST_32_]], %stack.1, implicit $sp :: (store (s512) into %stack.1) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: dead [[VBCST_32_1:%[0-9]+]]:vec512 = VBCST_32 [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[VBCST_32_2:%[0-9]+]]:vec512 = VBCST_32 [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: VST_dmx_sts_x_spill [[VBCST_32_2]], %stack.0, implicit $sp :: (store (s512) into %stack.0) + ; CHECK-NEXT: $p0 = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: PseudoJL_IND [[MOV_PD_imm11_pseudo]], csr_aie2p, implicit-def $lr, implicit $p0 + ; CHECK-NEXT: [[VLDA_512_COMPOSED_REG_SPILL:%[0-9]+]]:vec512 = VLDA_512_COMPOSED_REG_SPILL %stack.1, implicit $sp :: (load (s512) from %stack.1) + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edn_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo1]] + ; CHECK-NEXT: 
[[MOV_PD_imm11_pseudo2:%[0-9]+]]:magusrc_and_magudst_and_spill_edj_to_er = MOV_PD_imm11_pseudo 128 + ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub_dim_stride:eds = COPY [[MOV_PD_imm11_pseudo2]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spill_em_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:spill_edj_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:spill_edj_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:spill_edc_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:spill_edc_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: undef [[COPY8:%[0-9]+]].sub_dim_size:eds = COPY [[COPY1]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY8]].sub_dim_stride:eds = COPY [[COPY1]].sub_dim_stride + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:spill_edn_to_er = COPY [[COPY1]].sub_dim_size + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[XOR:%[0-9]+]]:er = XOR [[COPY]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[AND:%[0-9]+]]:er = AND [[XOR]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:edcl = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ednl = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:edjl = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:em_as_32bit = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:edch = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:ednh = COPY [[COPY9]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:edjh = COPY [[COPY4]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:ep, dead [[COPY10:%[0-9]+]]:edcl, dead [[COPY14:%[0-9]+]]:edch = PADD_3D_pseudo_split 
[[MOV_PD_imm11_pseudo3]], [[COPY13]], [[COPY11]], [[COPY12]], [[COPY10]], undef %48:em_as_32bit, [[COPY15]], [[COPY16]], [[COPY14]] + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL]], [[MOV_PD_imm11_pseudo3]], 0 :: (store (<64 x s16>)) + ; CHECK-NEXT: PseudoJNZ [[AND]], %bb.1 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 1 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:er = AND [[COPY]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub_dim_size:eds = COPY [[COPY8]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY17]].sub_dim_stride:eds = COPY [[COPY8]].sub_dim_stride + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub_mod:eds = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:spill_edc_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:edjl = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:edjh = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:spill_edj_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edch = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:edch = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:edcl = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:spill_edc_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: 
[[MOV_PD_imm11_pseudo6:%[0-9]+]]:magusrc_and_magudst_and_spill_em_to_er = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo3:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: [[VLDA_512_COMPOSED_REG_SPILL1:%[0-9]+]]:vec512 = VLDA_512_COMPOSED_REG_SPILL %stack.0, implicit $sp :: (load (s512) from %stack.0) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJNZ [[AND1]], %bb.5 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY30:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub_dim_size:eds = COPY [[COPY17]].sub_dim_size { + ; CHECK-NEXT: internal [[COPY31]].sub_dim_stride:eds = COPY [[COPY17]].sub_dim_stride + ; CHECK-NEXT: internal [[COPY31]].sub_mod:eds = COPY [[COPY17]].sub_mod + ; CHECK-NEXT: } + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:spill_em_to_er = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:spill_edc_to_er = COPY [[COPY26]] + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:edcl = COPY [[COPY20]] + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:ednl = COPY [[COPY33]] + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:edjl = COPY [[COPY18]] + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:em_as_32bit = COPY [[COPY32]] + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:edch = COPY [[COPY35]] + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:ednh = COPY [[COPY34]] + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:edjh = COPY [[COPY19]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo7:%[0-9]+]]:ep, 
[[COPY36:%[0-9]+]]:edcl, dead [[COPY40:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo7]], [[COPY39]], [[COPY37]], [[COPY38]], [[COPY36]], undef %64:em_as_32bit, [[COPY41]], [[COPY42]], [[COPY40]] + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:em_as_32bit = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:ednl = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:ednh = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit, [[COPY28:%[0-9]+]]:edcl, dead [[COPY27:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY43]], [[COPY44]], [[COPY21]], [[COPY28]], undef %56:em_as_32bit, [[COPY45]], [[COPY22]], [[COPY27]] + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:spill_edc_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:spill_em_to_er = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:spill_edn_to_er = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub_dim_size:eds = COPY [[COPY31]].sub_dim_size + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_dim_stride:eds = COPY [[COPY31]].sub_dim_stride + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_mod:eds = COPY [[COPY31]].sub_mod + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:edcl = COPY [[COPY29]] + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:ednl = COPY [[COPY48]] + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:edjl = COPY [[COPY23]] + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:em_as_32bit = COPY [[COPY47]] + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:edch = COPY [[COPY46]] + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:ednh = COPY [[COPY49]] + ; CHECK-NEXT: [[COPY57:%[0-9]+]]:edjh = COPY [[COPY24]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo8:%[0-9]+]]:ep, [[COPY51:%[0-9]+]]:edcl, dead [[COPY55:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo8]], [[COPY54]], [[COPY52]], [[COPY53]], [[COPY51]], undef %72:em_as_32bit, 
[[COPY56]], [[COPY57]], [[COPY55]] + ; CHECK-NEXT: [[COPY58:%[0-9]+]]:edjh = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY59:%[0-9]+]]:em_as_32bit = COPY [[COPY50]].sub_mod + ; CHECK-NEXT: [[COPY60:%[0-9]+]]:ednl = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY61:%[0-9]+]]:edcl = COPY [[COPY36]] + ; CHECK-NEXT: [[COPY62:%[0-9]+]]:ednh = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY63:%[0-9]+]]:edch = COPY [[COPY46]] + ; CHECK-NEXT: [[COPY64:%[0-9]+]]:edj_as_32bit = COPY [[COPY25]] + ; CHECK-NEXT: dead [[MOV_PD_imm11_pseudo9:%[0-9]+]]:ep, dead [[COPY61:%[0-9]+]]:edcl, [[COPY63:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo9]], [[COPY59]], [[COPY60]], [[COPY64]], [[COPY61]], undef %94:em_as_32bit, [[COPY62]], [[COPY58]], [[COPY63]] + ; CHECK-NEXT: [[COPY65:%[0-9]+]]:spill_edj_to_er = COPY [[COPY64]] + ; CHECK-NEXT: [[COPY66:%[0-9]+]]:spill_edc_to_er = COPY [[COPY63]] + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[VLDA_512_COMPOSED_REG_SPILL1]], [[MOV_PD_imm11_pseudo5]], 0 :: (store (<32 x s16>)) + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_dim_count:eds = COPY [[COPY28]] + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY50]].sub_dim_size + ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY46]] + ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY50]].sub_lo_dim + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY50]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY50]].sub_hi_dim_then_sub_dim_size + ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY50]].sub_hi_dim_then_sub_dim_stride + ; CHECK-NEXT: PseudoJNZ [[AND1]], %bb.7 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.6 + ; CHECK-NEXT: 
{{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[COPY68:%[0-9]+]]:erf2 = COPY [[MOV_RLC_imm11_pseudo3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:er = XOR [[COPY]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: undef [[COPY69:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY67]].sub_lo_dim + ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[COPY67]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[COPY67]].sub_hi_dim_then_sub_dim_size + ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_stride:eds = COPY [[COPY67]].sub_hi_dim_then_sub_dim_stride + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:ep_as_32bit, [[COPY69:%[0-9]+]].sub_dim_count:eds, [[COPY69:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo4]], [[COPY69]].sub_mod, [[COPY69]].sub_dim_size, [[COPY69]].sub_dim_stride, [[COPY69]].sub_dim_count, undef [[COPY69]].sub_hi_dim_then_sub_mod, [[COPY69]].sub_hi_dim_then_sub_dim_size, [[COPY69]].sub_hi_dim_then_sub_dim_stride, [[COPY69]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:er = AND [[XOR1]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: [[COPY70:%[0-9]+]]:edjh = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY71:%[0-9]+]]:edch = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY72:%[0-9]+]]:em_as_32bit = COPY [[COPY69]].sub_mod + ; CHECK-NEXT: [[COPY73:%[0-9]+]]:ednl = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY74:%[0-9]+]]:edjl = COPY [[COPY69]].sub_dim_stride + ; CHECK-NEXT: [[COPY75:%[0-9]+]]:ednh = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: dead 
[[MOV_PD_imm11_pseudo10:%[0-9]+]]:ep, [[COPY51:%[0-9]+]]:edcl, dead [[COPY71:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo10]], [[COPY72]], [[COPY73]], [[COPY74]], [[COPY51]], undef %72:em_as_32bit, [[COPY75]], [[COPY70]], [[COPY71]] + ; CHECK-NEXT: [[COPY76:%[0-9]+]]:edcl = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY77:%[0-9]+]]:edch = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit = MOV_PD_imm11_pseudo 0 + ; CHECK-NEXT: [[COPY78:%[0-9]+]]:em_as_32bit = COPY [[COPY69]].sub_mod + ; CHECK-NEXT: [[COPY79:%[0-9]+]]:ednl = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY80:%[0-9]+]]:edjl = COPY [[COPY70]] + ; CHECK-NEXT: [[COPY81:%[0-9]+]]:ednh = COPY [[COPY69]].sub_dim_size + ; CHECK-NEXT: [[COPY82:%[0-9]+]]:edjh = COPY [[COPY70]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:ep_as_32bit, dead [[COPY76:%[0-9]+]]:edcl, dead [[COPY77:%[0-9]+]]:edch = PADD_3D_pseudo_split [[MOV_PD_imm11_pseudo5]], [[COPY78]], [[COPY79]], [[COPY80]], [[COPY76]], undef %86:em_as_32bit, [[COPY81]], [[COPY82]], [[COPY77]] + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:spill_edc_to_er = COPY [[COPY51]] + ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub_lo_dim:eds = COPY [[COPY69]].sub_lo_dim + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:edch = COPY [[MOV_PD_imm11_pseudo6]] + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:edcl = COPY [[COPY17]].sub_dim_count + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:spill_edj_to_er = COPY [[COPY65]] + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:edch = COPY [[COPY66]] + ; CHECK-NEXT: PseudoJNZ [[AND2]], %bb.3 + ; CHECK-NEXT: PseudoJ_jump_imm %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY83:%[0-9]+]]:spill_edj_to_er = COPY [[MOV_PD_imm11_pseudo6]] + ; CHECK-NEXT: [[COPY84:%[0-9]+]]:spill_edc_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: [[COPY85:%[0-9]+]]:spill_edn_to_er = COPY [[COPY17]].sub_dim_size + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; 
CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY86:%[0-9]+]]:edc = COPY [[COPY84]] + ; CHECK-NEXT: [[COPY87:%[0-9]+]]:edn = COPY [[COPY85]] + ; CHECK-NEXT: [[COPY88:%[0-9]+]]:edj = COPY [[COPY83]] + ; CHECK-NEXT: [[COPY89:%[0-9]+]]:em = COPY [[MOV_PD_imm11_pseudo6]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:ep_as_32bit, dead [[COPY86:%[0-9]+]]:edc = PADD_2D_pseudo_split [[MOV_PD_imm11_pseudo]], [[COPY89]], [[COPY87]], [[COPY88]], [[COPY86]] + ; CHECK-NEXT: PseudoJ_jump_imm %bb.9 bb.0: successors: %bb.1(0x80000000) liveins: $r0 - + undef %103.sub_dim_size:eds = MOV_PD_imm11_pseudo 0 %18:erf2 = MOV_RLC_imm11_pseudo 0 %103.sub_dim_stride:eds = MOV_PD_imm11_pseudo 128 @@ -68,10 +279,10 @@ body: | %127.sub_hi_dim_then_sub_dim_count:eds = COPY %103.sub_dim_size %127.sub_dim_size:eds = COPY %103.sub_dim_size %127.sub_hi_dim_then_sub_dim_size:eds = COPY %103.sub_dim_size - + bb.1: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - + %84:er = XOR %1, %85 %3:ep = MOV_PD_imm11_pseudo 0 %81:er = AND %84, %85 @@ -80,10 +291,10 @@ body: | VST_dmx_sts_x_idx_imm %7, %3, 0 :: (store (<64 x s16>)) PseudoJNZ %81, %bb.1 PseudoJ_jump_imm %bb.2 - + bb.2: successors: %bb.3(0x80000000) - + %87:er = MOV_RLC_imm11_pseudo 1 %80:er = AND %1, %87 %103.sub_mod:eds = COPY %103.sub_dim_size @@ -101,21 +312,21 @@ body: | %117.sub_hi_dim_then_sub_dim_count:eds = COPY %103.sub_dim_size %117.sub_dim_count:eds = COPY %103.sub_dim_size %112.sub_dim_count:eds = COPY %103.sub_dim_size - + bb.3: successors: %bb.5(0x40000000), %bb.4(0x40000000) - + PseudoJNZ %80, %bb.5 PseudoJ_jump_imm %bb.4 - + bb.4: successors: %bb.5(0x80000000) - + %126:erf2 = COPY %18 - + bb.5: successors: %bb.7(0x40000000), %bb.6(0x40000000) - + %121.sub_mod:eds = COPY %103.sub_mod %20:ep = MOV_PD_imm11_pseudo 0 %121.sub_dim_size:eds = COPY %103.sub_dim_size @@ -149,15 +360,15 @@ body: | %103.sub_hi_dim_then_sub_dim_count:eds = COPY %112.sub_hi_dim_then_sub_dim_count PseudoJNZ %80, %bb.7 
PseudoJ_jump_imm %bb.6 - + bb.6: successors: %bb.7(0x80000000) - + %102:erf2 = COPY %18 - + bb.7: successors: %bb.8(0x04000000), %bb.3(0x7c000000) - + %133:ep_as_32bit = MOV_PD_imm11_pseudo 0 %77:er = XOR %1, %87 %133:ep_as_32bit, %103.sub_dim_count:eds, %103.sub_hi_dim_then_sub_dim_count:eds = PADD_3D_pseudo_split %133, %103.sub_mod, %103.sub_dim_size, %103.sub_dim_stride, %103.sub_dim_count, undef %103.sub_hi_dim_then_sub_mod, %103.sub_hi_dim_then_sub_dim_size, %103.sub_hi_dim_then_sub_dim_stride, %103.sub_hi_dim_then_sub_dim_count @@ -184,17 +395,17 @@ body: | %117.sub_dim_count:eds = COPY %103.sub_dim_count PseudoJNZ %74, %bb.3 PseudoJ_jump_imm %bb.8 - + bb.8: successors: %bb.9(0x80000000) - + %89.sub_dim_stride:ed = COPY %89.sub_mod %89.sub_dim_count:ed = COPY %103.sub_dim_size %89.sub_dim_size:ed = COPY %103.sub_dim_size - + bb.9: successors: %bb.9(0x80000000) - + %148:ed = COPY %89 %139:ep_as_32bit, dead %148.sub_dim_count:ed = PADD_2D_pseudo_split %139, %148.sub_mod, %148.sub_dim_size, %148.sub_dim_stride, %148.sub_dim_count PseudoJ_jump_imm %bb.9