-
Notifications
You must be signed in to change notification settings - Fork 41
Martien.physreg liveranges #747
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
martien-de-jong
wants to merge
21
commits into
aie-public
Choose a base branch
from
martien.physreg-liveranges
base: aie-public
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
cbea80f
label edges consistently
058e2e0
[AIE][POSTPIPELINER] Debug option to materialize the linear schedule
83bf915
[AIEMachineScheduler] Track lane masks to accommodate VRegs in postsc…
6a7bd2c
[AIE] Add LivenessVector
b45cb59
[AIE] Add RegDefUseTracker
5d9f274
[AIE] Add ScheduleInterpreter
df210b0
[AIE] Add PostRegAlloc
343363c
[AIE] Add AIELiveRangeUtils
e2a54bb
[AIE] Add scarce range scheduling
a7658d7
Virtual pipeliner mode integration
6eb7220
ref updates
bb2a7c3
[ScheduleDAGInstr] Add option not to skip registering full defs of VRegs
5e61cec
add tests
9893cfe
Add folding markers
715f018
Use non-live callee-changed for reallocation of virtualized regs
1ad91bf
blind ref update
a107369
add test for handling order of defs and uses
3687a62
[AIE][INTERBLOCK] Don't overload BS.Fixpoint.II as pipelining indicator
afd6b6d
[AIE] Off-by-one error in dumpGraph
ff086b4
RegDefUseTracker uses lanemasks and live-in to determine 'fully defined'
b05c2bd
ref updates
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include "AIEMachineScheduler.h" | ||
| #include "AIEMaxLatencyFinder.h" | ||
| #include "AIEMultiSlotInstrMaterializer.h" | ||
| #include "AIERegDefUseTracker.h" | ||
| #include "Utils/AIELoopUtils.h" | ||
| #include "llvm/ADT/PostOrderIterator.h" | ||
| #include "llvm/CodeGen/MachineBasicBlock.h" | ||
|
|
@@ -37,6 +38,7 @@ | |
| // --debug-only=sched-blocks,machine-scheduler | ||
| #define DEBUG_LOOPAWARE(X) DEBUG_WITH_TYPE("loop-aware", X) | ||
| #define DEBUG_BLOCKS(X) DEBUG_WITH_TYPE("sched-blocks", X) | ||
| #define DEBUG_REGALLOC(X) DEBUG_WITH_TYPE("aie-reg-liverange", X) | ||
|
|
||
| using namespace llvm; | ||
|
|
||
|
|
@@ -76,8 +78,52 @@ static cl::opt<int> PostPipelinerMaxTryII( | |
| "aie-postpipeliner-maxtry-ii", cl::init(20), | ||
| cl::desc("[AIE] Maximum II steps to be tried in the post-ra pipeliner")); | ||
|
|
||
| static cl::opt<bool> TestRegDefUseTracker( | ||
| "aie-test-regdefuse-tracker", cl::Hidden, cl::init(false), | ||
| cl::desc("[AIE] TEST MODE: Run RegDefUseTracker analysis on all loops " | ||
| "(for testing only)")); | ||
|
|
||
| namespace llvm::AIE { | ||
|
|
||
| // Helper function to get the name of a PostPipelinerMode as a string | ||
| const char *getPostPipelinerModeName(PostPipelinerMode Mode) { | ||
| switch (Mode) { | ||
| case PostPipelinerMode::None: | ||
| return "None"; | ||
| case PostPipelinerMode::Physical: | ||
| return "Physical"; | ||
| case PostPipelinerMode::Virtual: | ||
| return "Virtual"; | ||
| case PostPipelinerMode::ReservedVirtual: | ||
| return "ReservedVirtual"; | ||
| } | ||
| return "Unknown"; | ||
| } | ||
|
|
||
| // Option for enabling virtual register mode in the postpipeliner | ||
| static cl::opt<bool> PostPipelinerVRegMode( | ||
| "aie-postpipeliner-vreg-mode", cl::Hidden, cl::init(true), | ||
| cl::desc("[AIE] Enable virtual register mode for the postpipeliner " | ||
| "(replaces filtered physical registers with virtual registers)")); | ||
|
|
||
| // Option for enabling physical register mode in the postpipeliner | ||
| static cl::opt<bool> PostPipelinerPhysMode( | ||
| "aie-postpipeliner-phys-mode", cl::Hidden, cl::init(true), | ||
| cl::desc("[AIE] Enable physical register mode for the postpipeliner " | ||
| "(use physical registers without virtualization)")); | ||
|
|
||
| // Option for enabling reserved virtual register mode in the postpipeliner | ||
| static cl::opt<bool> PostPipelinerVRegReservedMode( | ||
| "aie-postpipeliner-vreg-reserved-mode", cl::Hidden, cl::init(false), | ||
| cl::desc("[AIE] Enable reserved virtual register mode for the " | ||
| "postpipeliner (virtualizes ranges overlapping RESERVED bases)")); | ||
|
|
||
| // Option for filtering live ranges with no register choice | ||
| static cl::opt<bool> FilterNoChoiceRegs( | ||
| "aie-postpipeliner-filter-no-choice", cl::Hidden, cl::init(false), | ||
| cl::desc("[AIE] Filter out live ranges with only one available physical " | ||
| "register to prevent pipeliner invalidation")); | ||
|
|
||
| void dumpInterBlock(const InterBlockEdges &Edges) { | ||
| for (const SUnit &SU : Edges) { | ||
| dbgs() << "SU" << SU.NodeNum << ": " << *SU.getInstr(); | ||
|
|
@@ -235,7 +281,7 @@ void InterBlockScheduling::markEpilogueBlocks() { | |
| } | ||
|
|
||
| void InterBlockScheduling::enterFunction(MachineFunction *MF) { | ||
| DEBUG_BLOCKS(dbgs() << ">> enterFunction " << MF->getName() << "\n"); | ||
| DEBUG_BLOCKS(dbgs() << "PSBEGIN Function " << MF->getName() << "\n"); | ||
|
|
||
| // Get ourselves a hazard recognizer | ||
| const auto &Subtarget = MF->getSubtarget(); | ||
|
|
@@ -277,14 +323,14 @@ void InterBlockScheduling::enterFunction(MachineFunction *MF) { | |
| } | ||
|
|
||
| void InterBlockScheduling::leaveFunction() { | ||
| DEBUG_BLOCKS(dbgs() << "<< leaveFunction\n"); | ||
| DEBUG_BLOCKS(dbgs() << "PSEND Function\n"); | ||
| Blocks.clear(); | ||
| } | ||
|
|
||
| void InterBlockScheduling::enterBlock(MachineBasicBlock *BB) { | ||
| CurrentBlockState = &getBlockState(BB); | ||
| CurrentBlockState->resetRegion(); | ||
| DEBUG_BLOCKS(dbgs() << " >> enterBlock " << BB->getNumber() << " " | ||
| DEBUG_BLOCKS(dbgs() << "PSBEGIN Block " << BB->getNumber() << " " | ||
| << CurrentBlockState->kindAsString() << " FixPointIter=" | ||
| << CurrentBlockState->FixPoint.NumIters | ||
| << " II=" << CurrentBlockState->FixPoint.II << "\n"); | ||
|
|
@@ -371,7 +417,7 @@ class PipelineExtractor : public PipelineScheduleVisitor { | |
|
|
||
| } // namespace | ||
| bool InterBlockScheduling::leaveBlock() { | ||
| DEBUG_BLOCKS(dbgs() << " << leaveBlock " | ||
| DEBUG_BLOCKS(dbgs() << "PSEND Block " | ||
| << CurrentBlockState->TheBlock->getNumber() << "\n"); | ||
| // After scheduling a basic block, check convergence to determine which block | ||
| // to schedule next and with what parameters | ||
|
|
@@ -393,8 +439,7 @@ bool InterBlockScheduling::leaveBlock() { | |
| BS.clearSchedule(); | ||
| PipelineExtractor GenSchedule(*this, BS, *TII); | ||
| auto &PostSWP = BS.getPostSWP(); | ||
| PostSWP.visitPipelineSchedule(GenSchedule); | ||
| PostSWP.updateTripCount(); | ||
| PostSWP.materializePipeline(GenSchedule); | ||
| break; | ||
| } | ||
| case SchedulingStage::SchedulingDone: | ||
|
|
@@ -539,6 +584,32 @@ SchedulingStage InterBlockScheduling::updateFixPoint(BlockState &BS) { | |
| return updatePipelining(BS); | ||
| } | ||
|
|
||
| // Get the first pipeliner mode to try based on command line options. | ||
| static PostPipelinerMode firstPipelinerMode() { | ||
| if (PostPipelinerPhysMode) { | ||
| return PostPipelinerMode::Physical; | ||
| } | ||
| if (PostPipelinerVRegMode) { | ||
| return PostPipelinerMode::Virtual; | ||
| } | ||
| if (PostPipelinerVRegReservedMode) { | ||
| return PostPipelinerMode::ReservedVirtual; | ||
| } | ||
| return PostPipelinerMode::None; | ||
| } | ||
|
|
||
| // Get the next pipeliner mode to try after the current one. | ||
| // Returns None when past the last mode. | ||
| static PostPipelinerMode nextPipelinerMode(PostPipelinerMode Current) { | ||
| if (Current == PostPipelinerMode::Physical && PostPipelinerVRegMode) { | ||
| return PostPipelinerMode::Virtual; | ||
| } | ||
| if (Current == PostPipelinerMode::Virtual && PostPipelinerVRegReservedMode) { | ||
| return PostPipelinerMode::ReservedVirtual; | ||
| } | ||
| return PostPipelinerMode::None; | ||
| } | ||
|
|
||
| SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) { | ||
| if (BS.FixPoint.NumIters > | ||
| MaxExpensiveIterations + 2 * HR->getConflictHorizon()) { | ||
|
|
@@ -609,13 +680,22 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) { | |
| << "\n"); | ||
|
|
||
| // The loop schedule has converged, so we could declare our work done. | ||
| // But first try SWP | ||
| // But first try SWP if we have a single region and pipelining is enabled | ||
| if (BS.getRegions().size() == 1) { | ||
| auto &PostSWP = BS.getPostSWP(); | ||
| if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) { | ||
| BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock); | ||
| BS.FixPoint.IITries = 1; | ||
| return SchedulingStage::Pipelining; | ||
| // Determine which pipelining mode to use | ||
| BS.FixPoint.PipelinerMode = firstPipelinerMode(); | ||
| if (BS.FixPoint.PipelinerMode == PostPipelinerMode::None) { | ||
| return SchedulingStage::SchedulingDone; | ||
| } | ||
|
|
||
| const int ResMII = PostSWP.getResMII(*BS.TheBlock); | ||
| if (ResMII <= PostPipelinerMaxII) { | ||
| BS.FixPoint.II = ResMII; | ||
| BS.FixPoint.IITries = 1; | ||
| return SchedulingStage::Pipelining; | ||
| } | ||
| } | ||
| } | ||
| return SchedulingStage::SchedulingDone; | ||
|
|
@@ -624,14 +704,36 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) { | |
| SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) { | ||
| // We have been pipelining. Check whether we were successful. | ||
| if (BS.FixPoint.Stage == SchedulingStage::PipeliningDone) { | ||
| return BS.FixPoint.Stage; | ||
| return SchedulingStage::PipeliningDone; | ||
| } | ||
|
|
||
| // Otherwise try a larger II. | ||
| // If pipelining is disabled, we shouldn't be here | ||
| if (BS.FixPoint.PipelinerMode == PostPipelinerMode::None) { | ||
| return SchedulingStage::PipeliningFailed; | ||
| } | ||
|
|
||
| // We failed. undo all changes that were required for this attempt. | ||
| BS.restorePipelining(); | ||
|
|
||
| // Try the next mode at the same II. | ||
| const PostPipelinerMode NextMode = | ||
| nextPipelinerMode(BS.FixPoint.PipelinerMode); | ||
| if (NextMode != PostPipelinerMode::None) { | ||
| BS.FixPoint.PipelinerMode = NextMode; | ||
| DEBUG_LOOPAWARE(dbgs() << "Trying next mode at II=" << BS.FixPoint.II | ||
| << "\n"); | ||
| return SchedulingStage::Pipelining; | ||
| } | ||
|
|
||
| // We progressed through all pipeliner modes and failed. | ||
| // Try a larger II. | ||
| // We cut off at larger IIs to prevent excessive compilation time. | ||
| if (++BS.FixPoint.II <= PostPipelinerMaxII && | ||
| ++BS.FixPoint.IITries <= PostPipelinerMaxTryII) { | ||
| return SchedulingStage::Pipelining; | ||
| BS.FixPoint.PipelinerMode = firstPipelinerMode(); | ||
| if (BS.FixPoint.PipelinerMode != PostPipelinerMode::None) { | ||
| return SchedulingStage::Pipelining; | ||
| } | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks a bit weird: we have been pipelining and are trying to restore the first allowed pipeliner mode for the next II. This should be invariant, so I don't think we can get None here. Perhaps assert instead. |
||
| } | ||
|
|
||
| auto *BB = BS.TheBlock; | ||
|
|
@@ -1125,6 +1227,54 @@ void BlockState::setPipelined() { | |
| FixPoint.Stage = SchedulingStage::PipeliningDone; | ||
| } | ||
|
|
||
| void BlockState::initPipelining() { | ||
| // Should only be called when actually pipelining. | ||
| assert(FixPoint.PipelinerMode != PostPipelinerMode::None && | ||
| "initPipelining called when not pipelining"); | ||
|
|
||
| DEBUG_REGALLOC(dbgs() << "initPipelining called with mode=" | ||
| << getPostPipelinerModeName(FixPoint.PipelinerMode) | ||
| << " II=" << FixPoint.II << "\n"); | ||
|
|
||
| // For virtual modes, virtualize the already-analyzed live ranges. | ||
| if (FixPoint.PipelinerMode == PostPipelinerMode::Virtual || | ||
| FixPoint.PipelinerMode == PostPipelinerMode::ReservedVirtual) { | ||
| assert(RegTracker && "RegTracker must exist in virtual modes"); | ||
|
|
||
| // The analysis was already performed once in initInterBlock. | ||
| // We just need to virtualize the physical registers for this attempt. | ||
| const RegLiveRangeTracker::OverlapPolicy Policy = | ||
| (FixPoint.PipelinerMode == PostPipelinerMode::Virtual) | ||
| ? RegLiveRangeTracker::OverlapPolicy:: | ||
| DisallowOverlapWithReservedBase | ||
| : RegLiveRangeTracker::OverlapPolicy::AllowOverlapWithReservedBase; | ||
|
|
||
| RegTracker->virtualizeFilteredPhysRegs(Policy); | ||
| DEBUG_REGALLOC(dbgs() << "Virtualized with policy=" | ||
| << (Policy == RegLiveRangeTracker::OverlapPolicy:: | ||
| DisallowOverlapWithReservedBase | ||
| ? "DisallowOverlap" | ||
| : "AllowOverlap") | ||
| << " for pipelining attempt at II=" << FixPoint.II | ||
| << "\n"); | ||
| } | ||
| } | ||
|
|
||
| void BlockState::restorePipelining() { | ||
| // Restore to the original allocation of the virtual registers. | ||
| if (FixPoint.PipelinerMode == PostPipelinerMode::Virtual || | ||
| FixPoint.PipelinerMode == PostPipelinerMode::ReservedVirtual) { | ||
| assert(RegTracker && "RegTracker must exist in virtual modes"); | ||
|
|
||
| // Only restore if registers are still virtualized. | ||
| if (RegTracker->areRegistersVirtualized()) { | ||
| // Restore physical registers but keep the analysis results. | ||
| // The analysis is invariant and will be reused for the next attempt. | ||
| RegTracker->restoreOriginalPhysRegs(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| int BlockState::getScheduleLength() const { | ||
| int Length = 0; | ||
| for (auto &R : Regions) { | ||
|
|
@@ -1185,16 +1335,70 @@ void BlockState::initInterBlock(const MachineSchedContext &Context, | |
| }) && | ||
| "Loop cannot have fixed instructions"); | ||
| BoundaryEdges = std::make_unique<InterBlockEdges>(Context); | ||
|
|
||
| // Start with None - we'll determine the actual mode after scheduling | ||
| // converges | ||
| FixPoint.PipelinerMode = PostPipelinerMode::None; | ||
|
|
||
| if (Regions.size() == 1) { | ||
| // Don't worry, this just constructs a mostly empty container class | ||
| auto NumInstrs = getTop().getFreeInstructions().size(); | ||
| PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs); | ||
|
|
||
| // perform static assignment of multi-slot pseudos | ||
| if (EnableMultiSlotInstrMaterialization && | ||
| PostSWP->isPostPipelineCandidate(*TheBlock)) { | ||
| staticallyMaterializeMultiSlotInstructions(*TheBlock, HR, | ||
| MaterializePipeline); | ||
| // Create the persistent tracker that will be used throughout pipelining | ||
| RegTracker = std::make_unique<RegLiveRangeTracker>(*TheBlock); | ||
|
|
||
| // Create PostSWP with the persistent tracker | ||
| const auto NumInstrs = getTop().getFreeInstructions().size(); | ||
| PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs, *RegTracker, | ||
| *TheBlock->getParent()); | ||
|
|
||
| // Check if isPostPipelineCandidate, if so, perform materialization and | ||
| // register tracking. | ||
| // Also run analysis if TestRegDefUseTracker is enabled (for testing). | ||
| // Only proceed if at least one pipelining mode is enabled. | ||
| const bool PipeliningEnabled = | ||
| PostPipelinerVRegMode || PostPipelinerPhysMode; | ||
| if ((PipeliningEnabled && PostSWP->isPostPipelineCandidate(*TheBlock)) || | ||
| TestRegDefUseTracker) { | ||
| // Perform static assignment of multi-slot pseudos | ||
| if (EnableMultiSlotInstrMaterialization) { | ||
| staticallyMaterializeMultiSlotInstructions(*TheBlock, HR, | ||
| MaterializePipeline); | ||
| } | ||
|
|
||
| // Run register live range analysis ONCE using the invariant semantic | ||
| // order. This analysis is done after static MSP materialization to | ||
| // analyze the materialized state. The semantic order and physical | ||
| // register state are invariant across all pipelining attempts, so we | ||
| // only need to analyze once. | ||
| RegTracker->analyze(*TheBlock, getTop().getFreeInstructions()); | ||
| DEBUG_REGALLOC(RegTracker->dump("FINAL LIVE RANGES\n")); | ||
|
|
||
| // Optionally filter out live ranges with no register choice. | ||
| // This is also done once since the available registers don't change. | ||
| if (FilterNoChoiceRegs) { | ||
| RegTracker->filterByRegisterAvailability(); | ||
| DEBUG_REGALLOC(dbgs() << "After filtering by register availability:\n"); | ||
| DEBUG_REGALLOC(RegTracker->dump()); | ||
| } | ||
|
|
||
| // Find and dump the most promising scarce range set. | ||
| const auto &ScarceRanges = RegTracker->getMostPromisingScarceRanges(); | ||
| DEBUG_REGALLOC({ | ||
| dbgs() << "Most promising scarce range set: " << ScarceRanges.size() | ||
| << " ranges\n"; | ||
| if (!ScarceRanges.empty()) { | ||
| const TargetRegisterInfo *TRI = | ||
| TheBlock->getParent()->getSubtarget().getRegisterInfo(); | ||
| dbgs() << "Register class: " | ||
| << TRI->getRegClassName(ScarceRanges[0]->getRegisterClass()) | ||
| << "\n"; | ||
| for (size_t I = 0; I < ScarceRanges.size(); ++I) { | ||
| const auto *LR = ScarceRanges[I]; | ||
| dbgs() << " [" << I | ||
| << "] BaseReg=" << TRI->getName(LR->getBaseReg()) | ||
| << " Defs=" << LR->getNumDefs() | ||
| << " Uses=" << LR->getNumUses() << "\n"; | ||
| } | ||
| } | ||
| }); | ||
| } | ||
| } | ||
|
|
||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is accommodating a dump for the early stages of live range analysis.