Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.cpp
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably a dead end. With a separate DAG instance hanging around, I would like to have more control over, and visibility into, which DAG mutators are called, rather than having the machine scheduler call them.
Since we have a buildGraph() virtual in the scheduler strategy anyway, we could also directly call the 'AIE' dag mutators as part of that buildGraph().

Original file line number Diff line number Diff line change
Expand Up @@ -269,16 +269,14 @@ class BiasDepth : public ScheduleDAGMutation {
};

class RegionEndEdges : public ScheduleDAGMutation {
AAResults *AA;

/// Detach every predecessor edge currently attached to the DAG's ExitSU.
/// \param DAG the scheduling DAG whose ExitSU is stripped of its Preds.
void removeExitSUPreds(ScheduleDAGInstrs *DAG) {
  SUnit &Exit = DAG->ExitSU;
  // Re-check emptiness and re-read the last edge on every step rather than
  // iterating over Preds while edges are being removed from it.
  while (true) {
    if (Exit.Preds.empty())
      break;
    Exit.removePred(Exit.Preds.back());
  }
}
void apply(ScheduleDAGInstrs *DAG) override {
AIE::MaxLatencyFinder MaxLatency(DAG, AA);
AIE::MaxLatencyFinder MaxLatency(DAG);
MachineBasicBlock *PrologueMBB = DAG->getBB();
unsigned int ZOLBundlesCount = 0;

Expand Down Expand Up @@ -349,7 +347,7 @@ class RegionEndEdges : public ScheduleDAGMutation {
};

public:
RegionEndEdges(AAResults *AA = nullptr) : AA(AA) {}
RegionEndEdges() {}
};

/// This Mutator is responsible for emitting "fixed" SUnits at the top or bottom
Expand Down Expand Up @@ -912,16 +910,30 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT, AAResults *AA) {
if (!TT.isAIE1()) {
if (EnableWAWStickyRegisters)
Mutations.emplace_back(std::make_unique<WAWStickyRegistersEdges>());
Mutations.emplace_back(std::make_unique<RegionEndEdges>(AA));
// RegionEndEdges must run before MemoryEdges/WAWEdges/BiasDepth, and
// EmitFixedSUnits must run last. Both are applied via applyMutations()
// inside AIEPostRASchedStrategy::buildGraph, which also suppresses the
// redundant postProcessDAG() call from ScheduleDAGMI::schedule().
Mutations.emplace_back(createRegionEndEdgesMutation());
Mutations.emplace_back(std::make_unique<MemoryEdges>(true));
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
Mutations.emplace_back(std::make_unique<BiasDepth>());
Mutations.emplace_back(std::make_unique<EmitFixedSUnits>(
EnableAAInEmitFixedSUnits ? AA : nullptr));
Mutations.emplace_back(createEmitFixedSUnitsMutation(AA));
}
return Mutations;
}

/// Factory for the file-local RegionEndEdges mutation, so that code outside
/// this translation unit can instantiate it through the generic
/// ScheduleDAGMutation interface.
std::unique_ptr<ScheduleDAGMutation>
AIEBaseSubtarget::createRegionEndEdgesMutation() {
  std::unique_ptr<ScheduleDAGMutation> Mutation =
      std::make_unique<RegionEndEdges>();
  return Mutation;
}

/// Factory for the EmitFixedSUnits mutation.
/// \param AA alias analysis results; only forwarded to the mutation when
///        EnableAAInEmitFixedSUnits is set, otherwise nullptr is passed.
std::unique_ptr<ScheduleDAGMutation>
AIEBaseSubtarget::createEmitFixedSUnitsMutation(AAResults *AA) {
  AAResults *const ForwardedAA = EnableAAInEmitFixedSUnits ? AA : nullptr;
  return std::make_unique<EmitFixedSUnits>(ForwardedAA);
}

// List the Mutations that apply to the interblock DAG construction.
std::vector<std::unique_ptr<ScheduleDAGMutation>>
AIEBaseSubtarget::getDDGMutationsImpl(const Triple &TT, bool ExactLatencies) {
Expand Down
19 changes: 17 additions & 2 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ class AIEBaseSubtarget : public TargetSubtargetInfo {
}
/// Report the post-RA DAG mutations that the generic scheduler should
/// register. Always leaves \p Mutations empty.
/// \param Mutations output list; any existing content is discarded.
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
                            &Mutations) const override {
  // Post-RA mutations are applied directly in
  // AIEPostRASchedStrategy::buildGraph, which owns the full graph
  // construction pipeline. The registered Mutations list is intentionally
  // empty so that the postProcessDAG() call in ScheduleDAGMI::schedule()
  // is a no-op.
  // (The list was previously filled from getPostRAMutationsImpl() and then
  // cleared straight away; that dead construction has been dropped.)
  Mutations.clear();
}

void overrideSchedPolicy(MachineSchedPolicy &Policy,
Expand All @@ -84,6 +88,17 @@ class AIEBaseSubtarget : public TargetSubtargetInfo {
static std::vector<std::unique_ptr<ScheduleDAGMutation>>
getSMSMutationsImpl(const Triple &TT);

/// Create the RegionEndEdges mutation for use in buildGraph. Ordering is
/// significant: in getPostRAMutationsImpl it is placed before MemoryEdges,
/// MachineSchedWAWEdges and BiasDepth, and EmitFixedSUnits (see
/// createEmitFixedSUnitsMutation) comes last.
static std::unique_ptr<ScheduleDAGMutation> createRegionEndEdgesMutation();

/// Create the EmitFixedSUnits mutation for use in buildGraph, invoked after
/// createRegionEndEdgesMutation to preserve the ExitSU-edge ordering
/// invariant.
static std::unique_ptr<ScheduleDAGMutation>
createEmitFixedSUnitsMutation(AAResults *AA);

/// Whether to enable the pre-RA MachinePipeliner. This can be disabled to let
/// the post-RA pipeliner handle the scheduling.
bool enableMachinePipeliner() const override;
Expand Down
12 changes: 5 additions & 7 deletions llvm/lib/Target/AIE/AIEBaseTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,13 +378,11 @@ void AIEBasePassConfig::addPreSched2() {

/// Create the post-RA machine scheduler DAG for the AIE targets.
/// \param C scheduling context handed to both the DAG and its strategy.
/// \returns a newly allocated AIEScheduleDAGMI; ownership passes to the
///          caller.
ScheduleDAGInstrs *
AIEBaseTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
  // Post-RA mutations are applied directly in
  // AIEPostRASchedStrategy::buildGraph, so the registered Mutations list is
  // intentionally empty (matching the empty list from getPostRAMutations).
  // No addMutation() calls are made here; doing so would contradict the
  // above and left a second, unreachable return statement behind.
  return new AIEScheduleDAGMI(C, std::make_unique<AIEPostRASchedStrategy>(C),
                              /* RemoveKillFlags=*/true);
}

ScheduleDAGInstrs *
Expand Down
65 changes: 64 additions & 1 deletion llvm/lib/Target/AIE/AIEDataDependenceHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -72,4 +72,67 @@ void DataDependenceHelper::dumpDot(raw_ostream &OS,
OS << "}\n";
}

/// Add \p MI to the DDG and remember which SUnit index represents it.
/// Instructions added before markBoundary() are registered in PredMap, those
/// added afterwards in SuccMap. Nothing is registered when initSUnit() yields
/// no index. An existing map entry for \p MI is kept (std::map::emplace does
/// not overwrite).
void InterBlockEdges::addNode(MachineInstr *MI) {
  const auto NodeIndex = initSUnit(*MI);
  if (!NodeIndex)
    return;

  if (Boundary)
    SuccMap.emplace(MI, *NodeIndex);
  else
    PredMap.emplace(MI, *NodeIndex);
}

/// Fix the boundary at the current number of SUnits: every SUnit created so
/// far has a NodeNum comparison `< Boundary`, every later one `>= Boundary`.
void InterBlockEdges::markBoundary() {
  const auto FirstPostIndex = SUnits.size();
  Boundary = FirstPostIndex;
}

/// Alias query used while building memory edges.
/// When SafeToIgnoreMemDeps is set and a boundary has been marked, any pair of
/// SUnits lying on opposite sides of the boundary is reported as non-aliasing;
/// every other query is delegated to DataDependenceHelper::mayAlias().
bool InterBlockEdges::mayAlias(SUnit *SUa, SUnit *SUb, bool TBAA) {
  const bool FilterCrossBoundary = SafeToIgnoreMemDeps && Boundary.has_value();
  if (FilterCrossBoundary) {
    const unsigned FirstPost = *Boundary;
    const bool CrossesBoundary =
        (SUa->NodeNum >= FirstPost) != (SUb->NodeNum >= FirstPost);
    // Suppress memory edges that cross the pre/post boundary.
    if (CrossesBoundary)
      return false;
  }
  return DataDependenceHelper::mayAlias(SUa, SUb, TBAA);
}

/// Look up the SUnit registered for \p MI in PredMap.
/// \returns a pointer into SUnits, or nullptr when \p MI has no PredMap entry.
const SUnit *InterBlockEdges::getPreBoundaryNode(MachineInstr *MI) const {
  const auto Entry = PredMap.find(MI);
  return Entry != PredMap.end() ? &SUnits.at(Entry->second) : nullptr;
}

/// \returns true when a boundary has been marked and \p SU's NodeNum is at or
/// beyond it; false otherwise (including when no boundary is set yet).
bool InterBlockEdges::isPostBoundaryNode(SUnit *SU) const {
  if (!Boundary)
    return false;
  return SU->NodeNum >= *Boundary;
}

/// Store \p Depth for the SUnit registered for \p MI in SuccMap, overwriting
/// any previously recorded value. Does nothing when \p MI has no SuccMap entry.
void InterBlockEdges::recordPostDepth(MachineInstr *MI, int Depth) {
  if (const auto Entry = SuccMap.find(MI); Entry != SuccMap.end())
    PostDepths[Entry->second] = Depth;
}

/// \returns the depth recorded for \p SU (keyed by its NodeNum), or
/// \p Default when nothing has been recorded for it.
int InterBlockEdges::getPostDepthOr(const SUnit *SU, int Default) const {
  const auto Recorded = PostDepths.find(SU->NodeNum);
  if (Recorded == PostDepths.end())
    return Default;
  return Recorded->second;
}

/// Recompute PreHeights from scratch: for every SUnit registered in PredMap,
/// store the minimum getHeight() over those of its successors that lie after
/// the boundary. SUnits without any post-boundary successor get no entry, so
/// getPreHeight() falls back to its "not recorded" value for them.
void InterBlockEdges::recordPreHeightsFromSuccessors() {
  // Drop results of any earlier call; otherwise a node that no longer has a
  // post-boundary successor would keep reporting a stale height.
  PreHeights.clear();

  constexpr int NoHeight = std::numeric_limits<int>::max();
  for (const auto &[MI, NodeNum] : PredMap) {
    const SUnit &SU = SUnits.at(NodeNum);
    int MinHeight = NoHeight;
    for (const SDep &Dep : SU.Succs) {
      SUnit *Succ = Dep.getSUnit();
      if (!isPostBoundaryNode(Succ))
        continue;
      MinHeight = std::min(MinHeight, static_cast<int>(Succ->getHeight()));
    }
    // Only record nodes that actually have a post-boundary successor.
    if (MinHeight != NoHeight)
      PreHeights[NodeNum] = MinHeight;
  }
}

/// \returns the height stored for \p SU (keyed by its NodeNum), or
/// std::numeric_limits<int>::max() when no height has been recorded.
int InterBlockEdges::getPreHeight(const SUnit *SU) const {
  const auto Recorded = PreHeights.find(SU->NodeNum);
  if (Recorded == PreHeights.end())
    return std::numeric_limits<int>::max();
  return Recorded->second;
}

} // end namespace llvm::AIE
112 changes: 111 additions & 1 deletion llvm/lib/Target/AIE/AIEDataDependenceHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand All @@ -17,6 +17,9 @@

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include <limits>
#include <map>
#include <optional>

namespace llvm {

Expand All @@ -36,6 +39,8 @@ class DataDependenceHelper : public ScheduleDAGInstrs {
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
const MachineSchedContext &Context;
void schedule() override {};

protected:
bool mayAlias(SUnit *SUa, SUnit *SUb, bool TBAA) override;

public:
Expand All @@ -53,6 +58,111 @@ class DataDependenceHelper : public ScheduleDAGInstrs {
// are printed.
void dumpDot(raw_ostream &OS, bool IncludeBoundaries) const;
};

/// This class generates all edges between nodes in two flow-adjacent regions.
/// The nodes are added in forward flow order, marking the boundary at the
/// appropriate point.
///
/// When SafeToIgnoreMemDeps is set, memory-alias edges that cross the
/// pre/post boundary are suppressed via a mayAlias() override.
///
/// The class also provides optional depth and height maps (both keyed by SUnit
/// NodeNum, so they remain unambiguous when the same MachineInstr* appears on
/// both sides of the boundary, e.g. in a single-block loop):
///
/// PostDepths — top-down cycle of each post-boundary node. Populated by
/// recordPostDepth(); queried by getPostDepthOr().
///
/// PreHeights — for each pre-boundary node, the minimum getHeight() of its
/// post-boundary successors in the DDG. Populated by
/// recordPreHeightsFromSuccessors() after buildEdges(); queried by
/// getPreHeight().
///
/// PreRegionLength — total number of bundles in the pre-boundary region.
///
/// PostRegionLength — total number of bundles in the post-boundary region,
/// used to represent the depth of the artificial ExitSU node.
class InterBlockEdges : public DataDependenceHelper {
// The boundary between Pred and Succ nodes.
// Empty until markBoundary() is called; afterwards it holds the NodeNum of
// the first post-boundary SUnit.
std::optional<unsigned> Boundary;
// When true, memory edges crossing the boundary are suppressed.
bool SafeToIgnoreMemDeps = false;

/// We can add the same instruction on both sides of the boundary.
/// We maintain explicit maps to retrieve the corresponding SUnit.
/// The mapped value is the SUnit's index, used to address SUnits.
using IndexMap = std::map<MachineInstr *, unsigned>;
IndexMap PredMap;
IndexMap SuccMap;

/// Depth (top-down cycle) of post-boundary SUnits, keyed by NodeNum.
std::map<unsigned, int> PostDepths;
/// For each pre-boundary SUnit, the minimum getHeight() of its
/// post-boundary successors (keyed by NodeNum).
std::map<unsigned, int> PreHeights;
/// Total number of bundles in the pre-boundary region.
int PreRegionLength = 0;
/// Total number of bundles in the post-boundary region.
int PostRegionLength = 0;

/// Alias query override that implements the SafeToIgnoreMemDeps filtering
/// of cross-boundary pairs before deferring to the base class.
bool mayAlias(SUnit *SUa, SUnit *SUb, bool TBAA) override;

public:
/// \param Context scheduling context forwarded to DataDependenceHelper.
/// \param SafeToIgnoreMemDeps when true, memory edges crossing the boundary
///        are suppressed.
InterBlockEdges(const MachineSchedContext &Context,
bool SafeToIgnoreMemDeps = false)
: DataDependenceHelper(Context, true, true),
SafeToIgnoreMemDeps(SafeToIgnoreMemDeps) {}

/// Add a Node to the DAG.
void addNode(MachineInstr *);

/// Mark the boundary between the predecessor block and the successor block.
/// In normal operation, there should just be one call to this method.
/// Nodes added before are part of the predecessor, nodes added after are
/// part of the successor.
void markBoundary();

/// To iterate forward across the SUnits of the underlying DDG.
auto begin() const { return SUnits.begin(); }
auto end() const { return SUnits.end(); }

/// The following two methods are used to find the cross-boundary edges,
/// by starting from a pre-boundary node and selecting its successor edges
/// that connect to a post-boundary node.
/// ---
/// Retrieve the SUnit that represents MI's instance before the
/// boundary, null if not found.
const SUnit *getPreBoundaryNode(MachineInstr *MI) const;

/// Check whether SU represents an instruction after the boundary.
/// Always false while no boundary has been marked.
bool isPostBoundaryNode(SUnit *SU) const;

// Post-boundary depth interface.
/// Record the top-down cycle of a post-boundary instruction.
/// Ignored when MI was not added after the boundary.
void recordPostDepth(MachineInstr *MI, int Depth);
/// Get the recorded top-down cycle of a post-boundary SUnit, or \p Default
/// if no depth has been recorded (e.g. the instruction is beyond the
/// conflict horizon).
int getPostDepthOr(const SUnit *SU, int Default) const;
/// Clear all recorded post-boundary depths. Call before repopulating.
void clearPostDepths() { PostDepths.clear(); }

// Pre-boundary height interface.
/// Compute and store, for each pre-boundary SUnit, the minimum getHeight()
/// of its post-boundary successors. Must be called after buildEdges().
void recordPreHeightsFromSuccessors();
/// Get the stored height of a pre-boundary SUnit.
/// Returns INT_MAX if not recorded (conservative: no loop-carried use).
int getPreHeight(const SUnit *SU) const;

// Pre-boundary region length.
void setPreRegionLength(int Length) { PreRegionLength = Length; }
int getPreRegionLength() const { return PreRegionLength; }

// Post-boundary region length (used as depth of the ExitSU node).
void setPostRegionLength(int Length) { PostRegionLength = Length; }
int getPostRegionLength() const { return PostRegionLength; }
};

} // namespace AIE
} // namespace llvm

Expand Down
Loading
Loading