diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 4707150ab209..b43c72c21564 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -137,6 +137,11 @@ namespace llvm {
     /// Whether lane masks should get tracked.
     bool TrackLaneMasks = false;
 
+    /// When true, singly-defined vregs are not registered in CurrentVRegDefs.
+    /// Special uses of ScheduleDAGInstrs that cannot assume that defs
+    /// dominate all uses set this to false.
+    bool AbandonSingleDefs = true;
+
     // State specific to the current scheduling region.
     // ------------------------------------------------
 
@@ -351,7 +356,8 @@ namespace llvm {
     /// traversal of the SUnits vector.
     void buildEdges(AAResults *AA, RegPressureTracker *RPTracker = nullptr,
                     PressureDiffs *PDiffs = nullptr,
-                    LiveIntervals *LIS = nullptr, bool TrackLaneMasks = false);
+                    LiveIntervals *LIS = nullptr, bool TrackLaneMasks = false,
+                    bool AbandonSingleDefs = true);
 
     /// Adds dependencies from instructions in the current list of
     /// instructions being scheduled to scheduling barrier. We want to make sure
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 4ec3e91ae044..168145f5c80f 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -492,7 +492,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
   }
 
   // Shortcut: Singly defined vregs do not have output/anti dependencies.
-  if (MRI.hasOneDef(Reg))
+  if (AbandonSingleDefs && MRI.hasOneDef(Reg))
     return;
 
   // Add output dependence to the next nearest defs of this vreg.
@@ -868,7 +868,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
 
 void ScheduleDAGInstrs::buildEdges(AAResults *AA, RegPressureTracker *RPTracker,
                                    PressureDiffs *PDiffs, LiveIntervals *LIS,
-                                   bool TrackLaneMasks) {
+                                   bool TrackLaneMasks,
+                                   bool AbandonSingleDefs) {
 
   const TargetSubtargetInfo &ST = MF.getSubtarget();
   bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
@@ -877,6 +878,7 @@ void ScheduleDAGInstrs::buildEdges(AAResults *AA, RegPressureTracker *RPTracker,
     AAForDep.emplace(*AA);
   BarrierChain = nullptr;
   this->TrackLaneMasks = TrackLaneMasks;
+  this->AbandonSingleDefs = AbandonSingleDefs;
 
   if (PDiffs)
     PDiffs->init(SUnits.size());
diff --git a/llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp b/llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
index 510546674f3d..e7c07a13a410 100644
--- a/llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
+++ b/llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
@@ -19,6 +19,7 @@
 #include "AIEMachineScheduler.h"
 #include "AIEMaxLatencyFinder.h"
 #include "AIEMultiSlotInstrMaterializer.h"
+#include "AIERegDefUseTracker.h"
 #include "Utils/AIELoopUtils.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -37,6 +38,7 @@
 // --debug-only=sched-blocks,machine-scheduler
 #define DEBUG_LOOPAWARE(X) DEBUG_WITH_TYPE("loop-aware", X)
 #define DEBUG_BLOCKS(X) DEBUG_WITH_TYPE("sched-blocks", X)
+#define DEBUG_REGALLOC(X) DEBUG_WITH_TYPE("aie-reg-liverange", X)
 
 using namespace llvm;
 
@@ -76,8 +78,52 @@ static cl::opt<int> PostPipelinerMaxTryII(
     "aie-postpipeliner-maxtry-ii", cl::init(20),
     cl::desc("[AIE] Maximum II steps to be tried in the post-ra pipeliner"));
 
+static cl::opt<bool> TestRegDefUseTracker(
+    "aie-test-regdefuse-tracker", cl::Hidden, cl::init(false),
+    cl::desc("[AIE] TEST MODE: Run RegDefUseTracker analysis on all loops "
+             "(for testing only)"));
+
 namespace llvm::AIE {
 
+// Returns a printable name for Mode, or "Unknown" for an unlisted value.
+const char *getPostPipelinerModeName(PostPipelinerMode Mode) {
+  switch (Mode) {
+  case PostPipelinerMode::None:
+    return "None";
+  case PostPipelinerMode::Physical:
+    return "Physical";
+  case PostPipelinerMode::Virtual:
+    return "Virtual";
+  case PostPipelinerMode::ReservedVirtual:
+    return "ReservedVirtual";
+  }
+  return "Unknown";
+}
+
+// Enables PostPipelinerMode::Virtual as a pipelining attempt (default: on).
+static cl::opt<bool> PostPipelinerVRegMode(
+    "aie-postpipeliner-vreg-mode", cl::Hidden, cl::init(true),
+    cl::desc("[AIE] Enable virtual register mode for the postpipeliner "
+             "(replaces filtered physical registers with virtual registers)"));
+
+// Enables PostPipelinerMode::Physical as a pipelining attempt (default: on).
+static cl::opt<bool> PostPipelinerPhysMode(
+    "aie-postpipeliner-phys-mode", cl::Hidden, cl::init(true),
+    cl::desc("[AIE] Enable physical register mode for the postpipeliner "
+             "(use physical registers without virtualization)"));
+
+// Enables PostPipelinerMode::ReservedVirtual as an attempt (default: off).
+static cl::opt<bool> PostPipelinerVRegReservedMode(
+    "aie-postpipeliner-vreg-reserved-mode", cl::Hidden, cl::init(false),
+    cl::desc("[AIE] Enable reserved virtual register mode for the "
+             "postpipeliner (virtualizes ranges overlapping RESERVED bases)"));
+
+// Filters out live ranges with no register choice (default: off).
+static cl::opt<bool> FilterNoChoiceRegs(
+    "aie-postpipeliner-filter-no-choice", cl::Hidden, cl::init(false),
+    cl::desc("[AIE] Filter out live ranges with only one available physical "
+             "register to prevent pipeliner invalidation"));
+
 void dumpInterBlock(const InterBlockEdges &Edges) {
   for (const SUnit &SU : Edges) {
     dbgs() << "SU" << SU.NodeNum << ": " << *SU.getInstr();
@@ -235,7 +281,7 @@ void InterBlockScheduling::markEpilogueBlocks() {
 }
 
 void InterBlockScheduling::enterFunction(MachineFunction *MF) {
-  DEBUG_BLOCKS(dbgs() << ">> enterFunction " << MF->getName() << "\n");
+  DEBUG_BLOCKS(dbgs() << "PSBEGIN Function " << MF->getName() << "\n");
 
   // Get ourselves a hazard recognizer
   const auto &Subtarget = MF->getSubtarget();
@@ -277,14 +323,14 @@ void InterBlockScheduling::enterFunction(MachineFunction *MF) {
 }
 
 void InterBlockScheduling::leaveFunction() {
-  DEBUG_BLOCKS(dbgs() << "<< leaveFunction\n");
+  DEBUG_BLOCKS(dbgs() << "PSEND Function\n");
   Blocks.clear();
 }
 
 void InterBlockScheduling::enterBlock(MachineBasicBlock *BB) {
   CurrentBlockState = &getBlockState(BB);
   CurrentBlockState->resetRegion();
-  DEBUG_BLOCKS(dbgs() << "  >> enterBlock " << BB->getNumber() << " "
+  DEBUG_BLOCKS(dbgs() << "PSBEGIN Block " << BB->getNumber() << " "
                       << CurrentBlockState->kindAsString() << " FixPointIter="
                       << CurrentBlockState->FixPoint.NumIters
                       << " II=" << CurrentBlockState->FixPoint.II << "\n");
@@ -371,7 +417,7 @@ class PipelineExtractor : public PipelineScheduleVisitor {
 
 } // namespace
 bool InterBlockScheduling::leaveBlock() {
-  DEBUG_BLOCKS(dbgs() << "  << leaveBlock "
+  DEBUG_BLOCKS(dbgs() << "PSEND Block "
                       << CurrentBlockState->TheBlock->getNumber() << "\n");
   // After scheduling a basic block, check convergence to determine which block
   // to schedule next and with what parameters
@@ -393,8 +439,7 @@ bool InterBlockScheduling::leaveBlock() {
     BS.clearSchedule();
     PipelineExtractor GenSchedule(*this, BS, *TII);
     auto &PostSWP = BS.getPostSWP();
-    PostSWP.visitPipelineSchedule(GenSchedule);
-    PostSWP.updateTripCount();
+    PostSWP.materializePipeline(GenSchedule);
     break;
   }
   case SchedulingStage::SchedulingDone:
@@ -539,6 +584,32 @@ SchedulingStage InterBlockScheduling::updateFixPoint(BlockState &BS) {
   return updatePipelining(BS);
 }
 
+// Pick the first pipeliner mode that is enabled on the command line.
+// Modes are considered in the order Physical, Virtual, ReservedVirtual;
+// the result is None when all of them are disabled.
+static PostPipelinerMode firstPipelinerMode() {
+  if (PostPipelinerPhysMode)
+    return PostPipelinerMode::Physical;
+
+  if (PostPipelinerVRegMode)
+    return PostPipelinerMode::Virtual;
+
+  return PostPipelinerVRegReservedMode ? PostPipelinerMode::ReservedVirtual
+                                       : PostPipelinerMode::None;
+}
+
+// Get the next enabled pipeliner mode after Current (None when exhausted).
+// A disabled Virtual mode must not hide an enabled ReservedVirtual mode.
+static PostPipelinerMode nextPipelinerMode(PostPipelinerMode Current) {
+  if (Current == PostPipelinerMode::Physical && PostPipelinerVRegMode)
+    return PostPipelinerMode::Virtual;
+  const bool BeforeReserved = Current == PostPipelinerMode::Physical ||
+                              Current == PostPipelinerMode::Virtual;
+  if (BeforeReserved && PostPipelinerVRegReservedMode)
+    return PostPipelinerMode::ReservedVirtual;
+  return PostPipelinerMode::None;
+}
+
 SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
   if (BS.FixPoint.NumIters >
       MaxExpensiveIterations + 2 * HR->getConflictHorizon()) {
@@ -609,13 +680,22 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
                          << "\n");
 
   // The loop schedule has converged, so we could declare our work done.
-  // But first try SWP
+  // But first try SWP if we have a single region and pipelining is enabled
   if (BS.getRegions().size() == 1) {
     auto &PostSWP = BS.getPostSWP();
     if (PostSWP.isPostPipelineCandidate(*BS.TheBlock)) {
-      BS.FixPoint.II = PostSWP.getResMII(*BS.TheBlock);
-      BS.FixPoint.IITries = 1;
-      return SchedulingStage::Pipelining;
+      // Determine which pipelining mode to use
+      BS.FixPoint.PipelinerMode = firstPipelinerMode();
+      if (BS.FixPoint.PipelinerMode == PostPipelinerMode::None) {
+        return SchedulingStage::SchedulingDone;
+      }
+
+      const int ResMII = PostSWP.getResMII(*BS.TheBlock);
+      if (ResMII <= PostPipelinerMaxII) {
+        BS.FixPoint.II = ResMII;
+        BS.FixPoint.IITries = 1;
+        return SchedulingStage::Pipelining;
+      }
     }
   }
   return SchedulingStage::SchedulingDone;
@@ -624,14 +704,36 @@ SchedulingStage InterBlockScheduling::updateScheduling(BlockState &BS) {
 SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) {
   // We have been pipelining. Check whether we were successful.
   if (BS.FixPoint.Stage == SchedulingStage::PipeliningDone) {
-    return BS.FixPoint.Stage;
+    return SchedulingStage::PipeliningDone;
   }
 
-  // Otherwise try a larger II.
+  // If pipelining is disabled, we shouldn't be here
+  if (BS.FixPoint.PipelinerMode == PostPipelinerMode::None) {
+    return SchedulingStage::PipeliningFailed;
+  }
+
+  // We failed. undo all changes that were required for this attempt.
+  BS.restorePipelining();
+
+  // Try the next mode at the same II.
+  const PostPipelinerMode NextMode =
+      nextPipelinerMode(BS.FixPoint.PipelinerMode);
+  if (NextMode != PostPipelinerMode::None) {
+    BS.FixPoint.PipelinerMode = NextMode;
+    DEBUG_LOOPAWARE(dbgs() << "Trying next mode at II=" << BS.FixPoint.II
+                           << "\n");
+    return SchedulingStage::Pipelining;
+  }
+
+  // We progressed through all pipeliner modes and failed.
+  // Try a larger II.
   // We cut off at larger IIs to prevent excessive compilation time.
   if (++BS.FixPoint.II <= PostPipelinerMaxII &&
       ++BS.FixPoint.IITries <= PostPipelinerMaxTryII) {
-    return SchedulingStage::Pipelining;
+    BS.FixPoint.PipelinerMode = firstPipelinerMode();
+    if (BS.FixPoint.PipelinerMode != PostPipelinerMode::None) {
+      return SchedulingStage::Pipelining;
+    }
   }
 
   auto *BB = BS.TheBlock;
@@ -1125,6 +1227,54 @@ void BlockState::setPipelined() {
   FixPoint.Stage = SchedulingStage::PipeliningDone;
 }
 
+void BlockState::initPipelining() {
+  // Precondition: a pipelining mode (anything but None) has been selected.
+  assert(FixPoint.PipelinerMode != PostPipelinerMode::None &&
+         "initPipelining called when not pipelining");
+
+  DEBUG_REGALLOC(dbgs() << "initPipelining called with mode="
+                        << getPostPipelinerModeName(FixPoint.PipelinerMode)
+                        << " II=" << FixPoint.II << "\n");
+
+  // Only the virtual modes need setup: virtualize the analyzed live ranges.
+  if (FixPoint.PipelinerMode == PostPipelinerMode::Virtual ||
+      FixPoint.PipelinerMode == PostPipelinerMode::ReservedVirtual) {
+    assert(RegTracker && "RegTracker must exist in virtual modes");
+
+    // The analysis was already performed once in initInterBlock. Virtual mode
+    // disallows overlap with reserved bases; ReservedVirtual mode allows it.
+    const RegLiveRangeTracker::OverlapPolicy Policy =
+        (FixPoint.PipelinerMode == PostPipelinerMode::Virtual)
+            ? RegLiveRangeTracker::OverlapPolicy::
+                  DisallowOverlapWithReservedBase
+            : RegLiveRangeTracker::OverlapPolicy::AllowOverlapWithReservedBase;
+
+    RegTracker->virtualizeFilteredPhysRegs(Policy);
+    DEBUG_REGALLOC(dbgs() << "Virtualized with policy="
+                          << (Policy == RegLiveRangeTracker::OverlapPolicy::
+                                            DisallowOverlapWithReservedBase
+                                  ? "DisallowOverlap"
+                                  : "AllowOverlap")
+                          << " for pipelining attempt at II=" << FixPoint.II
+                          << "\n");
+  }
+}
+
+void BlockState::restorePipelining() {
+  // Only the virtual modes rewrite registers; other modes need no undo.
+  const bool WasVirtualMode =
+      FixPoint.PipelinerMode == PostPipelinerMode::Virtual ||
+      FixPoint.PipelinerMode == PostPipelinerMode::ReservedVirtual;
+  if (!WasVirtualMode)
+    return;
+
+  assert(RegTracker && "RegTracker must exist in virtual modes");
+  // Put back the original physical registers if they are still virtualized.
+  // The analysis results are kept; they are reused for the next attempt.
+  if (RegTracker->areRegistersVirtualized())
+    RegTracker->restoreOriginalPhysRegs();
+}
+
 int BlockState::getScheduleLength() const {
   int Length = 0;
   for (auto &R : Regions) {
@@ -1185,16 +1335,70 @@ void BlockState::initInterBlock(const MachineSchedContext &Context,
                 }) &&
          "Loop cannot have fixed instructions");
   BoundaryEdges = std::make_unique<InterBlockEdges>(Context);
+
+  // Start with None - we'll determine the actual mode after scheduling
+  // converges
+  FixPoint.PipelinerMode = PostPipelinerMode::None;
+
   if (Regions.size() == 1) {
-    // Don't worry, this just constructs a mostly empty container class
-    auto NumInstrs = getTop().getFreeInstructions().size();
-    PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs);
-
-    // perform static assignment of multi-slot pseudos
-    if (EnableMultiSlotInstrMaterialization &&
-        PostSWP->isPostPipelineCandidate(*TheBlock)) {
-      staticallyMaterializeMultiSlotInstructions(*TheBlock, HR,
-                                                 MaterializePipeline);
+    // Create the persistent tracker that will be used throughout pipelining
+    RegTracker = std::make_unique<RegLiveRangeTracker>(*TheBlock);
+
+    // Create PostSWP with the persistent tracker
+    const auto NumInstrs = getTop().getFreeInstructions().size();
+    PostSWP = std::make_unique<PostPipeliner>(HR, NumInstrs, *RegTracker,
+                                              *TheBlock->getParent());
+
+    // For post-pipeline candidates, perform materialization and register
+    // tracking, provided at least one pipelining mode is enabled. Ask
+    // firstPipelinerMode() so that ReservedVirtual counts as well.
+    // Also run analysis if TestRegDefUseTracker is enabled (for testing).
+    const bool PipeliningEnabled =
+        firstPipelinerMode() != PostPipelinerMode::None;
+    if ((PipeliningEnabled && PostSWP->isPostPipelineCandidate(*TheBlock)) ||
+        TestRegDefUseTracker) {
+      // Perform static assignment of multi-slot pseudos
+      if (EnableMultiSlotInstrMaterialization) {
+        staticallyMaterializeMultiSlotInstructions(*TheBlock, HR,
+                                                   MaterializePipeline);
+      }
+
+      // Run register live range analysis ONCE using the invariant semantic
+      // order. This analysis is done after static MSP materialization to
+      // analyze the materialized state. The semantic order and physical
+      // register state are invariant across all pipelining attempts, so we
+      // only need to analyze once.
+      RegTracker->analyze(*TheBlock, getTop().getFreeInstructions());
+      DEBUG_REGALLOC(RegTracker->dump("FINAL LIVE RANGES\n"));
+
+      // Optionally filter out live ranges with no register choice.
+      // This is also done once since the available registers don't change.
+      if (FilterNoChoiceRegs) {
+        RegTracker->filterByRegisterAvailability();
+        DEBUG_REGALLOC(dbgs() << "After filtering by register availability:\n");
+        DEBUG_REGALLOC(RegTracker->dump());
+      }
+
+      // Find and dump the most promising scarce range set.
+      const auto &ScarceRanges = RegTracker->getMostPromisingScarceRanges();
+      DEBUG_REGALLOC({
+        dbgs() << "Most promising scarce range set: " << ScarceRanges.size()
+               << " ranges\n";
+        if (!ScarceRanges.empty()) {
+          const TargetRegisterInfo *TRI =
+              TheBlock->getParent()->getSubtarget().getRegisterInfo();
+          dbgs() << "Register class: "
+                 << TRI->getRegClassName(ScarceRanges[0]->getRegisterClass())
+                 << "\n";
+          for (size_t I = 0; I < ScarceRanges.size(); ++I) {
+            const auto *LR = ScarceRanges[I];
+            dbgs() << "  [" << I
+                   << "] BaseReg=" << TRI->getName(LR->getBaseReg())
+                   << " Defs=" << LR->getNumDefs()
+                   << " Uses=" << LR->getNumUses() << "\n";
+          }
+        }
+      });
     }
   }
 
diff --git a/llvm/lib/Target/AIE/AIEInterBlockScheduling.h b/llvm/lib/Target/AIE/AIEInterBlockScheduling.h
index eddc50a6ae87..817a3da1955c 100644
--- a/llvm/lib/Target/AIE/AIEInterBlockScheduling.h
+++ b/llvm/lib/Target/AIE/AIEInterBlockScheduling.h
@@ -4,7 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
+// (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
 //
 //===----------------------------------------------------------------------===//
 //
@@ -22,6 +22,7 @@
 #include "AIEDataDependenceHelper.h"
 #include "AIEHazardRecognizer.h"
 #include "AIEPostPipeliner.h"
+#include "AIERegDefUseTracker.h"
 #include "Utils/AIELoopUtils.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -82,6 +83,13 @@ class InterBlockEdges {
 // handling.
 enum class BlockType { Regular, Loop, Epilogue };
 
+// PostPipelinerMode determines whether the postpipeliner operates on physical
+// registers or virtualizes them for better scheduling opportunities.
+enum class PostPipelinerMode { None, Physical, Virtual, ReservedVirtual };
+
+// Helper function to get the name of a PostPipelinerMode as a string
+const char *getPostPipelinerModeName(PostPipelinerMode Mode);
+
 // These are states in the state machine that drives scheduling
 enum class SchedulingStage {
   // We are gathering all regions in the block to initialize the BlockState.
@@ -114,6 +122,8 @@ enum class SchedulingStage {
 class FixedpointState {
 public:
   SchedulingStage Stage = SchedulingStage::Scheduling;
+  // PostPipeliner mode - physical or virtual register mode
+  PostPipelinerMode PipelinerMode = PostPipelinerMode::None;
   // Parameters of the loop-aware convergence
   int LatencyMargin = 0;
   SmallMapVector<MachineInstr *, int, 8> PerMILatencyMargin;
@@ -207,6 +217,9 @@ class BlockState {
   // This holds an instance of the PostPipeliner for candidate loops.
   std::unique_ptr<PostPipeliner> PostSWP;
 
+  // This holds an instance of the RegLiveRangeTracker for loops.
+  std::unique_ptr<llvm::RegLiveRangeTracker> RegTracker;
+
 public:
   BlockState(MachineBasicBlock *Block);
   MachineBasicBlock *TheBlock = nullptr;
@@ -271,6 +284,14 @@ class BlockState {
   void clearSchedule();
 
   void setPipelined();
+
+  /// Set up a pipelining attempt; virtualizes registers in the virtual modes
+  void initPipelining();
+
+  /// Restore after failed pipelining - restores physical registers if
+  /// virtualized
+  void restorePipelining();
+
   bool isScheduled() const {
     return FixPoint.Stage == SchedulingStage::SchedulingDone || isPipelined() ||
            pipeliningFailed();
diff --git a/llvm/lib/Target/AIE/AIELiveRangeUtils.cpp b/llvm/lib/Target/AIE/AIELiveRangeUtils.cpp
new file mode 100644
index 000000000000..54367d8b9859
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIELiveRangeUtils.cpp
@@ -0,0 +1,197 @@
+//===- AIELiveRangeUtils.cpp - Live Range Utilities -----------------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+
+#include "AIELiveRangeUtils.h"
+#include "AIEHazardRecognizer.h"
+#include "AIERegDefUseTracker.h"
+#include "AIEScheduleInterpreter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ResourceScoreboard.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <climits>
+
+#define DEBUG_TYPE "aie-live-range-utils"
+
+using namespace llvm;
+
+namespace llvm::AIE {
+
+// Greedily list-schedules the defs and uses of LR on a private scoreboard and
+// returns the distance from the earliest Write to the latest Read event.
+LiveRangeScheduleResult
+computeMinimalSchedule(const RegLiveRange &LR, const ScheduleDAG &DAG,
+                       const AIEHazardRecognizer &HR,
+                       const AIEScheduleInterpreter &Interp) {
+  // TODO: Determine optimal scoreboard bounds based on pipeline depth
+  // and latencies. For now, use a fixed range.
+  constexpr int ScoreboardLowerBound = -32;
+  constexpr int ScoreboardUpperBound = 31;
+
+  // Create a local scoreboard for this scheduling attempt.
+  ResourceScoreboard<FuncUnitWrapper> LocalScoreboard;
+  LocalScoreboard.config(ScoreboardLowerBound, ScoreboardUpperBound);
+
+  // Collect instructions from the live range, defs first, then uses.
+  // This provides a natural topological ordering for most cases.
+  SmallVector<const MachineInstr *, 8> Instructions;
+  DenseSet<const MachineInstr *> Seen;
+
+  // Collect def instructions.
+  for (const auto &DefInfo : LR.defs()) {
+    const MachineInstr *MI = DefInfo.getOperand()->getParent();
+    if (Seen.insert(MI).second)
+      Instructions.push_back(MI);
+  }
+
+  // Collect use instructions.
+  for (const auto &UseInfo : LR.uses()) {
+    const MachineInstr *MI = UseInfo.getOperand()->getParent();
+    if (Seen.insert(MI).second)
+      Instructions.push_back(MI);
+  }
+
+  // Map each MachineInstr to its SUnit for dependency tracking. The DAG may
+  // hold several copies of an instruction (for pipelining); keep the first.
+  DenseMap<const MachineInstr *, const SUnit *> MIToSUnit;
+  for (const SUnit &SU : DAG.SUnits) {
+    const MachineInstr *MI = SU.getInstr();
+    assert(MI && "SUnit must have a MachineInstr");
+    MIToSUnit.try_emplace(MI, &SU);
+  }
+
+  // Schedule with repeated scans, tracking what has been placed so far.
+  DenseMap<const MachineInstr *, int> IssueCycles;
+  DenseSet<const MachineInstr *> Scheduled;
+
+  // Keep scanning until all instructions are scheduled.
+  while (Scheduled.size() < Instructions.size()) {
+    bool MadeProgress = false;
+
+    for (const MachineInstr *MI : Instructions) {
+      if (Scheduled.count(MI))
+        continue;
+
+      const SUnit *SU = MIToSUnit.lookup(MI);
+      assert(SU && "Could not find SUnit for instruction in live range");
+
+      // Check if all predecessors within the live range are scheduled.
+      bool CanSchedule = true;
+      int EarliestCycle = 0;
+
+      for (const SDep &Pred : SU->Preds) {
+        if (SUnit *PredSU = Pred.getSUnit()) {
+          const MachineInstr *PredMI = PredSU->getInstr();
+          if (PredMI && Seen.count(PredMI)) {
+            if (!Scheduled.count(PredMI)) {
+              CanSchedule = false;
+              break;
+            }
+            // Account for latency (can be negative).
+            int PredCycle = IssueCycles[PredMI];
+            int MinCycle = PredCycle + static_cast<int>(Pred.getLatency());
+            EarliestCycle = std::max(EarliestCycle, MinCycle);
+          }
+        }
+      }
+
+      if (!CanSchedule)
+        continue;
+
+      // Find the earliest cycle without structural hazards, starting from
+      // EarliestCycle (never negative: it starts at 0 and only grows).
+      // NOTE(review): this search is not capped at ScoreboardUpperBound;
+      // confirm getHazardType tolerates cycles past the configured range.
+      int IssueCycle = EarliestCycle;
+      while (HR.getHazardType(LocalScoreboard, MI, IssueCycle) !=
+             ScheduleHazardRecognizer::NoHazard) {
+        ++IssueCycle;
+      }
+
+      // Record the issue cycle and reserve resources in the local scoreboard.
+      IssueCycles[MI] = IssueCycle;
+      Scheduled.insert(MI);
+      MadeProgress = true;
+      HR.emitInScoreboard(LocalScoreboard, *MI, MI->getDesc(), IssueCycle);
+    }
+
+    // A scan that places nothing would repeat forever; fail in every build.
+    if (!MadeProgress) {
+      LLVM_DEBUG({
+        dbgs()
+            << "Failed to make scheduling progress. Remaining instructions:\n";
+        for (const MachineInstr *MI : Instructions) {
+          if (!Scheduled.count(MI)) {
+            dbgs() << "  Unscheduled: " << *MI;
+            const SUnit *SU = MIToSUnit.lookup(MI);
+            if (SU) {
+              dbgs() << "    Waiting for predecessors:\n";
+              for (const SDep &Pred : SU->Preds) {
+                if (SUnit *PredSU = Pred.getSUnit()) {
+                  const MachineInstr *PredMI = PredSU->getInstr();
+                  if (PredMI && Seen.count(PredMI) &&
+                      !Scheduled.count(PredMI)) {
+                    dbgs() << "      " << *PredMI;
+                  }
+                }
+              }
+            }
+          }
+        }
+      });
+      report_fatal_error("Failed to make scheduling progress");
+    }
+  }
+
+  // Generate events for all scheduled instructions.
+  EventSchedule Schedule;
+  for (const MachineInstr *MI : Instructions) {
+    int IssueCycle = IssueCycles[MI];
+    Interp.addInstructionEvents(*MI, IssueCycle, Schedule);
+  }
+
+  // Compute the minimal live length from the event schedule.
+  // Find the earliest def event and latest use event for this live range.
+  int MinDefCycle = INT_MAX;
+  int MaxUseCycle = INT_MIN;
+
+  for (size_t Cycle = 0; Cycle < Schedule.size(); ++Cycle) {
+    for (const auto &Event : Schedule[Cycle]) {
+      // Check if this event belongs to an instruction in our live range.
+      if (!Seen.count(Event.MI))
+        continue;
+
+      if (Event.Type == EventType::Write) {
+        // This is a def event - update earliest def cycle.
+        MinDefCycle = std::min(MinDefCycle, static_cast<int>(Cycle));
+      } else if (Event.Type == EventType::Read) {
+        // This is a use event - update latest use cycle.
+        MaxUseCycle = std::max(MaxUseCycle, static_cast<int>(Cycle));
+      }
+    }
+  }
+
+  // Live length: first def event up to the cycle before the last use event.
+  // It is 0, not a wrapped negative, unless a read follows the first write.
+  unsigned MinimalLength = 0;
+  if (MinDefCycle != INT_MAX && MaxUseCycle > MinDefCycle) {
+    MinimalLength = MaxUseCycle - MinDefCycle;
+  }
+
+  return LiveRangeScheduleResult(MinimalLength);
+}
+
+} // end namespace llvm::AIE
diff --git a/llvm/lib/Target/AIE/AIELiveRangeUtils.h b/llvm/lib/Target/AIE/AIELiveRangeUtils.h
new file mode 100644
index 000000000000..51c67dfdb6c2
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIELiveRangeUtils.h
@@ -0,0 +1,59 @@
+//===- AIELiveRangeUtils.h - Live Range Utilities -------------------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utilities for analyzing and scheduling live ranges.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIELIVERANGEUTILS_H
+#define LLVM_LIB_TARGET_AIE_AIELIVERANGEUTILS_H
+
+namespace llvm {
+
+class AIEHazardRecognizer;
+class AIEScheduleInterpreter;
+class RegLiveRange;
+class ScheduleDAG;
+
+namespace AIE {
+
+/// Result of computeMinimalSchedule(): the minimal live length of one range.
+class LiveRangeScheduleResult {
+  unsigned MinimalLength;
+
+public:
+  LiveRangeScheduleResult(unsigned MinimalLength)
+      : MinimalLength(MinimalLength) {}
+
+  /// Get the minimal live length for the range, measured in cycles.
+  unsigned getMinimalLiveLength() const { return MinimalLength; }
+};
+
+/// Compute the minimal live length for a single live range.
+///
+/// Schedules the instructions in the live range (defs and uses) greedily
+/// using the AIEScheduleInterpreter for latency information and
+/// AIEHazardRecognizer for structural resource checking. Returns the
+/// minimal event-space coverage from first def to last use.
+///
+/// \param LR The live range to schedule
+/// \param DAG The schedule DAG providing dependency information
+/// \param HR The hazard recognizer for resource checking
+/// \param Interp The schedule interpreter providing latency/event mapping
+/// \return Result containing the minimal live length
+LiveRangeScheduleResult
+computeMinimalSchedule(const RegLiveRange &LR, const ScheduleDAG &DAG,
+                       const AIEHazardRecognizer &HR,
+                       const AIEScheduleInterpreter &Interp);
+
+} // end namespace AIE
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AIE_AIELIVERANGEUTILS_H
diff --git a/llvm/lib/Target/AIE/AIELivenessVector.cpp b/llvm/lib/Target/AIE/AIELivenessVector.cpp
new file mode 100644
index 000000000000..90cc008708c6
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIELivenessVector.cpp
@@ -0,0 +1,177 @@
+//===- AIELivenessVector.cpp - Liveness vector implementation ------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a vector-like container for liveness information that
+// provides safe out-of-range access and common operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AIELivenessVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+
+namespace llvm {
+namespace AIE {
+
+bool Liveness::conflictsWith(const Liveness &Other) const {
+  // Rule 1: both sides occupy at least one common register file lane.
+  if ((Lanes & Other.Lanes).any()) {
+    return true;
+  }
+
+  // Rule 2: a bypass class read on one side is written on the other side.
+  // The relation is symmetric, so both directions are tested.
+  const auto OtherWrites = [&Other](unsigned Class) {
+    return llvm::is_contained(Other.BypassWrites, Class);
+  };
+  const auto OtherReads = [&Other](unsigned Class) {
+    return llvm::is_contained(Other.BypassReads, Class);
+  };
+  if (llvm::any_of(BypassReads, OtherWrites)) {
+    return true;
+  }
+  if (llvm::any_of(BypassWrites, OtherReads)) {
+    return true;
+  }
+
+  // Rule 3: bypass activity on one side against live register lanes on the
+  // other side. Both refer to the same register address, so they collide.
+  const bool BypassHere = !(BypassReads.empty() && BypassWrites.empty());
+  const bool BypassThere =
+      !(Other.BypassReads.empty() && Other.BypassWrites.empty());
+  if (BypassHere && Other.Lanes.any()) {
+    return true;
+  }
+
+  // Mirror image of the check above. If it does not fire either, the two
+  // livenesses are compatible.
+  return BypassThere && Lanes.any();
+}
+
+LivenessVector::LivenessVector(size_t Size) : Elements(Size) {}
+
+LivenessVector::LivenessVector(size_t Size, LaneBitmask InitialValue)
+    : Elements(Size, Liveness(InitialValue)) {}
+
+size_t LivenessVector::size() const { return Elements.size(); }
+
+bool LivenessVector::empty() const { return Elements.empty(); }
+
+Liveness &LivenessVector::operator[](size_t Index) {
+  assert(Index < Elements.size() && "Index out of range");
+  return Elements[Index];
+}
+
+const Liveness &LivenessVector::operator[](size_t Index) const {
+  assert(Index < Elements.size() && "Index out of range");
+  return Elements[Index];
+}
+
+Liveness LivenessVector::at(size_t Index) const {
+  // Unlike operator[], an out-of-range index is legal here: such a slot
+  // reads as a default-constructed (empty) Liveness.
+  const bool InRange = Index < Elements.size();
+  return InRange ? Elements[Index] : Liveness();
+}
+
+const SmallVector<Liveness, 8> &LivenessVector::getElements() const {
+  return Elements;
+}
+
+LivenessVector &LivenessVector::operator|=(const LivenessVector &Other) {
+  // Grow to cover Other; the new tail elements start out with no liveness.
+  const size_t OtherSize = Other.Elements.size();
+  if (OtherSize > Elements.size()) {
+    Elements.resize(OtherSize);
+  }
+
+  // Only the first OtherSize slots can change: OR-ing in the empty liveness
+  // that lies past Other's end is a no-op. Indexing Other.Elements directly
+  // also avoids the per-slot Liveness copy that at() would make.
+  for (size_t I = 0; I < OtherSize; ++I) {
+    Elements[I] |= Other.Elements[I];
+  }
+  return *this;
+}
+
+LivenessVector &LivenessVector::operator&=(const LivenessVector &Other) {
+  size_t Slot = 0; // at() reads as empty past Other's end, clearing the slot
+  for (Liveness &Elem : Elements) {
+    Elem &= Other.at(Slot++);
+  }
+  return *this;
+}
+
+LivenessVector &LivenessVector::operator-=(const LivenessVector &Other) {
+  // Slots past Other's end subtract nothing, so stop at the shorter size.
+  for (size_t I = 0; I < Elements.size() && I < Other.Elements.size(); ++I) {
+    Elements[I] -= Other.Elements[I];
+  }
+  return *this;
+}
+
+LivenessVector LivenessVector::operator|(const LivenessVector &Other) const {
+  LivenessVector Union(*this); // operate on a copy; *this is left untouched
+  Union |= Other;
+  return Union;
+}
+
+LivenessVector LivenessVector::operator&(const LivenessVector &Other) const {
+  LivenessVector Common(*this); // operate on a copy; *this is left untouched
+  Common &= Other;
+  return Common;
+}
+
+LivenessVector LivenessVector::operator-(const LivenessVector &Other) const {
+  LivenessVector Rest(*this); // operate on a copy; *this is left untouched
+  Rest -= Other;
+  return Rest;
+}
+
+bool LivenessVector::overlaps(const LivenessVector &Other) const {
+  // Slots beyond the shorter vector have no counterpart and are not compared.
+  const size_t Common = std::min(Elements.size(), Other.Elements.size());
+  size_t I = 0;
+  while (I < Common && !Elements[I].conflictsWith(Other.Elements[I])) {
+    ++I;
+  }
+  return I < Common; // stopped early <=> a conflicting slot was found
+}
+
+bool LivenessVector::any() const {
+  return llvm::any_of(Elements, [](const Liveness &L) { return L.any(); });
+}
+
+bool LivenessVector::none() const {
+  return !any(); // none_of(P) is the exact negation of any_of(P)
+}
+
+void LivenessVector::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
+
+void LivenessVector::print(raw_ostream &OS) const {
+  OS << "[";
+  const char *Separator = ""; // nothing precedes the first lane mask
+  for (const Liveness &Elem : Elements) {
+    OS << Separator << PrintLaneMask(Elem.getLanes());
+    Separator = ", ";
+  }
+  OS << "]";
+}
+
+} // namespace AIE
+} // namespace llvm
diff --git a/llvm/lib/Target/AIE/AIELivenessVector.h b/llvm/lib/Target/AIE/AIELivenessVector.h
new file mode 100644
index 000000000000..ee1901484529
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIELivenessVector.h
@@ -0,0 +1,222 @@
+//===- AIELivenessVector.h - Liveness vector container ---------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a vector-like container for liveness information that
+// provides safe out-of-range access and common operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIELIVENESSVECTOR_H
+#define LLVM_LIB_TARGET_AIE_AIELIVENESSVECTOR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/LaneBitmask.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace AIE {
+
+/// Liveness information for a single cycle/offset.
+/// Tracks both register file lanes and bypass usage to detect conflicts.
+class Liveness {
+private:
+  LaneBitmask Lanes;
+  // Set of bypass classes being read from at this cycle
+  SmallVector<unsigned, 2> BypassReads;
+  // Set of bypass classes being written to at this cycle
+  SmallVector<unsigned, 2> BypassWrites;
+
+public:
+  /// Construct with no lanes live
+  Liveness() : Lanes(LaneBitmask::getNone()) {}
+
+  /// Construct with specific lane mask
+  Liveness(LaneBitmask L) : Lanes(L) {}
+
+  /// Get the lane mask
+  LaneBitmask getLanes() const { return Lanes; }
+
+  /// Set the lane mask
+  void setLanes(LaneBitmask L) { Lanes = L; }
+
+  /// Add a bypass read for a specific forwarding class
+  void addBypassRead(unsigned ForwardingClass) {
+    if (ForwardingClass != 0 &&
+        !llvm::is_contained(BypassReads, ForwardingClass)) {
+      BypassReads.push_back(ForwardingClass);
+    }
+  }
+
+  /// Add a bypass write for a specific forwarding class
+  void addBypassWrite(unsigned ForwardingClass) {
+    if (ForwardingClass != 0 &&
+        !llvm::is_contained(BypassWrites, ForwardingClass)) {
+      BypassWrites.push_back(ForwardingClass);
+    }
+  }
+
+  /// Get bypass reads
+  ArrayRef<unsigned> getBypassReads() const { return BypassReads; }
+
+  /// Get bypass writes
+  ArrayRef<unsigned> getBypassWrites() const { return BypassWrites; }
+
+  /// Check if this liveness conflicts with another.
+  /// Conflicts occur when:
+  /// 1. Register file lanes overlap, OR
+  /// 2. A bypass read and bypass write use the same forwarding class, OR
+  /// 3. One has bypass activity and the other has register lanes
+  ///    (they share the same register address)
+  bool conflictsWith(const Liveness &Other) const;
+
+  /// Union with another liveness
+  Liveness &operator|=(const Liveness &Other) {
+    Lanes |= Other.Lanes;
+    // Merge bypass reads
+    for (unsigned FC : Other.BypassReads) {
+      addBypassRead(FC);
+    }
+    // Merge bypass writes
+    for (unsigned FC : Other.BypassWrites) {
+      addBypassWrite(FC);
+    }
+    return *this;
+  }
+
+  /// Intersection with another liveness: keeps the lanes set on both sides
+  /// and the bypass classes that both sides list in the same role.
+  Liveness &operator&=(const Liveness &Other) {
+    // Shared filter; reads and writes are intersected independently.
+    const auto KeepCommon = [](SmallVector<unsigned, 2> &Mine,
+                               const SmallVector<unsigned, 2> &Theirs) {
+      SmallVector<unsigned, 2> Common;
+      for (unsigned FC : Mine) {
+        if (llvm::is_contained(Theirs, FC)) {
+          Common.push_back(FC);
+        }
+      }
+      Mine = std::move(Common);
+    };
+
+    Lanes &= Other.Lanes;
+    // Survivors keep the relative order they had in this object's lists.
+    KeepCommon(BypassReads, Other.BypassReads);
+    KeepCommon(BypassWrites, Other.BypassWrites);
+    return *this;
+  }
+
+  /// Difference with another liveness: clears the lanes set in Other and
+  /// drops the bypass classes that Other lists in the same role.
+  Liveness &operator-=(const Liveness &Other) {
+    // Shared filter; reads and writes are reduced independently.
+    const auto DropListed = [](SmallVector<unsigned, 2> &Mine,
+                               const SmallVector<unsigned, 2> &Theirs) {
+      SmallVector<unsigned, 2> Kept;
+      for (unsigned FC : Mine) {
+        if (!llvm::is_contained(Theirs, FC)) {
+          Kept.push_back(FC);
+        }
+      }
+      Mine = std::move(Kept);
+    };
+
+    Lanes &= ~Other.Lanes;
+    // Survivors keep the relative order they had in this object's lists.
+    DropListed(BypassReads, Other.BypassReads);
+    DropListed(BypassWrites, Other.BypassWrites);
+    return *this;
+  }
+
+  /// Check if any lanes are live or any bypasses are active
+  bool any() const {
+    return Lanes.any() || !BypassReads.empty() || !BypassWrites.empty();
+  }
+
+  /// Check if no lanes are live and no bypasses are active
+  bool none() const {
+    return Lanes.none() && BypassReads.empty() && BypassWrites.empty();
+  }
+
+  /// Get the number of lanes set
+  unsigned getNumLanes() const { return Lanes.getNumLanes(); }
+
+  /// Implicit conversion to LaneBitmask for compatibility
+  operator LaneBitmask() const { return Lanes; }
+};
+
+/// A vector-like container for liveness information that provides safe
+/// out-of-range access and common operations.
+class LivenessVector {
+private:
+  SmallVector<Liveness, 8> Elements;
+
+public:
+  /// Construct with given size, all elements initialized to no liveness
+  explicit LivenessVector(size_t Size = 0);
+
+  /// Construct with given size and initial lane mask
+  LivenessVector(size_t Size, LaneBitmask InitialValue);
+
+  /// Get the size of the vector
+  size_t size() const;
+
+  /// Check if empty
+  bool empty() const;
+
+  /// Access element with bounds checking in debug mode
+  Liveness &operator[](size_t Index);
+  const Liveness &operator[](size_t Index) const;
+
+  /// Safe access - returns empty liveness if out of range
+  Liveness at(size_t Index) const;
+
+  /// Get the underlying elements
+  const SmallVector<Liveness, 8> &getElements() const;
+
+  /// Union with another vector
+  LivenessVector &operator|=(const LivenessVector &Other);
+
+  /// Intersection with another vector
+  LivenessVector &operator&=(const LivenessVector &Other);
+
+  /// Difference with another vector (this & ~Other)
+  LivenessVector &operator-=(const LivenessVector &Other);
+
+  /// Create union with another vector
+  LivenessVector operator|(const LivenessVector &Other) const;
+
+  /// Create intersection with another vector
+  LivenessVector operator&(const LivenessVector &Other) const;
+
+  /// Create difference with another vector
+  LivenessVector operator-(const LivenessVector &Other) const;
+
+  /// Check if any liveness overlaps with another vector
+  bool overlaps(const LivenessVector &Other) const;
+
+  /// Check if any element has liveness
+  bool any() const;
+
+  /// Check if no elements have liveness
+  bool none() const;
+
+  /// Debug dump
+  void dump() const;
+
+  /// Print to stream
+  void print(raw_ostream &OS) const;
+};
+
+} // namespace AIE
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AIE_AIELIVENESSVECTOR_H
diff --git a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp
index e1c969d26e57..295c31bd376d 100644
--- a/llvm/lib/Target/AIE/AIEMachineScheduler.cpp
+++ b/llvm/lib/Target/AIE/AIEMachineScheduler.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/ResourceScoreboard.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include <memory>
 
@@ -1498,7 +1499,7 @@ void llvm::AIEPostRASchedStrategy::buildGraph(ScheduleDAGMI &DAG, AAResults *AA,
   auto &BS = InterBlock.getBlockState(CurMBB);
   const auto &Region = BS.getCurrentRegion();
   int NCopies = 1;
-  if (BS.FixPoint.II) {
+  if (BS.FixPoint.Stage == SchedulingStage::Pipelining) {
     assert(BS.Kind == BlockType::Loop);
     assert(BS.getRegions().size() == 1);
     assert(Region.getBotFixedBundles().empty());
@@ -1508,6 +1509,8 @@ void llvm::AIEPostRASchedStrategy::buildGraph(ScheduleDAGMI &DAG, AAResults *AA,
     // dependences appear as forward dependences between the first and the
     // second iteration.
     NCopies = 2;
+    // Initialize pipelining.
+    BS.initPipelining();
   }
   DEBUG_BLOCKS(dbgs() << "    buildGraph, NCopies=" << NCopies << "\n");
   for (int S = 0; S < NCopies; S++) {
@@ -1519,7 +1522,9 @@ void llvm::AIEPostRASchedStrategy::buildGraph(ScheduleDAGMI &DAG, AAResults *AA,
   }
   DAG.ExitSU.setInstr(Region.getExitInstr());
   DAG.makeMaps();
-  DAG.buildEdges(Context->AA);
+  // We are in the postscheduler, RPTracker, PDiffs and LIS are null.
+  // For VirtMode, we do want to track LaneMasks though.
+  DAG.buildEdges(Context->AA, RPTracker, PDiffs, LIS, true, false);
   static_cast<AIEScheduleDAGMI &>(DAG).recordDbgInstrs(Region);
 }
 
@@ -1580,6 +1585,9 @@ void AIEScheduleDAGMI::schedule() {
     if (PostSWP.schedule(*this, BS.FixPoint.II, More)) {
       BS.setPipelined();
       LLVM_DEBUG(PostSWP.dump());
+    } else {
+      // Pipelining failed, restore original physical registers.
+      BS.restorePipelining();
     }
     return;
   }
diff --git a/llvm/lib/Target/AIE/AIEPostPipeliner.cpp b/llvm/lib/Target/AIE/AIEPostPipeliner.cpp
index 5a0552b5a1f5..c1506788d512 100644
--- a/llvm/lib/Target/AIE/AIEPostPipeliner.cpp
+++ b/llvm/lib/Target/AIE/AIEPostPipeliner.cpp
@@ -12,7 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "AIEPostPipeliner.h"
+#include "AIEDataDependenceHelper.h"
+#include "AIELiveRangeUtils.h"
+#include "AIEMachineScheduler.h"
+#include "AIEPostRegAlloc.h"
+#include "AIERegDefUseTracker.h"
 #include "AIESWPSolver.h"
+#include "AIEScarceRegScheduling.h"
+#include "AIEScheduleInterpreter.h"
 #include "AIESlotUtils.h"
 #include "Utils/AIELoopUtils.h"
 #include "Utils/AIEMachineInstrPrint.h"
@@ -23,6 +30,7 @@
 #include "llvm/CodeGen/ResourceScoreboard.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <limits>
 #include <string>
@@ -47,6 +55,15 @@ static cl::opt<int> PresetII("aie-postpipeliner-target-ii",
                              cl::desc("II for which to allow the solver"),
                              cl::init(0), cl::Hidden);
 
+// Debug option. Setting it to one will implement the linear schedule
+// without pipeline parallelism.
+static cl::opt<int>
+    ForcedStageCount("aie-postpipeliner-force-stagecount",
+                     cl::desc("Extract a pipeline with the given stage"
+                              " count. This is only granted if it divides the"
+                              " computed stage count."),
+                     cl::init(0), cl::Hidden);
+
 PipelineScheduleVisitor::~PipelineScheduleVisitor() {}
 
 std::optional<int> PostPipelinerStrategy::fitInInterval(
@@ -95,8 +112,10 @@ class PostPipelineDumper : public PipelineScheduleVisitor {
 // The latency state is maintained in an 'Earliest' entry for each SUnit,
 // which is updated whenvever we schedule a predecessor of that SUnit.
 
-PostPipeliner::PostPipeliner(const AIEHazardRecognizer &HR, int NInstr)
-    : HR(HR), NInstr(NInstr) {}
+PostPipeliner::PostPipeliner(const AIEHazardRecognizer &HR, int NInstr,
+                             RegLiveRangeTracker &RegTracker,
+                             const MachineFunction &MF)
+    : HR(HR), RegTracker(RegTracker), Interpreter(MF), NInstr(NInstr) {}
 
 bool PostPipeliner::isPostPipelineCandidate(MachineBasicBlock &LoopBlock) {
   // We leave the single-block loop criterion to our caller. It is fulfilled
@@ -455,6 +474,68 @@ void PostPipeliner::computeRecMII() {
   LLVM_DEBUG(dbgs() << "RecMII=" << RecMII << "\n");
 }
 
+int PostPipeliner::computeScarceRegMII() {
+  int ScarceRegMII = 0; // stays 0 when no register has competing ranges
+
+  // Group scarce live ranges by their base register.
+  DenseMap<MCRegister, SmallVector<const RegLiveRange *, 4>> ScarceRangesByReg;
+  for (const auto &LR : RegTracker.getLiveRanges()) {
+    // Only consider ranges that are marked as scarce.
+    if (!LR.isScarce()) {
+      continue;
+    }
+
+    MCRegister BaseReg = LR.getBaseReg();
+    if (BaseReg != MCRegister::NoRegister) {
+      ScarceRangesByReg[BaseReg].push_back(&LR);
+    }
+  }
+
+  // For each register with several competing scarce ranges, sum the minimal
+  // live lengths. The largest such sum over all registers is the result.
+  DEBUG_WITH_TYPE("aie-reg-liverange", {
+    dbgs() << "\n=== Scarce Register Analysis (II=" << II << ") ===\n";
+  });
+
+  for (const auto &[Reg, Ranges] : ScarceRangesByReg) {
+    // A register with a single range has no competition; skip it.
+    if (Ranges.size() <= 1)
+      continue;
+
+    unsigned TotalLength = 0;
+    DEBUG_WITH_TYPE("aie-reg-liverange", {
+      const auto *TRI = DAG->MF.getSubtarget().getRegisterInfo();
+      dbgs() << "Register " << TRI->getName(Reg) << " has " << Ranges.size()
+             << " competing ranges (1 available):\n";
+    });
+
+    for (const RegLiveRange *LR : Ranges) {
+      auto Result = AIE::computeMinimalSchedule(*LR, *DAG, HR, Interpreter);
+      unsigned RangeLen = Result.getMinimalLiveLength();
+      TotalLength += RangeLen;
+
+      DEBUG_WITH_TYPE("aie-reg-liverange", {
+        dbgs() << "  Range with " << LR->getNumDefs() << " defs, "
+               << LR->getNumUses() << " uses: minimal length = " << RangeLen
+               << "\n";
+      });
+    }
+
+    DEBUG_WITH_TYPE("aie-reg-liverange",
+                    { dbgs() << "  Total length: " << TotalLength << "\n"; });
+
+    ScarceRegMII = std::max(ScarceRegMII, static_cast<int>(TotalLength));
+  }
+
+  DEBUG_WITH_TYPE("aie-reg-liverange", {
+    dbgs() << "ScarceRegMII=" << ScarceRegMII << "\n";
+    dbgs() << "============================\n\n";
+  });
+
+  LLVM_DEBUG(dbgs() << "ScarceRegMII=" << ScarceRegMII << "\n");
+  return ScarceRegMII;
+}
+
 bool PostPipeliner::computeLoopCarriedParameters() {
 
   // Initialize slot counts.
@@ -581,12 +662,37 @@ const char *getEdgeColor(SDep::Kind Kind) {
   return "gray";
 }
 
+// Builds the DOT attribute list for one dependence edge. The label is the
+// signed latency, followed by the register for register dependences.
+std::string edgeAttributes(const SDep &Dep, const TargetRegisterInfo *TRI) {
+  const SDep::Kind Kind = Dep.getKind();
+  std::string Attrs = "[label=\"";
+  Attrs += std::to_string(Dep.getSignedLatency());
+  // Order edges carry no register; Data, Output and Anti edges do.
+  if (Kind != SDep::Order) {
+    const Register Reg = Dep.getReg();
+    if (Reg.isPhysical()) {
+      // Physical registers are shown by their target name.
+      Attrs += " ";
+      Attrs += TRI->getName(Reg);
+    } else if (Reg.isVirtual()) {
+      // Virtual registers are shown as VR<index>.
+      Attrs += " VR";
+      Attrs += std::to_string(Register::virtReg2Index(Reg));
+    }
+  }
+  Attrs += "\", color=";
+  Attrs += getEdgeColor(Kind);
+  return Attrs + "]";
+}
+
 void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) {
   dbgs() << "digraph {\n";
   const auto *TRI = DAG->MF.getSubtarget().getRegisterInfo();
 
   // Collect backedge sources and destinations for mirroring.
-  SmallVector<std::tuple<int, int, int, SDep::Kind>, 16> Lcds;
+  // Store the full SDep to preserve latency, kind, and register information.
+  SmallVector<std::tuple<int, int, SDep>, 16> Lcds;
   SmallSet<int, 16> LcdSrc;
   SmallSet<int, 16> LcdDst;
 
@@ -603,7 +709,7 @@ void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) {
       }
       // This is a backedge from S to D in the next iteration.
       // Add it to the Lcds, and register src and dst nodes.
-      Lcds.emplace_back(S, D0, Dep.getSignedLatency(), Dep.getKind());
+      Lcds.emplace_back(S, D0, Dep);
       LcdSrc.insert(S);
       LcdDst.insert(D0);
     }
@@ -629,43 +735,26 @@ void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) {
            << "\"]\n";
   }
 
-  for (const auto &[Src, Dst, Latency, Kind] : Lcds) {
+  // Emit loop-carried dependency edges (mirror edges).
+  for (const auto &[Src, Dst, Dep] : Lcds) {
+    const std::string Attrs = edgeAttributes(Dep, TRI);
     // Create an edge from the split source to the destination.
-    dbgs() << format("\tSU%d_src -> SU%d [label=%d, color=%s]\n", Src, Dst,
-                     Latency, getEdgeColor(Kind));
-    // Create an edge from the source to the split destination
-    dbgs() << format("\tSU%d -> SU%d_dst [label=%d, color=%s]\n", Src, Dst,
-                     Latency, getEdgeColor(Kind));
+    dbgs() << format("\tSU%d_src -> SU%d ", Src, Dst) << Attrs << "\n";
+    // Create an edge from the source to the split destination.
+    dbgs() << format("\tSU%d -> SU%d_dst ", Src, Dst) << Attrs << "\n";
   }
 
+  // Emit regular (intra-iteration) edges.
   for (int K = 0; K < Info.NInstr; K++) {
-    auto &SU = DAG->SUnits[K];
-    for (auto &Dep : SU.Succs) {
-      auto *Succ = Dep.getSUnit();
+    const SUnit &SU = DAG->SUnits[K];
+    for (const SDep &Dep : SU.Succs) {
+      const SUnit *Succ = Dep.getSUnit();
       const int S = Succ->NodeNum;
-      if (S > Info.NInstr || S % Info.NInstr == K || Succ->isBoundaryNode()) {
+      if (S >= Info.NInstr || S % Info.NInstr == K || Succ->isBoundaryNode()) {
         continue;
       }
-
-      dbgs() << "\tSU" << K << " -> " << "SU" << S;
-      dbgs() << " [ label=\"" << Dep.getSignedLatency();
-      switch (Dep.getKind()) {
-      case SDep::Data:
-      case SDep::Output:
-      case SDep::Anti: {
-        const Register Reg = Dep.getReg();
-        if (Reg.isPhysical()) {
-          dbgs() << format(" %s ", TRI->getName(Reg));
-        } else {
-          dbgs() << format(" VR%d ", Register::virtReg2Index(Reg));
-        }
-        break;
-      }
-      case SDep::Order:
-        break;
-      }
-      dbgs() << "\" color=" << getEdgeColor(Dep.getKind()) << " ] ";
-      dbgs() << "\n";
+      dbgs() << "\tSU" << K << " -> SU" << S << " " << edgeAttributes(Dep, TRI)
+             << "\n";
     }
   }
   dbgs() << "}\n";
@@ -764,6 +853,7 @@ int PostPipeliner::mostUrgent(PostPipelinerStrategy &Strategy) {
 
 void PostPipeliner::resetSchedule(bool FullReset) {
   Scoreboard.clear();
+  EventSched.clear();
   int K = 0;
   for (auto &N : Info.Nodes) {
     N.reset(FullReset);
@@ -835,6 +925,9 @@ bool PostPipeliner::scheduleFirstIteration(PostPipelinerStrategy &Strategy) {
     scheduleNode(SU, Actual, Strategy);
     Info.commitCycle(N);
 
+    // Populate event schedule for this representative instruction
+    Interpreter.addInstructionEvents(*SU.getInstr(), Actual, EventSched);
+
     DEBUG_FULL(dbgs() << "Scoreboard\n"; Scoreboard.dumpFull(););
   }
 
@@ -868,6 +961,7 @@ int computeEarliestFromPreds(const SUnit &SU, const ScheduleInfo &Info) {
   return Earliest;
 }
 #endif
+
 } // namespace
 
 bool PostPipeliner::scheduleOtherIterations(PostPipelinerStrategy &Strategy) {
@@ -953,6 +1047,47 @@ bool PostPipeliner::scheduleOtherIterations(PostPipelinerStrategy &Strategy) {
   return true;
 }
 
+bool PostPipeliner::tryScarceRangePacking() {
+  // Fetch the tracker's cached most promising scarce range set.
+  const auto &ScarceRangePtrs = RegTracker.getMostPromisingScarceRanges();
+
+  // If no scarce ranges found, this approach is not applicable.
+  if (ScarceRangePtrs.empty()) {
+    return false;
+  }
+
+  // Build ScarceRange objects from the RegLiveRange pointers.
+  std::vector<ScarceRange> ScarceRanges;
+  ScarceRanges.reserve(ScarceRangePtrs.size());
+  for (const RegLiveRange *LR : ScarceRangePtrs) {
+    ScarceRanges.emplace_back(*LR, *DAG);
+  }
+
+  // Build the scarce-only DAG.
+  buildScarceDAG(ScarceRanges, Info, *DAG);
+
+  // The scarce-only DAG must be acyclic by construction (strict ordering of
+  // uses/defs on the same physreg). Only verified in builds with asserts.
+  assert(checkAcyclic(ScarceRanges) &&
+         "Scarce-only DAG must be acyclic by construction");
+
+  // Create the strategy once (precomputes predecessors and members).
+  BurstMostUrgentStrategy Strategy(*DAG, Info, ScarceRanges, MinLength + II);
+
+  // One scheduling attempt per order; enumerateRangeOrders gives the result.
+  return enumerateRangeOrders(
+      ScarceRanges, [this, &Strategy](const SmallVector<int, 4> &Order) {
+        // Reset before each attempt (partial reset, FullReset=false).
+        resetSchedule(/*FullReset=*/false);
+
+        // Initialize the strategy with this order.
+        Strategy.init(Order);
+
+        // Try scheduling with this strategy; true means this order worked.
+        return scheduleWithStrategy(Strategy);
+      });
+}
+
 bool PostPipeliner::scheduleWithStrategy(PostPipelinerStrategy &S) {
   DEBUG_SUMMARY(dbgs() << "Starting " << S.name() << "\n");
   if (!scheduleFirstIteration(S)) {
@@ -971,6 +1106,10 @@ bool PostPipeliner::scheduleWithStrategy(PostPipelinerStrategy &S) {
   Info.applyRotation(II);
   Info.resetRotation();
 
+  if (!tryAllocateRegisters()) {
+    return false;
+  }
+  DEBUG_SUMMARY(dbgs() << "   Register allocation successful\n");
   return true;
 }
 
@@ -1247,6 +1386,15 @@ static const ConfigStrategy::Configuration Heuristics[] = {
 
 bool PostPipeliner::tryApproaches() {
   DEBUG_SUMMARY(dbgs() << "-- MinLength=" << MinLength << "\n");
+
+  // Try scarce range packing approach (VRegMode only).
+  if (RegTracker.areRegistersVirtualized()) {
+    if (tryScarceRangePacking()) {
+      DEBUG_SUMMARY(dbgs() << "    Scarce range packing succeeded\n");
+      return true;
+    }
+  }
+
   int HeuristicIndex = 0;
   for (const auto &Config : Heuristics) {
     if (Heuristic >= 0 && Heuristic != HeuristicIndex++) {
@@ -1379,9 +1527,9 @@ bool PostPipeliner::applySolver(const SolverData &Data, SWPSolver &Solver,
 
 bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval,
                              MachineOptimizationRemarkEmitter &More) {
-
   II = InitiationInterval;
   DAG = &TheDAG;
+  DEBUG_SUMMARY(dbgs() << format("PSBEGIN II=%d\n", II));
 
   // We need to set up a scoreboard that gives us some look-ahead.
   // The look-ahead is used heuristically, to see conflicts with future
@@ -1412,25 +1560,99 @@ bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval,
              << "Longest circuit does not fit II." << ore::NV("II", II)
              << ore::NV("BasicBlock", BB->getName());
     });
+    DEBUG_SUMMARY(dbgs() << "PSEND\n");
     return false;
   }
+
+  // Check scarce register MII (VRegMode only).
+  if (RegTracker.areRegistersVirtualized()) {
+    const int ScarceRegMII = computeScarceRegMII();
+    if (II < ScarceRegMII) {
+      More.emit([&]() {
+        return MachineOptimizationRemarkMissed("postpipeliner", "schedule",
+                                               DbgLoc, BB)
+               << "Scarce register pressure does not fit II."
+               << ore::NV("II", II) << ore::NV("ScarceRegMII", ScarceRegMII)
+               << ore::NV("BasicBlock", BB->getName());
+      });
+      DEBUG_SUMMARY(dbgs() << "PSEND\n");
+      return false;
+    }
+  }
   LLVM_DEBUG(dumpIntervals(Info, MinLength, II));
   if (!tryApproaches()) {
     More.emit([&]() {
       return MachineOptimizationRemarkMissed("postpipeliner", "schedule",
                                              DbgLoc, BB)
-             << "No schedule found.";
+             << "No schedule found with register allocation.";
     });
-    LLVM_DEBUG(dbgs() << "PostPipeliner: No schedule found\n");
+    LLVM_DEBUG(
+        dbgs()
+        << "PostPipeliner: No schedule found with register allocation\n");
+    DEBUG_SUMMARY(dbgs() << "PSEND\n");
     return false;
   }
 
   More.emit([&]() {
     return MachineOptimizationRemark("postpipeliner", "schedule", DbgLoc, BB)
-           << "Schedule found" << ore::NV("NS", NStages) << ore::NV("II", II)
+           << "Schedule found with register allocation"
+           << ore::NV("NS", NStages) << ore::NV("II", II)
            << ore::NV("BasicBlock", BB->getName());
   });
+
   LLVM_DEBUG(dbgs() << "PostPipeliner: Success\n");
+  DEBUG_SUMMARY(dbgs() << "PSEND\n");
+  return true;
+}
+
+bool PostPipeliner::tryAllocateRegisters() {
+  // In physical mode, registers are not virtualized and nothing has to be
+  // allocated, so this trivially succeeds.
+  if (!RegTracker.areRegistersVirtualized()) {
+    LLVM_DEBUG(
+        dbgs() << "PostPipeliner: Physical mode - no allocation needed\n");
+    return true;
+  }
+
+  auto &MF = *DAG->getBB()->getParent();
+  auto &MRI = MF.getRegInfo();
+  const auto &ST = MF.getSubtarget();
+  const auto *TRI = ST.getRegisterInfo();
+
+  // Compute modulo live lanes (for the current II) from the event schedule
+  // populated during scheduling.
+  auto LiveLanesByVirtReg = Interpreter.buildLiveLanes(EventSched, II);
+
+  // Debug dump if requested (debug type "aie-postregalloc").
+  DEBUG_WITH_TYPE("aie-postregalloc", {
+    dbgs() << "\n=== Live Intervals ===\n";
+    Interpreter.dumpEventSchedule(EventSched, dbgs());
+    dbgs() << "\n";
+    Interpreter.dumpLiveLanes(LiveLanesByVirtReg, II, dbgs());
+    dbgs() << "=================================\n\n";
+  });
+
+  // Perform register allocation; assignments are returned in VRegToPhysReg.
+  DenseMap<Register, MCRegister> VRegToPhysReg;
+  const bool Success = AIEPostRegAlloc::allocate(
+      LiveLanesByVirtReg, II, RegTracker, MF, *TRI, MRI, VRegToPhysReg);
+
+  if (!Success) {
+    LLVM_DEBUG(dbgs() << "PostPipeliner: Register allocation failed\n");
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "PostPipeliner: Register allocation succeeded with "
+                    << VRegToPhysReg.size() << " assignments\n");
+
+  // Apply the register assignments through RegTracker.
+  // This properly handles the virtualization state and updates the
+  // MachineFunction.
+  RegTracker.rewriteToPhysRegs(VRegToPhysReg);
+
+  LLVM_DEBUG(dbgs() << "PostPipeliner: Applied register allocation through "
+                       "RegTracker\n");
+
   return true;
 }
 
@@ -1520,14 +1742,14 @@ bool PostPipeliner::checkStages() {
 }
 
 void PostPipeliner::visitPipelineSection(
-    PipelineScheduleVisitor &Visitor, int StageCount,
+    PipelineScheduleVisitor &Visitor, int Repeat,
     std::function<bool(const NodeInfo &Node, int Stage, int M)> Filter) const {
 
-  // This runs StageCount times across the original body instructions and
+  // This runs Repeat times across the original body instructions and
   // calls the bundle emission callbacks according to Filter.
   // It provide the stage and the modulo cycle in that stage
   // (both starting at zero) to the filter
-  for (int Stage = 0; Stage < StageCount; Stage++) {
+  for (int Stage = 0; Stage < Repeat; Stage++) {
     for (int M = 0; M < II; M++) {
       Visitor.startBundle();
       for (int K = 0; K < NInstr; K++) {
@@ -1593,6 +1815,28 @@ int PostPipeliner::getFinalMinTripCount() const {
   return MinTripCount - Delta;
 }
 
+void PostPipeliner::materializePipeline(PipelineScheduleVisitor &Visitor) {
+  // A schedule NS=N, II=L is compatible with NS=1, II=N*L. More generally, if
+  // ForcedStageCount divides NStages we scale II by the quotient and emit the
+  // less dense schedule, since it can provide debugging insights.
+  if (ForcedStageCount && NStages % ForcedStageCount == 0 &&
+      NPrologueStages == NStages - 1) {
+    // Fix the II, let each node recompute its ModuloCycle and Stage for the
+    // new II, then fix the stage count and the prologue stage count.
+    const int Factor = NStages / ForcedStageCount;
+    II *= Factor;
+    for (int K = 0; K < NInstr; K++) {
+      auto &Node = Info[K];
+      Node.update(II);
+    }
+    NStages = ForcedStageCount;
+    NPrologueStages = NStages - 1;
+  }
+
+  visitPipelineSchedule(Visitor);
+  updateTripCount();
+}
+
 void NodeInfo::reset(bool FullReset) {
   Cycle = 0;
   Scheduled = false;
diff --git a/llvm/lib/Target/AIE/AIEPostPipeliner.h b/llvm/lib/Target/AIE/AIEPostPipeliner.h
index d664a79700dc..96740f559d78 100644
--- a/llvm/lib/Target/AIE/AIEPostPipeliner.h
+++ b/llvm/lib/Target/AIE/AIEPostPipeliner.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AIE_AIEPOSTPIPELINER_H
 
 #include "AIEHazardRecognizer.h"
+#include "AIEScheduleInterpreter.h"
 #include "AIESlotCounts.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/ResourceScoreboard.h"
@@ -27,7 +28,12 @@ class AIEHazardRecognizer;
 class MachineOptimizationRemarkEmitter;
 } // namespace llvm
 
+namespace llvm {
+class RegLiveRangeTracker; // Forward declaration
+}
+
 namespace llvm::AIE {
+
 namespace Solver {
 class SolverData;
 class SWPSolver;
@@ -220,9 +226,16 @@ class PipelineScheduleVisitor {
 
 class PostPipeliner {
   const AIEHazardRecognizer &HR;
+  RegLiveRangeTracker &RegTracker;
   ScheduleDAGMI *DAG = nullptr;
   const AIEBaseInstrInfo *TII = nullptr;
 
+  // Schedule interpreter for computing modulo live ranges
+  AIEScheduleInterpreter Interpreter;
+
+  // Event schedule populated during scheduling
+  EventSchedule EventSched;
+
   int FirstUnscheduled = 0;
   int LastUnscheduled = -1;
 
@@ -288,6 +301,7 @@ class PostPipeliner {
   void computeForward();
   bool computeBackward();
   void computeRecMII();
+  int computeScarceRegMII();
 
   /// Given Earliest and Latest of each node in the first iteration,
   /// compute the smallest length of the linear schedule that is feasible.
@@ -323,13 +337,24 @@ class PostPipeliner {
   /// Top level strategy scheduler
   bool scheduleWithStrategy(PostPipelinerStrategy &Strategy);
 
+  /// Try to schedule scarce ranges by enumerating orders and using
+  /// BurstMostUrgentStrategy.
+  /// Checks applicability, finds scarce ranges, and attempts scheduling.
+  /// Returns true if scheduling succeeded, false otherwise.
+  bool tryScarceRangePacking();
+
   /// Reset dynamic scheduling data.
   /// If FullReset is set, also reset information collected from earlier
   /// data mining scheduling rounds.
   void resetSchedule(bool FullReset);
 
+  /// Try to allocate registers for the current schedule
+  /// Returns true if register allocation succeeds
+  bool tryAllocateRegisters();
+
 public:
-  PostPipeliner(const AIEHazardRecognizer &HR, int NInstr);
+  PostPipeliner(const AIEHazardRecognizer &HR, int NInstr,
+                RegLiveRangeTracker &RegTracker, const MachineFunction &MF);
 
   /// Check whether this is a suitable loop for the PostPipeliner. It also
   /// leaves some useful information.
@@ -360,12 +385,14 @@ class PostPipeliner {
   // It will not call the section delimitor methods.
   // \param Filter will decide on calling Visitor.addToBundle().
   void visitPipelineSection(
-      PipelineScheduleVisitor &Visitor, int StageCount,
+      PipelineScheduleVisitor &Visitor, int Repeat,
       std::function<bool(const NodeInfo &Node, int Stage, int M)> Filter) const;
 
   // Modify the tripcount to run StageCount-1 less iterations.
   void updateTripCount() const;
 
+  void materializePipeline(PipelineScheduleVisitor &Visitor);
+
   int getFinalMinTripCount() const;
 
   void dump() const;
diff --git a/llvm/lib/Target/AIE/AIEPostRegAlloc.cpp b/llvm/lib/Target/AIE/AIEPostRegAlloc.cpp
new file mode 100644
index 000000000000..e7f291c4b135
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIEPostRegAlloc.cpp
@@ -0,0 +1,581 @@
+//===- AIEPostRegAlloc.cpp - Post-scheduling register allocator ----------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2024-2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a post-scheduling register allocator for AIE targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AIEPostRegAlloc.h"
+#include "AIELivenessVector.h"
+#include "AIERegDefUseTracker.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <vector>
+
+#define DEBUG_TYPE "aie-postregalloc"
+
+using namespace llvm;
+using namespace llvm::AIE;
+
+// Reset the occupancy maps and pre-compute interference graphs and metrics.
+void AIEPostRegAlloc::AllocState::init(
+    const TargetRegisterInfo *InTRI,
+    const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVReg,
+    const RegLiveRangeTracker *RegTracker, const MachineRegisterInfo &MRI) {
+  this->RegUnitOccupancy.clear();
+  this->PhysOccupancy.clear();
+  this->TRI = InTRI;
+
+  const auto &AvailableRegs = RegTracker->getAvailablePhysRegs();
+
+  // Build the register class interference graph once, over the IDs of the
+  // classes of the tracked LiveRanges (ranges without a class add no ID).
+  DenseSet<unsigned> UsedRCIds;
+  for (const RegLiveRange &LR : RegTracker->getLiveRanges()) {
+    if (const TargetRegisterClass *RC = LR.getRegisterClass())
+      UsedRCIds.insert(RC->getID());
+  }
+  this->RCInterferenceGraph =
+      AIEPostRegAlloc::buildRCInterferenceGraph(UsedRCIds, *InTRI);
+
+  // Build the virtual register interference graph once, on top of the RC graph.
+  this->VRegInterferenceGraph = AIEPostRegAlloc::buildVRegInterferenceGraph(
+      LiveLanesByVReg, MRI, RCInterferenceGraph);
+
+  // Pre-compute metrics for each LiveRange that has live lanes recorded.
+  this->AllMetrics.clear();
+  for (const RegLiveRange &LR : RegTracker->getLiveRanges()) {
+    const unsigned VReg = LR.getVReg().id();
+    auto It = LiveLanesByVReg.find(VReg);
+    if (It == LiveLanesByVReg.end())
+      continue;
+    const AIE::LivenessVector &Masks = It->second;
+    AllMetrics[VReg] = AIEPostRegAlloc::computeMetrics(
+        LR, Masks, VRegInterferenceGraph, LiveLanesByVReg, RCInterferenceGraph,
+        AvailableRegs, MRI, *InTRI);
+  }
+}
+
+// Check if VReg can be placed in PhysReg without conflicts.
+bool AIEPostRegAlloc::AllocState::canPlace(
+    unsigned VReg, Register PhysReg, const AIE::LivenessVector &VRegMasks,
+    const TargetRegisterClass *RC) const {
+
+  // Aliasing registers share RegUnits, so walking the units of PhysReg is
+  // enough to detect a clash with anything placed earlier.
+  for (MCRegUnitIterator UnitIt(PhysReg.asMCReg(), TRI); UnitIt.isValid();
+       ++UnitIt) {
+    const unsigned RegUnit = *UnitIt;
+    const auto Found = RegUnitOccupancy.find(RegUnit);
+    // A unit nobody occupies can never conflict.
+    if (Found == RegUnitOccupancy.end())
+      continue;
+    // The unit is in use; it only conflicts if the liveness overlaps.
+    if (!VRegMasks.overlaps(Found->second))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "  RegUnit conflict detected for "
+                      << printReg(VReg, TRI) << " in " << printReg(PhysReg, TRI)
+                      << " (unit " << RegUnit << ")\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Place VReg in PhysReg (updates occupancy).
+void AIEPostRegAlloc::AllocState::place(unsigned VReg, Register PhysReg,
+                                        const AIE::LivenessVector &VRegMasks,
+                                        const TargetRegisterClass *RC) {
+
+  // Record the liveness on the register itself (kept for compatibility).
+  PhysOccupancy[PhysReg] |= VRegMasks;
+
+  // Mark every RegUnit of PhysReg as occupied; aliases share these units,
+  // so they are covered without extra bookkeeping.
+  unsigned UpdatedUnits = 0;
+  for (MCRegUnitIterator UnitIt(PhysReg.asMCReg(), TRI); UnitIt.isValid();
+       ++UnitIt, ++UpdatedUnits) {
+    RegUnitOccupancy[*UnitIt] |= VRegMasks;
+  }
+
+  LLVM_DEBUG(dbgs() << "  Placed " << printReg(VReg, TRI) << " in "
+                    << printReg(PhysReg, TRI) << " (updated " << UpdatedUnits
+                    << " RegUnits)\n");
+}
+
+// Build register class interference graph with asymmetric weights.
+AIEPostRegAlloc::WeightedAsymmetricGraph
+AIEPostRegAlloc::buildRCInterferenceGraph(const DenseSet<unsigned> &UsedRCIds,
+                                          const TargetRegisterInfo &TRI) {
+  WeightedAsymmetricGraph Graph;
+
+  // Visit every ordered pair of distinct register classes (no self-edges).
+  for (unsigned RCId1 : UsedRCIds) {
+    const TargetRegisterClass *RC1 = TRI.getRegClass(RCId1);
+
+    for (unsigned RCId2 : UsedRCIds) {
+      if (RCId1 == RCId2)
+        continue;
+
+      const TargetRegisterClass *RC2 = TRI.getRegClass(RCId2);
+      unsigned RC2Size = std::distance(RC2->begin(), RC2->end());
+
+      // Count how many RC1 registers are blocked by each RC2 register.
+      // For asymmetric weight: if I allocate one register from RC2,
+      // how many RC1 registers become unavailable on average?
+      unsigned TotalRC1Blocked = 0;
+
+      for (MCPhysReg Reg2 : *RC2) {
+        unsigned RC1BlockedByThisReg2 = 0;
+        for (MCPhysReg Reg1 : *RC1) {
+          if (TRI.regsOverlap(Reg1, Reg2)) {
+            RC1BlockedByThisReg2++;
+          }
+        }
+        TotalRC1Blocked += RC1BlockedByThisReg2;
+      }
+
+      if (TotalRC1Blocked > 0) {
+        // Weight = average number of RC1 registers blocked per RC2 register.
+        // Scaled by 100 so the integer division keeps two decimals.
+        // This gives asymmetric weights:
+        // - eY -> VEC512: each VEC512 blocks ~0.5 eY registers
+        // - VEC512 -> eY: each eY blocks ~2 VEC512 registers
+        unsigned Weight = (TotalRC1Blocked * 100) / RC2Size;
+        // Ensure minimum weight of 1 for any overlap.
+        Weight = std::max(1u, Weight);
+        Graph.addInterference(RCId1, RCId2, Weight);
+
+        LLVM_DEBUG(dbgs() << "RC interference: " << TRI.getRegClassName(RC1)
+                          << " -> " << TRI.getRegClassName(RC2)
+                          << " weight=" << Weight << " (avg " << TotalRC1Blocked
+                          << "/" << RC2Size << ")\n");
+      }
+    }
+  }
+
+  return Graph;
+}
+
+// Build virtual register interference graph (symmetric).
+AIEPostRegAlloc::WeightedSymmetricGraph
+AIEPostRegAlloc::buildVRegInterferenceGraph(
+    const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVReg,
+    const MachineRegisterInfo &MRI,
+    const WeightedAsymmetricGraph &RCInterferenceGraph) {
+
+  WeightedSymmetricGraph Graph;
+
+  // Snapshot the VRegs into a vector so pairs can be enumerated by index.
+  std::vector<unsigned> VRegs;
+  for (const auto &[VReg, _] : LiveLanesByVReg)
+    VRegs.push_back(VReg);
+
+  // Check all pairs of virtual registers.
+  // Use symmetry: only check pairs where I < J.
+  for (size_t I = 0; I < VRegs.size(); ++I) {
+    unsigned VReg1 = VRegs[I];
+    const auto &Masks1 = LiveLanesByVReg.find(VReg1)->second;
+    unsigned RCId1 = MRI.getRegClass(VReg1)->getID();
+
+    for (size_t J = I + 1; J < VRegs.size(); ++J) {
+      unsigned VReg2 = VRegs[J];
+      const auto &Masks2 = LiveLanesByVReg.find(VReg2)->second;
+      unsigned RCId2 = MRI.getRegClass(VReg2)->getID();
+
+      // First check if their register classes can interfere. The RC graph is
+      // built without self-edges, so a shared class must be accepted here;
+      // otherwise same-class vregs would never be reported as interfering.
+      if (RCId1 != RCId2 && !RCInterferenceGraph.interferes(RCId1, RCId2))
+        continue;
+
+      // Then check if their live ranges overlap temporally.
+      if (Masks1.overlaps(Masks2))
+        Graph.addInterference(VReg1, VReg2);
+    }
+  }
+
+  return Graph;
+}
+
+// Compute the ordering metrics (size, interference, availability) for LR.
+AIEPostRegAlloc::VRegMetrics AIEPostRegAlloc::computeMetrics(
+    const RegLiveRange &LR, const AIE::LivenessVector &Masks,
+    const WeightedSymmetricGraph &VRegInterferenceGraph,
+    const DenseMap<unsigned, AIE::LivenessVector> &AllVRegs,
+    const WeightedAsymmetricGraph &RCInterferenceGraph,
+    const DenseSet<MCRegister> &AvailableRegs, const MachineRegisterInfo &MRI,
+    const TargetRegisterInfo &TRI) {
+  VRegMetrics Metrics = {0, 0, 0, 0, 0, 0};
+
+  const Register VReg = LR.getVReg();
+
+  // Lane total, peak width and duration over the non-empty elements of Masks.
+  for (const auto &Mask : Masks.getElements()) {
+    if (Mask.any()) {
+      unsigned LanesInCycle = Mask.getNumLanes();
+      Metrics.TotalLanes += LanesInCycle;
+      Metrics.MaxWidth = std::max(Metrics.MaxWidth, LanesInCycle);
+      Metrics.Duration++;
+    }
+  }
+
+  // Compute pure and aliasing interference degrees.
+  // Use the LiveRange's register class. NOTE(review): assumed non-null here.
+  const TargetRegisterClass *RC = LR.getRegisterClass();
+  unsigned RCId = RC->getID();
+
+  for (const auto &[OtherVReg, _] : AllVRegs) {
+    if (OtherVReg != VReg &&
+        VRegInterferenceGraph.interferes(VReg, OtherVReg)) {
+      // For interference with other VRegs, we still need MRI to look up
+      // their register class. A future optimization could pass a map
+      // from VReg to LiveRange to avoid this MRI dependency.
+      const TargetRegisterClass *OtherRC = MRI.getRegClass(OtherVReg);
+      unsigned OtherRCId = OtherRC->getID();
+
+      if (RCId == OtherRCId) {
+        // Same register class - pure interference (counted, not weighted).
+        Metrics.PureInterferenceDegree++;
+      } else if (RCInterferenceGraph.interferes(RCId, OtherRCId)) {
+        // Different but overlapping register classes - aliasing interference.
+        // Use asymmetric weight: how much does OtherVReg's class affect
+        // VReg's class?
+        unsigned Weight =
+            RCInterferenceGraph.getInterferenceWeight(RCId, OtherRCId);
+        Metrics.AliasingInterferenceDegree += Weight;
+      }
+    }
+  }
+
+  // Count available registers using per-LR AdmissibleRegs.
+  std::vector<Register> Candidates =
+      getCandidatePhysRegs(LR.getAdmissibleRegs(), AvailableRegs);
+  Metrics.NumAvailableRegs = Candidates.size();
+
+  return Metrics;
+}
+
+// Get allocatable physical registers for a live range.
+// The result is AdmissibleRegs (the semantic constraint coming from the
+// instruction encoding) restricted to AvailableRegs (global availability).
+std::vector<Register> AIEPostRegAlloc::getCandidatePhysRegs(
+    const DenseSet<MCRegister> &AdmissibleRegs,
+    const DenseSet<MCRegister> &AvailableRegs) {
+
+  // Walk the admissible set, which carries the per-LiveRange constraint, and
+  // keep only the registers that are also globally available for
+  // reallocation. The result follows the iteration order of AdmissibleRegs.
+  std::vector<Register> Result;
+  for (const MCRegister Admissible : AdmissibleRegs) {
+    const bool IsAvailable = AvailableRegs.count(Admissible) != 0;
+    if (!IsAvailable)
+      continue;
+
+    Result.push_back(Admissible);
+  }
+
+  return Result;
+}
+
+// Try to allocate using a specific scoring function for ordering.
+AIEPostRegAlloc::AllocResult AIEPostRegAlloc::tryAllocate(
+    const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVReg,
+    const RegLiveRangeTracker *RegTracker, const TargetRegisterInfo &TRI,
+    const MachineRegisterInfo &MRI, AllocState &State, ScoringFunction ScoreFn,
+    DenseMap<Register, MCRegister> &OutAssign) {
+
+  // Clear per-attempt state; graphs and metrics in State are kept as is.
+  State.RegUnitOccupancy.clear();
+  State.PhysOccupancy.clear();
+  OutAssign.clear();
+
+  const auto &AvailableRegs = RegTracker->getAvailablePhysRegs();
+
+  // Build sorted list of LiveRanges by difficulty.
+  struct LRInfo {
+    const RegLiveRange *LR;
+    unsigned VReg;
+    unsigned Score;
+    const AIE::LivenessVector *Masks;
+  };
+
+  // Score and collect LiveRanges using pre-computed metrics from State.
+  std::vector<LRInfo> LRInfos;
+  for (const RegLiveRange &LR : RegTracker->getLiveRanges()) {
+    const unsigned VReg = LR.getVReg().id();
+    auto It = LiveLanesByVReg.find(VReg);
+    if (It == LiveLanesByVReg.end())
+      continue;
+
+    LRInfo Info;
+    Info.LR = &LR;
+    Info.VReg = VReg;
+    Info.Score = ScoreFn(State.AllMetrics[VReg]);
+    Info.Masks = &It->second;
+    LRInfos.push_back(Info);
+  }
+
+  // Sort by descending score (hardest first).
+  // Use VReg as tiebreaker for deterministic ordering when scores are equal.
+  llvm::sort(LRInfos, [](const LRInfo &A, const LRInfo &B) {
+    if (A.Score != B.Score)
+      return A.Score > B.Score;
+    return A.VReg < B.VReg;
+  });
+
+  // Try to allocate each LiveRange.
+  for (const auto &Info : LRInfos) {
+    const RegLiveRange &LR = *Info.LR;
+    const unsigned VReg = Info.VReg;
+    const auto &VRegMasks = *Info.Masks;
+    const TargetRegisterClass *RC = LR.getRegisterClass();
+    const auto &Metrics = State.AllMetrics[VReg];
+
+    LLVM_DEBUG(dbgs() << "Allocating " << printReg(VReg, &TRI) << " class="
+                      << TRI.getRegClassName(RC) << " (score=" << Info.Score
+                      << ", available=" << Metrics.NumAvailableRegs
+                      << ", pure_int=" << Metrics.PureInterferenceDegree
+                      << ", alias_int=" << Metrics.AliasingInterferenceDegree
+                      << ")\n");
+
+    // Check for infeasible schedule: pure interference >= available registers.
+    // Reported as a global failure. NOTE(review): heuristic, not a proof.
+    if (Metrics.PureInterferenceDegree >= Metrics.NumAvailableRegs) {
+      LLVM_DEBUG(dbgs() << "  Infeasible schedule detected: pure interference ("
+                        << Metrics.PureInterferenceDegree
+                        << ") >= available registers ("
+                        << Metrics.NumAvailableRegs << ")\n");
+      return AllocResult(/*InfeasibleSchedule=*/true);
+    }
+
+    // Get candidate physical registers using AdmissibleRegs from LiveRange.
+    std::vector<Register> Candidates =
+        getCandidatePhysRegs(LR.getAdmissibleRegs(), AvailableRegs);
+
+    if (Candidates.empty()) {
+      LLVM_DEBUG(dbgs() << "  No candidates available!\n");
+      return AllocResult(/*InfeasibleSchedule=*/false);
+    }
+
+    // First-fit: take the first candidate whose RegUnits are free when needed.
+    Register ChosenPhys = Register();
+
+    for (Register PhysReg : Candidates) {
+      LLVM_DEBUG(dbgs() << "  Trying " << printReg(PhysReg, &TRI) << "\n");
+      if (State.canPlace(VReg, PhysReg, VRegMasks, RC)) {
+        ChosenPhys = PhysReg;
+        break;
+      }
+    }
+
+    if (!ChosenPhys.isValid()) {
+      LLVM_DEBUG(dbgs() << "  Failed to find suitable physreg!\n");
+      return AllocResult(/*InfeasibleSchedule=*/false);
+    }
+
+    // Place the VReg and record in output.
+    State.place(VReg, ChosenPhys, VRegMasks, RC);
+    OutAssign[Register(VReg)] = ChosenPhys.asMCReg();
+  }
+
+  LLVM_DEBUG(dbgs() << "Allocation succeeded with " << OutAssign.size()
+                    << " assignments\n");
+  return AllocResult();
+}
+
+// Dump virtual register metrics for debugging.
+// Writes a per-VReg table, aggregate statistics and a class histogram to dbgs().
+void AIEPostRegAlloc::dumpVRegMetrics(
+    const DenseMap<unsigned, VRegMetrics> &AllMetrics,
+    const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) {
+
+  dbgs() << "=== Virtual Register Metrics Dump ===\n";
+  dbgs() << "Total Virtual Registers: " << AllMetrics.size() << "\n\n";
+
+  // Collect and sort VRegs for consistent output.
+  std::vector<std::pair<unsigned, VRegMetrics>> VRegMetricsList;
+  for (const auto &[VReg, Metrics] : AllMetrics)
+    VRegMetricsList.push_back({VReg, Metrics});
+
+  // Sort by VReg number for consistent output.
+  llvm::sort(VRegMetricsList, [](const auto &A, const auto &B) {
+    return Register::virtReg2Index(A.first) < Register::virtReg2Index(B.first);
+  });
+
+  // Print header.
+  dbgs() << "VReg      RegClass                 Avail  Pure  Alias  "
+            "TotalLanes  MaxWidth  Duration\n";
+  dbgs() << "--------  -----------------------  -----  ----  -----  "
+            "----------  --------  --------\n";
+
+  // Print metrics for each VReg.
+  for (const auto &[VReg, Metrics] : VRegMetricsList) {
+    const TargetRegisterClass *RC = MRI.getRegClass(VReg);
+    const char *Status =
+        (Metrics.PureInterferenceDegree >= Metrics.NumAvailableRegs) ? " FAIL"
+                                                                     : "";
+    dbgs() << format("%%vreg%-4u  %-23s  %5u  %4u  %5u  %10u  %8u  %8u%s\n",
+                     Register::virtReg2Index(VReg), TRI.getRegClassName(RC),
+                     Metrics.NumAvailableRegs, Metrics.PureInterferenceDegree,
+                     Metrics.AliasingInterferenceDegree, Metrics.TotalLanes,
+                     Metrics.MaxWidth, Metrics.Duration, Status);
+  }
+
+  // Print summary statistics.
+  dbgs() << "\n=== Summary Statistics ===\n";
+
+  // Compute aggregate statistics.
+  unsigned TotalLanesSum = 0;
+  unsigned MaxWidthMax = 0;
+  unsigned MaxDuration = 0;
+  unsigned MaxPureInterferenceDegree = 0;
+  unsigned MaxAliasingInterferenceDegree = 0;
+  double AvgPureInterferenceDegree = 0.0;
+  double AvgAliasingInterferenceDegree = 0.0;
+
+  for (const auto &[_, Metrics] : VRegMetricsList) {
+    TotalLanesSum += Metrics.TotalLanes;
+    MaxWidthMax = std::max(MaxWidthMax, Metrics.MaxWidth);
+    MaxDuration = std::max(MaxDuration, Metrics.Duration);
+    MaxPureInterferenceDegree =
+        std::max(MaxPureInterferenceDegree, Metrics.PureInterferenceDegree);
+    MaxAliasingInterferenceDegree = std::max(
+        MaxAliasingInterferenceDegree, Metrics.AliasingInterferenceDegree);
+    AvgPureInterferenceDegree += Metrics.PureInterferenceDegree;
+    AvgAliasingInterferenceDegree += Metrics.AliasingInterferenceDegree;
+  }
+
+  if (!VRegMetricsList.empty()) {
+    AvgPureInterferenceDegree /= VRegMetricsList.size();
+    AvgAliasingInterferenceDegree /= VRegMetricsList.size();
+  }
+
+  dbgs() << "Total Lanes (sum):              " << TotalLanesSum << "\n";
+  dbgs() << "Max Width (max):                " << MaxWidthMax << "\n";
+  dbgs() << "Max Duration:                   " << MaxDuration << "\n";
+  dbgs() << "Max Pure Interference Degree:   " << MaxPureInterferenceDegree
+         << "\n";
+  dbgs() << "Max Aliasing Interference Deg:  " << MaxAliasingInterferenceDegree
+         << "\n";
+  dbgs() << format("Avg Pure Interference Degree:   %.2f\n",
+                   AvgPureInterferenceDegree);
+  dbgs() << format("Avg Aliasing Interference Deg:  %.2f\n",
+                   AvgAliasingInterferenceDegree);
+
+  // Count register classes used.
+  DenseMap<const TargetRegisterClass *, unsigned> RCCounts;
+  for (const auto &[VReg, _] : VRegMetricsList)
+    RCCounts[MRI.getRegClass(VReg)]++;
+
+  dbgs() << "\n=== Register Class Distribution ===\n";
+  std::vector<std::pair<const TargetRegisterClass *, unsigned>> RCCountVec;
+  for (const auto &[RC, Count] : RCCounts)
+    RCCountVec.push_back({RC, Count});
+  llvm::sort(RCCountVec, [](const auto &A, const auto &B) {
+    // Sort by count descending; break ties by class ID, because the
+    // pointer-keyed map above does not iterate in a reproducible order.
+    if (A.second != B.second)
+      return A.second > B.second;
+    return A.first->getID() < B.first->getID();
+  });
+
+  for (const auto &[RC, Count] : RCCountVec) {
+    dbgs() << format("  %-25s: %u\n", TRI.getRegClassName(RC), Count);
+  }
+
+  dbgs() << "\n=== End Virtual Register Metrics ===\n\n";
+}
+
+// Main allocation entry point.
+bool AIEPostRegAlloc::allocate(
+    const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVReg, int II,
+    RegLiveRangeTracker &RegTracker, const MachineFunction &MF,
+    const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI,
+    DenseMap<Register, MCRegister> &OutAssign) {
+
+  LLVM_DEBUG(dbgs() << "AIEPostRegAlloc::allocate for "
+                    << LiveLanesByVReg.size() << " vregs, II=" << II << "\n");
+
+  if (LiveLanesByVReg.empty()) {
+    LLVM_DEBUG(dbgs() << "No vregs to allocate\n");
+    return true;
+  }
+
+  LLVM_DEBUG(dbgs() << "Available " << RegTracker.getAvailablePhysRegs().size()
+                    << " physical registers\n");
+
+  // Initialize allocation state with interference graphs computed once.
+  AllocState State;
+  State.init(&TRI, LiveLanesByVReg, &RegTracker, MRI);
+
+  // Dump virtual register metrics when debug output is enabled.
+  LLVM_DEBUG(dumpVRegMetrics(State.AllMetrics, MRI, TRI));
+
+  // Define the allocation strategies to try.
+  struct AllocationStrategy {
+    const char *Name;
+    ScoringFunction ScoreFn;
+  };
+
+  std::vector<AllocationStrategy> Strategies = {
+      // Try scarce register class priority scoring first.
+      {"scarce register class scoring", scoreByScarceRegClass},
+      // Try interference-based scoring (graph coloring inspired).
+      {"interference degree scoring", scoreByInterference},
+      // Try with area+width scoring (original).
+      {"area+width scoring", scoreByAreaPlusWidth},
+      // Try with pure area, width-priority and duration scoring.
+      {"area scoring", scoreByArea},
+      {"width scoring", scoreByWidth},
+      {"duration scoring", scoreByDuration},
+      // Try a custom non-linear scoring function.
+      {"quadratic width scoring",
+       [](const VRegMetrics &M) {
+         if (M.PureInterferenceDegree >= M.NumAvailableRegs)
+           return UINT_MAX;
+         // Quadratic penalty for width, linear for duration.
+         return M.MaxWidth * M.MaxWidth + M.Duration;
+       }},
+  };
+
+  // Try each strategy in order.
+  for (const auto &Strategy : Strategies) {
+    LLVM_DEBUG(dbgs() << "Trying allocation with " << Strategy.Name << "\n");
+
+    AllocResult Result = tryAllocate(LiveLanesByVReg, &RegTracker, TRI, MRI,
+                                     State, Strategy.ScoreFn, OutAssign);
+
+    if (Result) {
+      LLVM_DEBUG(dbgs() << "Allocation succeeded with " << Strategy.Name
+                        << "\n");
+      return true;
+    }
+
+    LLVM_DEBUG(dbgs() << Strategy.Name << " failed\n");
+
+    // If the schedule is infeasible, no other scoring function will succeed.
+    if (Result.isInfeasibleSchedule()) {
+      LLVM_DEBUG(dbgs() << "Schedule is infeasible - skipping remaining "
+                        << "allocation strategies\n");
+      break;
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "All allocation attempts failed\n");
+  // Stay transactional: do not leak the partial result of the last attempt.
+  OutAssign.clear();
+  return false;
+}
diff --git a/llvm/lib/Target/AIE/AIEPostRegAlloc.h b/llvm/lib/Target/AIE/AIEPostRegAlloc.h
new file mode 100644
index 000000000000..63ccd3c7625a
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIEPostRegAlloc.h
@@ -0,0 +1,320 @@
+//===- AIEPostRegAlloc.h - Post-scheduling register allocator ------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a post-scheduling register allocator for AIE targets.
+// It performs modulo-aware register allocation for pipelined loops and can
+// also be used for non-loop blocks. The allocator is transactional and does
+// not spill - it returns false if allocation fails.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIEPOSTREGALLOC_H
+#define LLVM_LIB_TARGET_AIE_AIEPOSTREGALLOC_H
+
+#include "AIELivenessVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegister.h"
+#include <functional>
+#include <vector>
+
+namespace llvm {
+
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+class TargetRegisterClass;
+class RegLiveRangeTracker;
+class RegLiveRange;
+
+namespace AIE {
+
+/// Post-scheduling register allocator for AIE targets.
+///
+/// This allocator performs modulo-aware register allocation using lane masks
+/// to track sub-register liveness. It properly handles physical register
+/// aliasing, ensuring that allocating a register blocks all its aliases
+/// (sub-registers and super-registers). It is transactional (does not modify
+/// MRI until a complete solution is found) and does not spill (returns false
+/// if allocation fails).
+class AIEPostRegAlloc {
+private:
+  /// Sparse interference graph over unsigned node ids.
+  /// @tparam WeightT Edge weight type (bool for plain edges, unsigned for
+  ///                 weighted edges).
+  /// @tparam IsSymmetric True for an undirected graph, false for a directed
+  ///                     one.
+  template <typename WeightT = bool, bool IsSymmetric = true>
+  class InterferenceGraph {
+    using EdgeKey = std::pair<unsigned, unsigned>;
+
+    // Edge weights keyed by (from, to). Symmetric graphs only ever store the
+    // key with from <= to; asymmetric graphs keep both directions apart.
+    DenseMap<EdgeKey, WeightT> Edges;
+
+    /// Lookup key for edge A -> B; endpoints are ordered for symmetric graphs.
+    static EdgeKey keyFor(unsigned A, unsigned B) {
+      if constexpr (IsSymmetric) {
+        if (B < A)
+          std::swap(A, B);
+      }
+      return std::make_pair(A, B);
+    }
+
+  public:
+    /// Record an interference edge, overwriting any weight stored before.
+    /// Endpoint order is irrelevant for symmetric graphs; for asymmetric
+    /// graphs \p Weight is the weight from A to B.
+    void addInterference(unsigned A, unsigned B, WeightT Weight = WeightT(1)) {
+      Edges[keyFor(A, B)] = Weight;
+    }
+
+    /// Return true if A and B interfere. Every node interferes with itself.
+    bool interferes(unsigned A, unsigned B) const {
+      if (A == B)
+        return true;
+      auto Found = Edges.find(keyFor(A, B));
+      if (Found == Edges.end())
+        return false;
+      if constexpr (std::is_same_v<WeightT, bool>)
+        return Found->second;
+      else
+        return Found->second > 0;
+    }
+
+    /// Return the stored weight of the edge from A to B (directional for
+    /// asymmetric graphs). A == B and missing edges both yield WeightT(0).
+    WeightT getInterferenceWeight(unsigned A, unsigned B) const {
+      if (A == B)
+        return WeightT(0);
+      auto Found = Edges.find(keyFor(A, B));
+      return Found == Edges.end() ? WeightT(0) : Found->second;
+    }
+  };
+
+  // Type aliases for common use cases.
+  using SimpleSymmetricGraph = InterferenceGraph<bool, true>;
+  using WeightedSymmetricGraph = InterferenceGraph<unsigned, true>;
+  using WeightedAsymmetricGraph = InterferenceGraph<unsigned, false>;
+
+  /// Pre-computed metrics for a virtual register; all fields default to 0.
+  struct VRegMetrics {
+    // Sum of lanes across all cycles.
+    unsigned TotalLanes = 0;
+    // Maximum lanes in any single cycle.
+    unsigned MaxWidth = 0;
+    // Number of cycles where register is live.
+    unsigned Duration = 0;
+    // Number of other VRegs in the SAME register class that interfere.
+    unsigned PureInterferenceDegree = 0;
+    // Weighted interference from VRegs in aliasing register classes.
+    unsigned AliasingInterferenceDegree = 0;
+    // Number of available registers in this register class.
+    unsigned NumAvailableRegs = 0;
+  };
+
+  /// Outcome of a single allocation attempt.
+  /// A default-constructed value reports success; a value constructed from a
+  /// bool reports failure, with the bool telling whether it is infeasible.
+  class AllocResult {
+    bool Success = true;
+    bool InfeasibleSchedule = false;
+
+  public:
+    // Success: both members keep their in-class initializers.
+    AllocResult() = default;
+
+    // Failure. Pass true when the schedule itself is infeasible, i.e. no
+    // scoring function can succeed; pass false when only the current scoring
+    // function failed and another one might still work.
+    explicit AllocResult(bool InfeasibleSchedule) : Success(false) {
+      this->InfeasibleSchedule = InfeasibleSchedule;
+    }
+
+    // True if the failure was flagged as an infeasible schedule.
+    bool isInfeasibleSchedule() const { return this->InfeasibleSchedule; }
+
+    // Implicit conversion to bool: true exactly when allocation succeeded.
+    operator bool() const { return this->Success; }
+  };
+
+  /// Internal allocation state with RegUnit-based interference tracking.
+  struct AllocState {
+    /// RegUnit occupancy - tracks lane masks for each register unit.
+    /// RegUnits are the fundamental units of register interference in LLVM.
+    /// Two registers interfere if they share any RegUnits.
+    DenseMap<unsigned /*RegUnit*/, AIE::LivenessVector> RegUnitOccupancy;
+
+    /// Physical register occupancy - tracks lane masks for each allocated
+    /// physical register (kept for compatibility with existing code).
+    DenseMap<Register, AIE::LivenessVector> PhysOccupancy;
+
+    /// Pre-computed interference graphs (reused across scoring attempts).
+    WeightedAsymmetricGraph RCInterferenceGraph;
+    WeightedSymmetricGraph VRegInterferenceGraph;
+
+    /// Pre-computed metrics for all LiveRanges (reused across scoring
+    /// attempts). Keyed by VReg since there is a 1:1 mapping.
+    DenseMap<unsigned, VRegMetrics> AllMetrics;
+
+    /// Target register info for RegUnit computation; null by default.
+    const TargetRegisterInfo *TRI = nullptr;
+
+    /// Initialize occupancy and compute interference graphs.
+    /// \p RegTracker describes the problem (LiveRanges, AvailableRegs,
+    /// AdmissibleRegs per LR); \p LiveLanesByVReg is the temporal liveness
+    /// computed during scheduling. NOTE(review): \p TRI shadows the member.
+    void init(const TargetRegisterInfo *TRI,
+              const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVReg,
+              const RegLiveRangeTracker *RegTracker,
+              const MachineRegisterInfo &MRI);
+
+    /// Check if VReg can be placed in PhysReg without conflicts.
+    /// This checks RegUnit conflicts to handle aliasing properly.
+    bool canPlace(unsigned VReg, Register PhysReg,
+                  const AIE::LivenessVector &VRegMasks,
+                  const TargetRegisterClass *RC) const;
+
+    /// Place VReg in PhysReg (updates RegUnit occupancy).
+    void place(unsigned VReg, Register PhysReg,
+               const AIE::LivenessVector &VRegMasks,
+               const TargetRegisterClass *RC);
+  };
+
+  /// Scoring function type - takes pre-computed metrics and returns a score.
+  using ScoringFunction = std::function<unsigned(const VRegMetrics &)>;
+
+public:
+  /// Allocate physical registers for virtual registers.
+  ///
+  /// \param LiveLanesByVirtReg Per-cycle lane masks of each virtual register.
+  /// \param II Initiation interval; >= 1 for pipelined loops. For
+  ///        non-pipelined blocks, use 0 or the schedule length.
+  /// \param RegTracker RegLiveRangeTracker providing register information.
+  /// \param MF Machine function being processed.
+  /// \param TRI Target register info.
+  /// \param MRI Machine register info (not modified).
+  /// \param OutAssign Output map from virtual to physical registers.
+  /// \return True if allocation succeeded, false if no solution found.
+  static bool
+  allocate(const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVirtReg,
+           int II, RegLiveRangeTracker &RegTracker, const MachineFunction &MF,
+           const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI,
+           DenseMap<Register /*VReg*/, MCRegister /*Phys*/> &OutAssign);
+
+private:
+  /// Try to allocate using a specific scoring function for ordering.
+  /// Returns AllocResult which implicitly converts to bool (true = success).
+  /// On success, OutAssign contains the virtual to physical register mapping.
+  /// The RegTracker provides the problem description (LiveRanges,
+  /// AvailableRegs, AdmissibleRegs per LR).
+  static AllocResult
+  tryAllocate(const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVReg,
+              const RegLiveRangeTracker *RegTracker,
+              const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI,
+              AllocState &State, ScoringFunction ScoreFn,
+              DenseMap<Register, MCRegister> &OutAssign);
+
+  /// Compute metrics for a live range.
+  /// \param LR The live range to compute metrics for.
+  /// \param Masks The lane masks for this live range.
+  /// \param VRegInterferenceGraph Pre-computed virtual register interference
+  ///                               graph.
+  /// \param AllVRegs All virtual registers to compute degree against.
+  /// \param RCInterferenceGraph Register class interference graph with
+  ///                            weights.
+  /// \param AvailableRegs Available physical registers.
+  /// \param MRI Machine register info (for looking up other VRegs' RCs).
+  /// \param TRI Target register info.
+  static VRegMetrics
+  computeMetrics(const RegLiveRange &LR, const AIE::LivenessVector &Masks,
+                 const WeightedSymmetricGraph &VRegInterferenceGraph,
+                 const DenseMap<unsigned, AIE::LivenessVector> &AllVRegs,
+                 const WeightedAsymmetricGraph &RCInterferenceGraph,
+                 const DenseSet<MCRegister> &AvailableRegs,
+                 const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI);
+
+  /// Build register class interference graph with asymmetric weights.
+  static WeightedAsymmetricGraph
+  buildRCInterferenceGraph(const DenseSet<unsigned> &UsedRCIds,
+                           const TargetRegisterInfo &TRI);
+
+  /// Build virtual register interference graph (symmetric).
+  static WeightedSymmetricGraph buildVRegInterferenceGraph(
+      const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVirtReg,
+      const MachineRegisterInfo &MRI,
+      const WeightedAsymmetricGraph &RCInterferenceGraph);
+
+  /// Predefined scoring functions.
+  /// Each yields UINT_MAX once pure degree >= available registers.
+  // Area: sum of lanes across all cycles.
+  static unsigned scoreByArea(const VRegMetrics &M) {
+    const bool Saturated = M.PureInterferenceDegree >= M.NumAvailableRegs;
+    return Saturated ? UINT_MAX : M.TotalLanes;
+  }
+  // Width: maximum lanes in any single cycle.
+  static unsigned scoreByWidth(const VRegMetrics &M) {
+    const bool Saturated = M.PureInterferenceDegree >= M.NumAvailableRegs;
+    return Saturated ? UINT_MAX : M.MaxWidth;
+  }
+  // Duration: number of cycles where the register is live.
+  static unsigned scoreByDuration(const VRegMetrics &M) {
+    const bool Saturated = M.PureInterferenceDegree >= M.NumAvailableRegs;
+    return Saturated ? UINT_MAX : M.Duration;
+  }
+  // Area weighted by 10, plus width.
+  static unsigned scoreByAreaPlusWidth(const VRegMetrics &M) {
+    const bool Saturated = M.PureInterferenceDegree >= M.NumAvailableRegs;
+    return Saturated ? UINT_MAX : M.TotalLanes * 10 + M.MaxWidth;
+  }
+  // Interference degree: pure interference weighted by 1000, aliasing
+  // interference by 10, total lanes by 1.
+  static unsigned scoreByInterference(const VRegMetrics &M) {
+    const bool Saturated = M.PureInterferenceDegree >= M.NumAvailableRegs;
+    const unsigned Weighted = M.PureInterferenceDegree * 1000 +
+                              M.AliasingInterferenceDegree * 10 + M.TotalLanes;
+    return Saturated ? UINT_MAX : Weighted;
+  }
+  // Score prioritizing scarce register classes (fewer available registers).
+  // Register classes with fewer available registers get HIGHER scores,
+  // so they are allocated FIRST, giving them first pick of registers.
+  static unsigned scoreByScarceRegClass(const VRegMetrics &M) {
+    if (M.PureInterferenceDegree >= M.NumAvailableRegs)
+      return UINT_MAX;
+    // Fewer available registers = higher scarceness bonus; classes with 100
+    // or more registers get no bonus (avoids unsigned wrap-around of the
+    // subtraction, which would rank the LEAST scarce classes first).
+    unsigned Headroom = M.NumAvailableRegs < 100 ? 100 - M.NumAvailableRegs : 0;
+    // Large multiplier makes scarceness dominant; interference is secondary.
+    unsigned InterferenceScore = M.PureInterferenceDegree * 1000 +
+                                 M.AliasingInterferenceDegree * 10 +
+                                 M.TotalLanes;
+    return Headroom * 10000 + InterferenceScore;
+  }
+
+  /// Get allocatable physical registers for a live range.
+  /// Returns the intersection of AdmissibleRegs (semantic constraint from
+  /// instruction encoding) and AvailableRegs (global availability).
+  static std::vector<Register>
+  getCandidatePhysRegs(const DenseSet<MCRegister> &AdmissibleRegs,
+                       const DenseSet<MCRegister> &AvailableRegs);
+
+  /// Dump virtual register metrics for debugging.
+  static void dumpVRegMetrics(const DenseMap<unsigned, VRegMetrics> &AllMetrics,
+                              const MachineRegisterInfo &MRI,
+                              const TargetRegisterInfo &TRI);
+};
+
+} // namespace AIE
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AIE_AIEPOSTREGALLOC_H
diff --git a/llvm/lib/Target/AIE/AIERegDefUseTracker.cpp b/llvm/lib/Target/AIE/AIERegDefUseTracker.cpp
new file mode 100644
index 000000000000..212e577c561c
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIERegDefUseTracker.cpp
@@ -0,0 +1,1780 @@
+//===- AIERegDefUseTracker.cpp - Track Register Live Ranges --------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tracking and analysis of register live ranges in a
+// MachineBasicBlock. The tracker performs the following:
+// - Identifies register definitions and uses that form live ranges
+// - Merges aliasing register accesses into unified live ranges
+// - Filters out unsafe ranges (tied operands, live-in/out, implicit uses)
+// - Computes appropriate register classes for each live range
+// - Optionally replaces physical registers with virtual registers for testing
+//
+//===----------------------------------------------------------------------===//
+
+#include "AIERegDefUseTracker.h"
+#include "AIEBaseInstrInfo.h"
+#include "AIEBaseRegisterInfo.h"
+#include "Utils/AIEMachineInstrPrint.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aie-reg-liverange"
+
+using namespace llvm;
+
+namespace {
+
+/// Return true if \p Reg overlaps the physical register of the live-in/out
+/// entry \p RMP; the entry's lane mask is ignored (conservative).
+bool overlapsRMP(MCRegister Reg, const MachineBasicBlock::RegisterMaskPair &RMP,
+                 const TargetRegisterInfo *TRI) {
+  const auto EntryReg = RMP.PhysReg;
+  return TRI->regsOverlap(Reg, EntryReg);
+}
+
+} // end anonymous namespace
+
+void RegLiveRange::dumpBrief(const TargetRegisterInfo *TRI) const {
+  // One-line summary on the debug stream: id, base register name, operand
+  // counts, the reserved marker and, when present, the first def instruction.
+  auto &OS = dbgs();
+  StringRef Name = "unknown";
+  if (BaseReg != MCRegister::NoRegister)
+    Name = TRI->getName(BaseReg);
+  OS << "  - LR#" << ID << " Base=" << Name;
+  OS << " defs=" << getNumDefs() << " uses=" << getNumUses();
+
+  if (IsReserved)
+    OS << " [RESERVED]";
+
+  if (!Defs.empty()) {
+    const MachineInstr *MI = Defs.front().getOperand()->getParent();
+    assert(MI && "Def operand must have a parent instruction");
+    OS << " firstDef: " << AIE::NoDebug(*MI);
+  }
+  OS << "\n";
+}
+
+static cl::opt<std::string> ExcludeLiveRangesByRegClass(
+    "aie-exclude-liveranges-by-regclass", cl::Hidden, cl::init(""),
+    cl::desc("[AIE] Exclude live ranges of the specified register class name. "
+             "Empty string means no filtering."));
+
+static cl::opt<bool> AddUnusedCallerSavedRegs(
+    "aie-add-unused-caller-saved-regs", cl::Hidden, cl::init(false),
+    cl::desc("[AIE] Add unused caller-saved registers to the available "
+             "register pool for pipelining. Only safe when loops with calls "
+             "are excluded from pipelining."));
+
+RegLiveRangeTracker::RegLiveRangeTracker(MachineBasicBlock &MBB)
+    : MF(MBB.getParent()), TRI(MF->getSubtarget().getRegisterInfo()),
+      TII(static_cast<const AIEBaseInstrInfo *>(
+          MF->getSubtarget().getInstrInfo())) {
+  assert(MF && "MachineFunction cannot be null");
+  assert(TRI && "TargetRegisterInfo cannot be null");
+  assert(TII && "TargetInstrInfo cannot be null");
+}
+
+void RegLiveRange::addDef(MachineOperand *DefOp, unsigned SubRegIdx) {
+  Defs.push_back(RegOperandInfo(DefOp, SubRegIdx));
+}
+
+void RegLiveRange::addUse(MachineOperand *UseOp, unsigned SubRegIdx) {
+  Uses.push_back(RegOperandInfo(UseOp, SubRegIdx));
+}
+
+/// Merge the defs and uses of \p Other into this range. If the merge needs a
+/// different BaseReg, existing operands are re-indexed against it; operands
+/// of \p Other are always indexed against it. Reserved status is inherited.
+void RegLiveRange::mergeFrom(const RegLiveRange &Other,
+                             const TargetRegisterInfo *TRI) {
+  // Sub-register index of AccessReg within NewBaseReg (0 if equal/unrelated).
+  auto GetSubRegIdx = [TRI](MCRegister AccessReg,
+                            MCRegister NewBaseReg) -> unsigned {
+    if (AccessReg == NewBaseReg)
+      return 0;
+    for (MCSubRegIndexIterator SubRegIdxIt(NewBaseReg, TRI);
+         SubRegIdxIt.isValid(); ++SubRegIdxIt) {
+      if (SubRegIdxIt.getSubReg() == AccessReg) {
+        return SubRegIdxIt.getSubRegIndex();
+      }
+    }
+    return 0;
+  };
+
+  // Helper to check if Reg1 is a sub-register of Reg2 (Reg2 is larger).
+  auto IsSubReg = [TRI](MCRegister Reg1, MCRegister Reg2) -> bool {
+    for (MCSubRegIndexIterator SubRegIdxIt(Reg2, TRI); SubRegIdxIt.isValid();
+         ++SubRegIdxIt) {
+      if (SubRegIdxIt.getSubReg() == Reg1) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // True if Candidate contains every operand register, i.e. each one is
+  // either Candidate itself or a sub-register of it.
+  auto ContainsAllOperands =
+      [&IsSubReg](MCRegister Candidate,
+                  ArrayRef<MCRegister> OperandRegs) -> bool {
+    for (MCRegister OpReg : OperandRegs) {
+      if (OpReg != Candidate && !IsSubReg(OpReg, Candidate)) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  // Collect all operand registers from both ranges.
+  SmallVector<MCRegister, 8> AllOperandRegs;
+  for (const auto &DefInfo : Defs) {
+    AllOperandRegs.push_back(DefInfo.getOperand()->getReg().asMCReg());
+  }
+  for (const auto &UseInfo : Uses) {
+    AllOperandRegs.push_back(UseInfo.getOperand()->getReg().asMCReg());
+  }
+  for (const auto &DefInfo : Other.Defs) {
+    AllOperandRegs.push_back(DefInfo.getOperand()->getReg().asMCReg());
+  }
+  for (const auto &UseInfo : Other.Uses) {
+    AllOperandRegs.push_back(UseInfo.getOperand()->getReg().asMCReg());
+  }
+
+  // Compute the new base register: the smallest register that contains all
+  // operand registers. Start with the current base registers as candidates.
+  MCRegister NewBaseReg = BaseReg;
+  if (NewBaseReg == MCRegister::NoRegister) {
+    NewBaseReg = Other.BaseReg;
+  } else if (Other.BaseReg != MCRegister::NoRegister) {
+    // Check if we need to update to a larger base register.
+    if (IsSubReg(NewBaseReg, Other.BaseReg)) {
+      NewBaseReg = Other.BaseReg;
+    }
+  }
+
+  // If the current NewBaseReg doesn't contain all operands (e.g., sibling
+  // registers like cml4 and cmh4), find the smallest common super-register.
+  if (NewBaseReg != MCRegister::NoRegister &&
+      !ContainsAllOperands(NewBaseReg, AllOperandRegs)) {
+    // Take the first super-register that contains all operands (the iterator
+    // is expected to yield smallest first); keep NewBaseReg if none does.
+    for (MCSuperRegIterator SuperIt(NewBaseReg, TRI); SuperIt.isValid();
+         ++SuperIt) {
+      if (ContainsAllOperands(*SuperIt, AllOperandRegs)) {
+        NewBaseReg = *SuperIt;
+        break;
+      }
+    }
+  }
+
+  // Base changed: re-add existing operands with updated sub-register indices.
+  if (NewBaseReg != BaseReg) {
+    SmallVector<RegOperandInfo, 4> OldDefs = std::move(Defs);
+    SmallVector<RegOperandInfo, 4> OldUses = std::move(Uses);
+    Defs.clear();
+    Uses.clear();
+
+    for (const auto &DefInfo : OldDefs) {
+      const MCRegister DefReg = DefInfo.getOperand()->getReg().asMCReg();
+      Defs.emplace_back(DefInfo.getOperand(), GetSubRegIdx(DefReg, NewBaseReg));
+    }
+    for (const auto &UseInfo : OldUses) {
+      const MCRegister UseReg = UseInfo.getOperand()->getReg().asMCReg();
+      Uses.emplace_back(UseInfo.getOperand(), GetSubRegIdx(UseReg, NewBaseReg));
+    }
+
+    BaseReg = NewBaseReg;
+  }
+
+  // Merge defs from Other with computed sub-register indices.
+  for (const auto &DefInfo : Other.defs()) {
+    const MCRegister DefReg = DefInfo.getOperand()->getReg().asMCReg();
+    Defs.emplace_back(DefInfo.getOperand(), GetSubRegIdx(DefReg, NewBaseReg));
+  }
+
+  // Merge uses from Other with computed sub-register indices.
+  for (const auto &UseInfo : Other.uses()) {
+    const MCRegister UseReg = UseInfo.getOperand()->getReg().asMCReg();
+    Uses.emplace_back(UseInfo.getOperand(), GetSubRegIdx(UseReg, NewBaseReg));
+  }
+
+  // Propagate reserved status: if Other is reserved, this becomes reserved.
+  if (Other.IsReserved) {
+    IsReserved = true;
+  }
+}
+
+/// Try to widen BaseReg so that it also covers \p ExtReg.
+void RegLiveRange::expandBaseToInclude(MCRegister ExtReg,
+                                       const TargetRegisterInfo *TRI) {
+  if (ExtReg == MCRegister::NoRegister)
+    return;
+
+  // Sub-register index of AccessReg within NewBaseReg (0 if equal/unrelated).
+  auto GetSubRegIdx = [TRI](MCRegister AccessReg,
+                            MCRegister NewBaseReg) -> unsigned {
+    if (AccessReg == NewBaseReg)
+      return 0;
+    for (MCSubRegIndexIterator SubRegIdxIt(NewBaseReg, TRI);
+         SubRegIdxIt.isValid(); ++SubRegIdxIt) {
+      if (SubRegIdxIt.getSubReg() == AccessReg) {
+        return SubRegIdxIt.getSubRegIndex();
+      }
+    }
+    return 0;
+  };
+
+  // Helper to check if Reg1 is a sub-register of Reg2 (Reg2 is larger).
+  auto IsSubReg = [TRI](MCRegister Reg1, MCRegister Reg2) -> bool {
+    for (MCSubRegIndexIterator SubRegIdxIt(Reg2, TRI); SubRegIdxIt.isValid();
+         ++SubRegIdxIt) {
+      if (SubRegIdxIt.getSubReg() == Reg1) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // If BaseReg is not set, just use ExtReg (operands are left untouched).
+  if (BaseReg == MCRegister::NoRegister) {
+    BaseReg = ExtReg;
+    return;
+  }
+
+  // If ExtReg is already contained by BaseReg, nothing to do.
+  if (ExtReg == BaseReg || IsSubReg(ExtReg, BaseReg))
+    return;
+
+  // If BaseReg is contained by ExtReg, upgrade to ExtReg.
+  if (IsSubReg(BaseReg, ExtReg)) {
+    // Recompute SubRegIdx for existing operands.
+    SmallVector<RegOperandInfo, 4> OldDefs = std::move(Defs);
+    SmallVector<RegOperandInfo, 4> OldUses = std::move(Uses);
+    Defs.clear();
+    Uses.clear();
+
+    for (const auto &DefInfo : OldDefs) {
+      const MCRegister DefReg = DefInfo.getOperand()->getReg().asMCReg();
+      Defs.emplace_back(DefInfo.getOperand(), GetSubRegIdx(DefReg, ExtReg));
+    }
+    for (const auto &UseInfo : OldUses) {
+      const MCRegister UseReg = UseInfo.getOperand()->getReg().asMCReg();
+      Uses.emplace_back(UseInfo.getOperand(), GetSubRegIdx(UseReg, ExtReg));
+    }
+
+    BaseReg = ExtReg;
+    return;
+  }
+
+  // Neither contains the other: collect ExtReg and every operand register.
+  SmallVector<MCRegister, 8> AllRegs;
+  AllRegs.push_back(ExtReg);
+  for (const auto &DefInfo : Defs) {
+    AllRegs.push_back(DefInfo.getOperand()->getReg().asMCReg());
+  }
+  for (const auto &UseInfo : Uses) {
+    AllRegs.push_back(UseInfo.getOperand()->getReg().asMCReg());
+  }
+
+  // Helper to check if a candidate register contains all registers.
+  auto ContainsAll = [&IsSubReg](MCRegister Candidate,
+                                 ArrayRef<MCRegister> Regs) -> bool {
+    for (MCRegister R : Regs) {
+      if (R != Candidate && !IsSubReg(R, Candidate)) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  // Take the first super-register of BaseReg that contains all of them.
+  MCRegister NewBaseReg = BaseReg;
+  for (MCSuperRegIterator SuperIt(BaseReg, TRI); SuperIt.isValid(); ++SuperIt) {
+    if (ContainsAll(*SuperIt, AllRegs)) {
+      NewBaseReg = *SuperIt;
+      break;
+    }
+  }
+
+  // Re-index existing operands; nothing changes if no super-register fit.
+  if (NewBaseReg != BaseReg) {
+    SmallVector<RegOperandInfo, 4> OldDefs = std::move(Defs);
+    SmallVector<RegOperandInfo, 4> OldUses = std::move(Uses);
+    Defs.clear();
+    Uses.clear();
+
+    for (const auto &DefInfo : OldDefs) {
+      const MCRegister DefReg = DefInfo.getOperand()->getReg().asMCReg();
+      Defs.emplace_back(DefInfo.getOperand(), GetSubRegIdx(DefReg, NewBaseReg));
+    }
+    for (const auto &UseInfo : OldUses) {
+      const MCRegister UseReg = UseInfo.getOperand()->getReg().asMCReg();
+      Uses.emplace_back(UseInfo.getOperand(), GetSubRegIdx(UseReg, NewBaseReg));
+    }
+
+    BaseReg = NewBaseReg;
+  }
+}
+
+void RegLiveRange::clear() {
+  // Drop all operands and put every other field back to its cleared value.
+  Uses.clear();
+  Defs.clear();
+  AdmissibleRegs.clear();
+  ID = -1;
+  VReg = Register();
+  BaseReg = MCRegister::NoRegister;
+  RegisterClass = nullptr;
+  IsScarce = IsReserved = false;
+}
+
+/// Return the index under which AccessReg appears as a sub-register of
+/// BaseReg, or 0 when the two are equal or AccessReg is not a sub-register.
+unsigned RegLiveRangeTracker::getSubRegIndex(MCRegister AccessReg,
+                                             MCRegister BaseReg) const {
+  if (AccessReg == BaseReg)
+    return 0;
+
+  // Walk the sub-registers of BaseReg until AccessReg shows up.
+  MCSubRegIndexIterator Walker(BaseReg, TRI);
+  while (Walker.isValid()) {
+    if (Walker.getSubReg() == AccessReg)
+      return Walker.getSubRegIndex();
+    ++Walker;
+  }
+
+  return 0;
+}
+
+bool RegLiveRangeTracker::overlapsAnyInSet(
+    MCRegister Reg, const DenseSet<MCRegister> &RegSet) const {
+  // Linear scan that stops at the first member overlapping Reg.
+  for (auto It = RegSet.begin(), End = RegSet.end(); It != End; ++It)
+    if (TRI->regsOverlap(Reg, *It))
+      return true;
+  return false;
+}
+
+bool RegLiveRangeTracker::isFullyDefined(
+    const RegLiveRange &LR,
+    const DenseMap<MCRegister, LaneBitmask> &LocalLiveLaneMasks,
+    const MachineBasicBlock &MBB) const {
+  // Returns true when no lane that this algorithm considers live for the
+  // range is also live on entry to MBB, and false as soon as one such lane
+  // is found.
+  //
+  // Comparing lane masks is finer-grained than a plain register-overlap
+  // test: a range whose live lanes are disjoint from the live-in lanes is
+  // still accepted.
+  //
+  // The intent is to tell apart a register that is simply undefined before
+  // the block (absent from the live-in set, safe to virtualize) from one
+  // that was defined outside the loop (present in the live-in set), which
+  // must be rejected because renaming it would disturb loop-carried values.
+
+  for (const auto &Entry : LocalLiveLaneMasks) {
+    const MCRegister LiveReg = Entry.first;
+    const LaneBitmask LocalLanes = Entry.second;
+
+    // Only map entries whose register overlaps the range's base register
+    // are relevant.
+    const bool TouchesRange = TRI->regsOverlap(LR.getBaseReg(), LiveReg);
+    if (!TouchesRange)
+      continue;
+
+    // Reject the range if any overlapping live-in entry shares a lane with
+    // the locally live lanes of this register.
+    for (const auto &LiveIn : MBB.liveins()) {
+      const bool SameStorage = TRI->regsOverlap(LiveReg, LiveIn.PhysReg);
+      if (SameStorage && (LocalLanes & LiveIn.LaneMask).any())
+        return false;
+    }
+  }
+
+  return true;
+}
+
+bool RegLiveRangeTracker::hasTiedOperands(const RegLiveRange &LR) const {
+  assert(TII);
+
+  // Defs: tied per MachineOperand, per AIE tied-set info, or tied to a use.
+  for (const auto &Def : LR.defs()) {
+    MachineOperand *MO = Def.getOperand();
+    if (MO->isTied())
+      return true;
+
+    MachineInstr *MI = MO->getParent();
+    assert(MI);
+
+    // Get the operand index for this def
+    unsigned OpIdx = MO->getOperandNo();
+
+    // Check AIE-specific tied register info
+    const auto TiedInfo = TII->getTiedRegInfo(*MI);
+    for (const auto &TiedSet : TiedInfo) {
+      // Check if this operand is in the destination operands of a tied set
+      for (const auto &DstOp : TiedSet.DstOps) {
+        if (DstOp.OpIdx == OpIdx)
+          return true;
+      }
+      // Check if this operand is in the source operands of a tied set
+      for (const auto &SrcOp : TiedSet.SrcOps) {
+        if (SrcOp.OpIdx == OpIdx)
+          return true;
+      }
+    }
+
+    const MCRegister R = MO->getReg().asMCReg();
+    const int DefIdx = MI->findRegisterDefOperandIdx(R, TRI);
+    if (DefIdx >= 0 && MI->isRegTiedToUseOperand(DefIdx))
+      return true;
+  }
+
+  // Uses: only membership in the source operands of an AIE tied set counts.
+  for (const auto &Use : LR.uses()) {
+    MachineOperand *MO = Use.getOperand();
+
+    // Get the operand index for this use
+    unsigned OpIdx = MO->getOperandNo();
+
+    MachineInstr *MI = Use.getOperand()->getParent();
+    assert(MI);
+
+    // Check AIE-specific tied register info
+    const auto TiedInfo = TII->getTiedRegInfo(*MI);
+    for (const auto &TiedSet : TiedInfo) {
+      // Check if this operand is in the source operands of a tied set
+      for (const auto &SrcOp : TiedSet.SrcOps) {
+        if (SrcOp.OpIdx == OpIdx)
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// Second-stage pruning: removes every live range that has an operand whose
+/// register overlaps the register of an "uncovered" operand, i.e. an entry of
+/// AllPhysRegOperands that is not owned by any live range still in LiveRanges.
+///
+/// Removing a range turns its own operands into uncovered ones, so the
+/// pruning is repeated until a round removes nothing. In asserts builds the
+/// final state is re-verified.
+void RegLiveRangeTracker::pruneByFullCoverage() {
+  LLVM_DEBUG(dbgs() << "\nPrune by full coverage: " << LiveRanges.size()
+                    << " ranges before pruning\n");
+
+  // We run this in a fixed point loop, since pruning a range may uncover ranges
+  // that were previously covered by it.
+  bool Changed = true;
+  while (Changed) {
+    Changed = false;
+
+    // Operands owned by the ranges that are alive at the start of this round.
+    DenseSet<MachineOperand *> CoveredOps;
+    for (const RegLiveRange &LR : LiveRanges) {
+      for (const auto &R : LR.operands()) {
+        CoveredOps.insert(R.getOperand());
+      }
+    }
+
+    // Registers of all uncovered operands, computed once per round instead of
+    // once per live range. First-occurrence order is kept so the register
+    // reported in the debug output is the same one a full operand scan would
+    // hit first.
+    SmallVector<MCRegister, 16> UncoveredRegs;
+    DenseSet<MCRegister> SeenUncovered;
+    for (MachineOperand *MO : AllPhysRegOperands) {
+      if (CoveredOps.contains(MO))
+        continue;
+      const MCRegister UncoveredReg = MO->getReg().asMCReg();
+      if (SeenUncovered.insert(UncoveredReg).second)
+        UncoveredRegs.push_back(UncoveredReg);
+    }
+
+    // Every operand is covered: no range can be pruned, we are done.
+    if (UncoveredRegs.empty())
+      break;
+
+    // Returns true (and reports the offending register through
+    // SampleUncovered) if any operand register of LR overlaps an uncovered
+    // register.
+    auto HasUncoveredAlias = [&](const RegLiveRange &LR,
+                                 MCRegister &SampleUncovered) {
+      for (const MCRegister UncoveredReg : UncoveredRegs) {
+        for (const auto &R : LR.operands()) {
+          if (TRI->regsOverlap(UncoveredReg,
+                               R.getOperand()->getReg().asMCReg())) {
+            SampleUncovered = UncoveredReg;
+            return true;
+          }
+        }
+      }
+      return false;
+    };
+
+    // Keep only the ranges whose whole register group is covered. Survivors
+    // are moved, not copied, into the new list.
+    SmallVector<RegLiveRange, 16> NewLiveRanges;
+    for (RegLiveRange &LR : LiveRanges) {
+      MCRegister SampleUncovered = MCRegister::NoRegister;
+      if (!HasUncoveredAlias(LR, SampleUncovered)) {
+        NewLiveRanges.push_back(std::move(LR));
+        continue;
+      }
+
+      LLVM_DEBUG({
+        dbgs() << "Reject: pruned by full coverage";
+        if (SampleUncovered != MCRegister::NoRegister)
+          dbgs() << " (uncovered alias " << TRI->getName(SampleUncovered)
+                 << ")";
+        dbgs() << ": ";
+        LR.dumpBrief(TRI);
+      });
+      Changed = true;
+    }
+
+    LiveRanges = std::move(NewLiveRanges);
+  }
+
+  LLVM_DEBUG(dbgs() << "After pruning: " << LiveRanges.size() << " ranges\n");
+
+#ifndef NDEBUG
+  // Verify that all remaining operands are covered
+  DenseSet<MachineOperand *> FinalCoveredOps;
+  for (const RegLiveRange &LR : LiveRanges) {
+    for (const auto &R : LR.operands()) {
+      FinalCoveredOps.insert(R.getOperand());
+    }
+  }
+
+  for (MachineOperand *MO : AllPhysRegOperands) {
+    if (!FinalCoveredOps.contains(MO)) {
+      const MCRegister U = MO->getReg().asMCReg();
+      // Verify no LR overlaps with this uncovered operand
+      for (const RegLiveRange &LR : LiveRanges) {
+        for (const auto &R : LR.operands()) {
+          assert(!TRI->regsOverlap(U, R.getOperand()->getReg().asMCReg()) &&
+                 "Uncovered operand overlaps with kept live range!");
+        }
+      }
+    }
+  }
+#endif
+}
+
+/// Folds every live range that is currently live on a register overlapping
+/// \p DefReg (with overlapping live lanes) into the range at \p DefLRIdx, and
+/// then updates \p LiveRegs to reflect what the def kills.
+///
+/// \param DefLRIdx  Index into LiveRanges of the range that owns the def; it
+///                  is the merge target.
+/// \param DefReg    Register written by the def.
+/// \param LiveRegs  Map from register to (live range index or
+///                  RegLiveRange::NoLiveRange sentinel, live lane mask).
+///                  Entries are retargeted, narrowed or erased here.
+/// \param OperandToLiveRange  Map from operand to live range index; entries
+///                  pointing at a merged-away range are retargeted to
+///                  \p DefLRIdx.
+///
+/// Returns without any change when no entry of \p LiveRegs overlaps the def.
+void RegLiveRangeTracker::mergeAliasingLiveRanges(
+    unsigned DefLRIdx, MCRegister DefReg,
+    DenseMap<MCRegister, std::pair<int, LaneBitmask>> &LiveRegs,
+    DenseMap<MachineOperand *, unsigned> &OperandToLiveRange) {
+
+  // Helper to check if a def register's lanes overlap with a live register's
+  // current lanes. This is critical for separating live ranges: after x10 is
+  // defined, any y5 (containing x10) should only have x11's lanes live, and a
+  // subsequent x10 def should NOT merge into that y5 range.
+  auto LanesOverlap = [this](MCRegister DefR, MCRegister LiveR,
+                             LaneBitmask LiveLanes) -> bool {
+    // If registers are equal, check if any lanes are live.
+    if (DefR == LiveR)
+      return LiveLanes.any();
+
+    // Check if DefR is a subreg of LiveR.
+    for (MCSubRegIndexIterator SubIdxIt(LiveR, TRI); SubIdxIt.isValid();
+         ++SubIdxIt) {
+      if (SubIdxIt.getSubReg() == DefR) {
+        // DefR is a subreg of LiveR - check if DefR's lanes are live.
+        const LaneBitmask DefLanes =
+            TRI->getSubRegIndexLaneMask(SubIdxIt.getSubRegIndex());
+        return (LiveLanes & DefLanes).any();
+      }
+    }
+
+    // Check if LiveR is a subreg of DefR.
+    // Note: both outcomes of this scan yield LiveLanes.any(), the same value
+    // as the conservative fallback below.
+    for (MCSubRegIndexIterator SubIdxIt(DefR, TRI); SubIdxIt.isValid();
+         ++SubIdxIt) {
+      if (SubIdxIt.getSubReg() == LiveR) {
+        // LiveR is a subreg of DefR - if any lanes of LiveR are live,
+        // they overlap with DefR.
+        return LiveLanes.any();
+      }
+    }
+
+    // Registers overlap but no subreg relationship - conservatively treat
+    // as overlapping if any lanes are live.
+    return LiveLanes.any();
+  };
+
+  // Collect all aliasing live registers and their live ranges.
+  // Only include registers where the lanes actually overlap.
+  // The recorded range index is a snapshot taken before any merging below.
+  SmallVector<std::pair<MCRegister, int>, 8> AliasingLiveRegs;
+  for (const auto &[LiveReg, Info] : LiveRegs) {
+    if (TRI->regsOverlap(DefReg, LiveReg) &&
+        LanesOverlap(DefReg, LiveReg, Info.second)) {
+      AliasingLiveRegs.push_back({LiveReg, Info.first});
+    }
+  }
+
+  if (AliasingLiveRegs.empty())
+    return;
+
+  // Collect all unique live range indices to merge (excluding NoLiveRange
+  // sentinels which represent live-out registers without actual ranges).
+  // The target range itself (DefLRIdx) is never listed.
+  SmallVector<unsigned, 4> ToMerge;
+  for (const auto &[LiveReg, LRIdx] : AliasingLiveRegs) {
+    if (LRIdx != RegLiveRange::NoLiveRange) {
+      // Check if we already have this index.
+      if (llvm::find(ToMerge, static_cast<unsigned>(LRIdx)) == ToMerge.end() &&
+          static_cast<unsigned>(LRIdx) != DefLRIdx) {
+        ToMerge.push_back(static_cast<unsigned>(LRIdx));
+      }
+    }
+  }
+
+  // Compute reserved status before merging.
+  // Check if any aliasing live register is a live-out sentinel.
+  bool IsReservedFromLiveOut = false;
+  for (const auto &[LiveReg, LRIdx] : AliasingLiveRegs) {
+    if (LRIdx == RegLiveRange::NoLiveRange) {
+      IsReservedFromLiveOut = true;
+      break;
+    }
+  }
+
+  // Also check if any subreg of DefReg is live-out.
+  // Unlike the check above, this looks at LiveRegs directly and therefore
+  // ignores whether the lanes of that entry overlap the def.
+  if (!IsReservedFromLiveOut) {
+    for (MCSubRegIterator SubIt(DefReg, TRI, /*IncludeSelf=*/true);
+         SubIt.isValid(); ++SubIt) {
+      auto It = LiveRegs.find(*SubIt);
+      if (It != LiveRegs.end() &&
+          It->second.first == RegLiveRange::NoLiveRange) {
+        IsReservedFromLiveOut = true;
+        break;
+      }
+    }
+  }
+
+  // Get the target live range and update its reserved status.
+  // The reference stays valid because LiveRanges is not resized below.
+  RegLiveRange &TargetLR = LiveRanges[DefLRIdx];
+  if (IsReservedFromLiveOut) {
+    TargetLR.setIsReserved(true);
+  }
+
+  // Expand TargetLR's base to include any external registers from
+  // AliasingLiveRegs that don't have actual live ranges (live-out sentinels).
+  // These registers affect the base register size but have no operands.
+  for (const auto &[LiveReg, LRIdx] : AliasingLiveRegs) {
+    if (LRIdx == RegLiveRange::NoLiveRange) {
+      TargetLR.expandBaseToInclude(LiveReg, TRI);
+    }
+  }
+
+  // Incrementally merge all other live ranges into the target.
+  // The enhanced mergeFrom() automatically computes the smallest common
+  // super-register that contains all operands from both ranges.
+  for (unsigned LRIdx : ToMerge) {
+    TargetLR.mergeFrom(LiveRanges[LRIdx], TRI);
+
+    // Clear the source range (mark as invalid).
+    // The slot stays in LiveRanges so that existing indices remain stable.
+    LiveRanges[LRIdx].clear();
+
+    // Update all LiveRegs entries that pointed to the merged range.
+    for (auto &[LiveReg, Info] : LiveRegs) {
+      if (Info.first == static_cast<int>(LRIdx)) {
+        Info.first = static_cast<int>(DefLRIdx);
+      }
+    }
+
+    // Update OperandToLiveRange.
+    for (auto &Entry : OperandToLiveRange) {
+      if (Entry.second == LRIdx) {
+        Entry.second = DefLRIdx;
+      }
+    }
+  }
+
+  // Remove fully redefined registers from LiveRegs.
+  // Presumably getSubRegIndex(Sub, Super) is non-zero exactly when Sub is a
+  // sub-register of Super, so this matches DefReg itself and registers
+  // contained in it - verify against the helper's definition.
+  for (const auto &[LiveReg, LRIdx] : AliasingLiveRegs) {
+    if (DefReg == LiveReg || getSubRegIndex(LiveReg, DefReg) != 0) {
+      LiveRegs.erase(LiveReg);
+    }
+  }
+
+  // Update lane masks for partially redefined super-registers.
+  // When DefReg is a subreg of LiveReg, the def kills DefReg's lanes within
+  // LiveReg. This is critical for separating live ranges: after x10 is defined,
+  // any y5 (containing x10) should only have x11's lanes live, not x10's.
+  for (const auto &[LiveReg, OrigLRIdx] : AliasingLiveRegs) {
+    // Skip if already erased (fully redefined).
+    auto LiveIt = LiveRegs.find(LiveReg);
+    if (LiveIt == LiveRegs.end())
+      continue;
+
+    // Check if DefReg is a subreg of LiveReg (DefReg partially kills LiveReg).
+    const unsigned SubRegIdx = getSubRegIndex(DefReg, LiveReg);
+    if (SubRegIdx != 0) {
+      // DefReg is a subreg of LiveReg - update LiveReg's lane mask.
+      const LaneBitmask DefLanes = TRI->getSubRegIndexLaneMask(SubRegIdx);
+      LiveIt->second.second &= ~DefLanes;
+
+      // If no lanes remain live, remove the entry entirely.
+      if (LiveIt->second.second.none()) {
+        LiveRegs.erase(LiveIt);
+      }
+    }
+  }
+
+  // Check if this def, combined with other defs in the merged range,
+  // fully defines a super-register. If so, remove the super-register from
+  // LiveRegs.
+  const MCRegister MergedBaseReg = TargetLR.getBaseReg();
+
+  // Collect all defined sub-registers.
+  DenseSet<MCRegister> AllDefinedRegs;
+  for (const auto &DefInfo : TargetLR.defs()) {
+    const MCRegister DefRegister = DefInfo.getOperand()->getReg().asMCReg();
+    AllDefinedRegs.insert(DefRegister);
+    // Also add all sub-registers of this defined register.
+    for (MCSubRegIterator SubIt(DefRegister, TRI, /*IncludeSelf=*/false);
+         SubIt.isValid(); ++SubIt) {
+      AllDefinedRegs.insert(*SubIt);
+    }
+  }
+
+  // Check if all sub-registers of a register are defined.
+  // NOTE(review): the register itself is not consulted, so this is vacuously
+  // true for a register that has no sub-registers. For such a MergedBaseReg
+  // the loop below also erases all of its super-registers from LiveRegs -
+  // confirm that this is intended.
+  auto FullyCovered = [&](MCRegister Reg) {
+    for (MCSubRegIterator SubIt(Reg, TRI, /*IncludeSelf=*/false);
+         SubIt.isValid(); ++SubIt) {
+      if (!AllDefinedRegs.count(*SubIt)) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  // Check BaseReg and its super-registers.
+  SmallVector<MCRegister, 4> RegsToCheck;
+  RegsToCheck.push_back(MergedBaseReg);
+  for (MCSuperRegIterator SuperIt(MergedBaseReg, TRI); SuperIt.isValid();
+       ++SuperIt) {
+    RegsToCheck.push_back(*SuperIt);
+  }
+
+  // A fully covered register is no longer live, and neither is any register
+  // containing it.
+  for (const MCRegister CheckReg : RegsToCheck) {
+    if (FullyCovered(CheckReg)) {
+      LiveRegs.erase(CheckReg);
+      for (MCSuperRegIterator SuperIt(CheckReg, TRI); SuperIt.isValid();
+           ++SuperIt) {
+        LiveRegs.erase(*SuperIt);
+      }
+    }
+  }
+}
+
+/// Returns the base registers of all live ranges that are flagged as
+/// reserved.
+DenseSet<MCRegister> RegLiveRangeTracker::collectReservedBaseRegs() const {
+  DenseSet<MCRegister> Bases;
+  for (const RegLiveRange &Range : LiveRanges) {
+    if (!Range.isReserved())
+      continue;
+    Bases.insert(Range.getBaseReg());
+  }
+  return Bases;
+}
+
+/// Rebuilds AvailablePhysRegs from the current live ranges.
+///
+/// A range contributes its base register and all sub-registers of the base,
+/// unless the range is reserved or its base overlaps one of \p ReservedRegs.
+void RegLiveRangeTracker::computeAvailableFromLiveRanges(
+    const DenseSet<MCRegister> &ReservedRegs) {
+  AvailablePhysRegs.clear();
+
+  for (const RegLiveRange &Range : LiveRanges) {
+    assert(Range.getRegisterClass() &&
+           "Live range must have a valid register class");
+    const MCRegister Base = Range.getBaseReg();
+    assert(Base != MCRegister::NoRegister &&
+           "Live range must have a base register");
+    assert(Base.isPhysical() && "BaseReg must be a physical register");
+
+    // Reserved ranges never donate registers.
+    if (Range.isReserved())
+      continue;
+
+    // Testing the base is sufficient: every sub-register is contained in it,
+    // so none of them can overlap a reserved register if the base does not.
+    const bool TouchesReserved =
+        llvm::any_of(ReservedRegs, [&](MCRegister Reserved) {
+          return TRI->regsOverlap(Base, Reserved);
+        });
+    if (TouchesReserved)
+      continue;
+
+    // The iterator yields the base itself first, then its sub-registers.
+    for (MCSubRegIterator RegIt(Base, TRI, /*IncludeSelf=*/true);
+         RegIt.isValid(); ++RegIt)
+      AvailablePhysRegs.insert(*RegIt);
+  }
+}
+
+/// Extends AvailablePhysRegs with every super-register of an available
+/// register that has at least two sub-registers, all of which are available.
+void RegLiveRangeTracker::deriveSuperRegsFromSubRegs() {
+  // True if Reg has two or more sub-registers and each of them is available.
+  auto CoveredBySubRegs = [this](MCRegister Reg) {
+    unsigned NumSubRegs = 0;
+    for (MCSubRegIterator Sub(Reg, TRI, /*IncludeSelf=*/false); Sub.isValid();
+         ++Sub) {
+      if (!AvailablePhysRegs.count(*Sub))
+        return false;
+      ++NumSubRegs;
+    }
+    return NumSubRegs >= 2;
+  };
+
+  // Work on a snapshot: AvailablePhysRegs grows while we walk it.
+  const SmallVector<MCRegister, 32> Snapshot(AvailablePhysRegs.begin(),
+                                             AvailablePhysRegs.end());
+  for (const MCRegister Known : Snapshot) {
+    for (MCSuperRegIterator Super(Known, TRI, /*IncludeSelf=*/false);
+         Super.isValid(); ++Super) {
+      const MCRegister Candidate = *Super;
+      // Only registers that are not yet available need the sub-register test.
+      if (!AvailablePhysRegs.count(Candidate) && CoveredBySubRegs(Candidate))
+        AvailablePhysRegs.insert(Candidate);
+    }
+  }
+}
+
+/// Adds caller-saved registers that are entirely unused in \p MBB to
+/// AvailablePhysRegs. Does nothing unless the AddUnusedCallerSavedRegs option
+/// is set, when no live range has a register class, or when the target
+/// provides no call-preserved mask.
+///
+/// \param MBB           Block whose live-ins and live-outs must not be touched.
+/// \param ImplicitRegs  Registers used implicitly; candidates must not overlap
+///                      any of them.
+/// \param ReservedRegs  Base registers of reserved live ranges; candidates
+///                      must not overlap any of them.
+void RegLiveRangeTracker::addUnusedCallerSavedRegs(
+    MachineBasicBlock &MBB, const DenseSet<MCRegister> &ImplicitRegs,
+    const DenseSet<MCRegister> &ReservedRegs) {
+
+  // This feature is controlled by a command-line option because it changes
+  // the available register pool, which can affect register allocation results.
+  if (!AddUnusedCallerSavedRegs)
+    return;
+
+  // Augment AvailablePhysRegs with caller-saved registers that are completely
+  // unused in this block. Since pipelining excludes loops with calls, these
+  // registers are safe to use as additional allocation candidates.
+  //
+  // A caller-saved register is safe to add if:
+  // 1. It is allocatable (not reserved by the target)
+  // 2. It belongs to a register class used by at least one live range
+  // 3. It does not overlap with any register used in the block (explicit ops)
+  // 4. It does not overlap with any register used implicitly
+  // 5. It does not overlap with any live-in register (respecting lane masks)
+  // 6. It does not overlap with any live-out register (respecting lane masks)
+  // 7. It does not overlap with any reserved live range
+
+  // Collect the set of register classes used by live ranges.
+  SmallPtrSet<const TargetRegisterClass *, 8> UsedRegClasses;
+  for (const RegLiveRange &LR : LiveRanges) {
+    if (LR.getRegisterClass()) {
+      UsedRegClasses.insert(LR.getRegisterClass());
+    }
+  }
+
+  // If no live ranges have register classes, nothing to add.
+  if (UsedRegClasses.empty())
+    return;
+
+  const auto *AIERII = static_cast<const AIEBaseRegisterInfo *>(TRI);
+
+  // Get the call-preserved mask. clobbersPhysReg returns true for caller-saved
+  // registers (those NOT preserved across calls).
+  const uint32_t *PreservedMask =
+      AIERII->getCallPreservedMask(*MF, CallingConv::C);
+
+  // The TargetRegisterInfo contract allows a null mask ("no register mask is
+  // used"). Without a mask we cannot classify registers as caller-saved, and
+  // clobbersPhysReg would dereference the null pointer, so add nothing.
+  if (!PreservedMask) {
+    LLVM_DEBUG(dbgs() << "No call-preserved mask available; not adding "
+                         "caller-saved registers\n");
+    return;
+  }
+
+  const BitVector AllocatableRegs = TRI->getAllocatableSet(*MF);
+
+  // Generic lambda to check if a register overlaps with any register in a
+  // range. Works with any range that yields MCRegister.
+  auto OverlapsAny = [this](MCRegister Reg, auto &&Range) {
+    return llvm::any_of(Range,
+                        [&](MCRegister R) { return TRI->regsOverlap(Reg, R); });
+  };
+
+  // Generic lambda to check if a register overlaps with any RegisterMaskPair
+  // in a range. Works with MBB.liveins() and MBB.liveouts().
+  auto OverlapsAnyRMP = [this](MCRegister Reg, auto &&Range) {
+    return llvm::any_of(Range,
+                        [&](const MachineBasicBlock::RegisterMaskPair &RMP) {
+                          return overlapsRMP(Reg, RMP, TRI);
+                        });
+  };
+
+  // Helper to check if Reg is caller-saved (clobbered by calls).
+  auto IsCallerSaved = [PreservedMask](MCRegister Reg) {
+    return MachineOperand::clobbersPhysReg(PreservedMask, Reg);
+  };
+
+  // Transformer for AllPhysRegOperands to yield MCRegister.
+  auto ToReg = [](const MachineOperand *MO) { return MO->getReg().asMCReg(); };
+
+  // Iterate over allocatable registers and add unused caller-saved ones.
+  // Register number 0 is MCRegister::NoRegister, so the scan starts at 1.
+  // The counter is only read by LLVM_DEBUG, hence [[maybe_unused]] to keep
+  // release builds free of unused-variable warnings.
+  [[maybe_unused]] unsigned NumUnusedCallerSavedAdded = 0;
+  for (unsigned RegIdx = 1, E = TRI->getNumRegs(); RegIdx < E; ++RegIdx) {
+    const MCRegister Reg = MCRegister::from(RegIdx);
+
+    // Skip if already available.
+    if (AvailablePhysRegs.count(Reg))
+      continue;
+
+    // Must be allocatable.
+    if (!AllocatableRegs.test(RegIdx))
+      continue;
+
+    // Must be caller-saved (clobbered by calls).
+    if (!IsCallerSaved(Reg))
+      continue;
+
+    // Must belong to at least one register class used by live ranges.
+    bool BelongsToUsedClass = llvm::any_of(
+        UsedRegClasses, [Reg](auto *RC) { return RC->contains(Reg); });
+    if (!BelongsToUsedClass)
+      continue;
+
+    // Must not overlap with any explicitly used register in the block.
+    if (OverlapsAny(Reg, llvm::map_range(AllPhysRegOperands, ToReg)))
+      continue;
+
+    // Must not overlap with any implicit register.
+    if (OverlapsAny(Reg, ImplicitRegs))
+      continue;
+
+    // Must not overlap with any live-in register (respecting lane masks).
+    if (OverlapsAnyRMP(Reg, MBB.liveins()))
+      continue;
+
+    // Must not overlap with any live-out register (respecting lane masks).
+    if (OverlapsAnyRMP(Reg, MBB.liveouts()))
+      continue;
+
+    // Must not overlap with any reserved base register.
+    if (OverlapsAny(Reg, ReservedRegs))
+      continue;
+
+    // This register is safe to use as an additional allocation candidate.
+    AvailablePhysRegs.insert(Reg);
+    ++NumUnusedCallerSavedAdded;
+
+    LLVM_DEBUG(dbgs() << "Added unused caller-saved register: "
+                      << TRI->getName(Reg) << "\n");
+  }
+
+  LLVM_DEBUG(dbgs() << "Added " << NumUnusedCallerSavedAdded
+                    << " unused caller-saved registers to available set\n");
+}
+
+/// Flags each live range as scarce when exactly one register of its register
+/// class is in AvailablePhysRegs. Ranges without a register class are left
+/// untouched.
+void RegLiveRangeTracker::markScarceRanges() {
+  for (RegLiveRange &Range : LiveRanges) {
+    const TargetRegisterClass *Class = Range.getRegisterClass();
+    if (!Class)
+      continue;
+
+    // Counting can stop at two: only "exactly one" matters.
+    unsigned NumAvailable = 0;
+    for (const MCPhysReg Candidate : *Class) {
+      if (!AvailablePhysRegs.count(Candidate))
+        continue;
+      if (++NumAvailable == 2)
+        break;
+    }
+
+    Range.setIsScarce(NumAvailable == 1);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Analyze helper methods (decomposition of analyze())
+//===----------------------------------------------------------------------===//
+
+/// Numbers the instructions of \p SemanticOrder in InstrOrder and sorts their
+/// physical-register operands: explicit ones are appended to
+/// AllPhysRegOperands, implicit ones add the register and all of its aliases
+/// to State.ImplicitRegs.
+void RegLiveRangeTracker::buildInstructionOrderAndCollectOperands(
+    ArrayRef<MachineInstr *> SemanticOrder, LivenessScanState &State) {
+  unsigned NextIdx = 0;
+  for (MachineInstr *Instr : SemanticOrder) {
+    InstrOrder[Instr] = NextIdx;
+    ++NextIdx;
+
+    for (MachineOperand &Op : Instr->operands()) {
+      const bool IsPhysRegOp = Op.isReg() && Op.getReg().isPhysical();
+      if (!IsPhysRegOp)
+        continue;
+
+      if (!Op.isImplicit()) {
+        AllPhysRegOperands.push_back(&Op);
+        continue;
+      }
+
+      // Implicit registers never get live ranges of their own; they are
+      // recorded, together with every alias, so that explicit ranges
+      // touching them can be invalidated later.
+      const MCRegister ImplicitReg = Op.getReg().asMCReg();
+      for (MCRegAliasIterator Alias(ImplicitReg, TRI, /*IncludeSelf=*/true);
+           Alias.isValid(); ++Alias)
+        State.ImplicitRegs.insert(*Alias);
+    }
+  }
+}
+
+/// Seeds State.LiveRegs with the live-out registers of \p MBB. Each entry
+/// carries the RegLiveRange::NoLiveRange sentinel instead of a range index,
+/// together with the live-out lane mask.
+void RegLiveRangeTracker::initLiveRegsFromLiveOuts(const MachineBasicBlock &MBB,
+                                                   LivenessScanState &State) {
+  for (const auto &LiveOut : MBB.liveouts())
+    State.LiveRegs[LiveOut.PhysReg] = {RegLiveRange::NoLiveRange,
+                                       LiveOut.LaneMask};
+}
+
+/// Returns the index of the live range that operand \p MO (accessing \p Reg)
+/// belongs to, creating a new range if no suitable one is live.
+///
+/// The first entry of State.LiveRegs whose register overlaps \p Reg and whose
+/// live lanes overlap it decides the outcome:
+///  - an entry with an actual range: the operand joins that range, whose base
+///    register is expanded when \p Reg is larger than the current base;
+///  - a live-out sentinel (RegLiveRange::NoLiveRange): a new range is created
+///    and marked reserved;
+///  - no entry: a new, non-reserved range is created.
+/// A new range is registered in State.LiveRegs under \p Reg with all lanes
+/// live. In every case State.OperandToLiveRange[MO] is set to the returned
+/// index.
+unsigned RegLiveRangeTracker::getOrCreateLiveRangeForOperand(
+    MCRegister Reg, MachineOperand *MO, LivenessScanState &State) {
+  // Check if this register or an aliasing register is already live.
+  // We need to find an entry where the lanes actually overlap, not just
+  // the registers.  This is critical for separating live ranges: after
+  // x10 is defined, any y5 (containing x10) should only have x11's lanes
+  // live, and a subsequent x10 access should NOT merge into that y5 range.
+  auto LanesOverlapReg = [Reg, TRI = TRI](const auto &Entry) {
+    const MCRegister LiveReg = Entry.first;
+    if (!TRI->regsOverlap(Reg, LiveReg))
+      return false;
+
+    const LaneBitmask LiveLanes = Entry.second.second;
+
+    // Only when Reg is a strict sub-register of LiveReg can the test be
+    // narrowed to the lanes Reg actually occupies.
+    if (LiveReg != Reg) {
+      for (MCSubRegIndexIterator SubIdxIt(LiveReg, TRI); SubIdxIt.isValid();
+           ++SubIdxIt) {
+        if (SubIdxIt.getSubReg() == Reg) {
+          const LaneBitmask RegLanes =
+              TRI->getSubRegIndexLaneMask(SubIdxIt.getSubRegIndex());
+          return (LiveLanes & RegLanes).any();
+        }
+      }
+    }
+
+    // Same register, LiveReg contained in Reg, or an overlap without a
+    // sub-register relationship: any live lane counts as an overlap. (No
+    // separate scan for "LiveReg is a sub-register of Reg" is needed, since
+    // its result would be this same value.)
+    return LiveLanes.any();
+  };
+
+  bool IsReserved = false;
+  auto It = llvm::find_if(State.LiveRegs, LanesOverlapReg);
+  if (It != State.LiveRegs.end()) {
+    const int LRIdx = It->second.first;
+
+    if (LRIdx == RegLiveRange::NoLiveRange) {
+      // Found a live-out register (NoLiveRange sentinel).
+      // Mark the new range as reserved.
+      IsReserved = true;
+    } else {
+      // Found an aliasing live register with an actual live range.
+      assert(LRIdx >= 0 && "LRIdx must be valid");
+      const unsigned ExistingIdx = static_cast<unsigned>(LRIdx);
+      State.OperandToLiveRange[MO] = ExistingIdx;
+
+      // Update base register for this live range if needed.
+      RegLiveRange &Existing = LiveRanges[ExistingIdx];
+      const MCRegister CurrentBase = Existing.getBaseReg();
+      if (CurrentBase == MCRegister::NoRegister) {
+        // No base yet - expand base to include this register.
+        Existing.expandBaseToInclude(Reg, TRI);
+      } else {
+        // Check if we need to update to a larger base register.
+        assert(CurrentBase.isPhysical() && "CurrentBase must be physical");
+        assert(Reg.isPhysical() && "Reg must be physical");
+        if (getSubRegIndex(Reg, CurrentBase) == 0 &&
+            getSubRegIndex(CurrentBase, Reg) != 0) {
+          // Reg is larger than current base - update BaseReg and recompute
+          // SubRegIdx for all existing operands.
+          Existing.expandBaseToInclude(Reg, TRI);
+        }
+      }
+
+      return ExistingIdx;
+    }
+  }
+
+  // Create a new live range.
+  const unsigned NewLRIdx = LiveRanges.size();
+  LiveRanges.emplace_back(NextLiveRangeID++, Reg, IsReserved);
+  State.LiveRegs[Reg] = {static_cast<int>(NewLRIdx), LaneBitmask::getAll()};
+  State.OperandToLiveRange[MO] = NewLRIdx;
+  return NewLRIdx;
+}
+
+/// Handles the explicit physical-register defs of \p MI: each def is attached
+/// to its live range (found or created) and then merged with the aliasing
+/// live ranges.
+void RegLiveRangeTracker::processDefsInInstruction(MachineInstr &MI,
+                                                   LivenessScanState &State) {
+  for (MachineOperand &DefOp : MI.defs()) {
+    const bool IsExplicitPhysReg =
+        DefOp.isReg() && DefOp.getReg().isPhysical() && !DefOp.isImplicit();
+    if (!IsExplicitPhysReg)
+      continue;
+
+    const MCRegister DefReg = DefOp.getReg().asMCReg();
+    const unsigned RangeIdx =
+        getOrCreateLiveRangeForOperand(DefReg, &DefOp, State);
+
+    // Index LiveRanges only after the call above, which may have appended a
+    // new range. The sub-register index is taken relative to the range base.
+    RegLiveRange &Range = LiveRanges[RangeIdx];
+    Range.addDef(&DefOp, getSubRegIndex(DefReg, Range.getBaseReg()));
+
+    // Fold in every other range that is live on an aliasing register.
+    mergeAliasingLiveRanges(RangeIdx, DefReg, State.LiveRegs,
+                            State.OperandToLiveRange);
+  }
+}
+
+/// Handles the explicit physical-register uses of \p MI: each use is attached
+/// to its live range (found or created).
+void RegLiveRangeTracker::processUsesInInstruction(MachineInstr &MI,
+                                                   LivenessScanState &State) {
+  for (MachineOperand &UseOp : MI.uses()) {
+    const bool IsExplicitPhysReg =
+        UseOp.isReg() && UseOp.getReg().isPhysical() && !UseOp.isImplicit();
+    if (!IsExplicitPhysReg)
+      continue;
+
+    const MCRegister UseReg = UseOp.getReg().asMCReg();
+    const unsigned RangeIdx =
+        getOrCreateLiveRangeForOperand(UseReg, &UseOp, State);
+
+    // Index LiveRanges only after the call above, which may have appended a
+    // new range. The sub-register index is taken relative to the range base.
+    RegLiveRange &Range = LiveRanges[RangeIdx];
+    Range.addUse(&UseOp, getSubRegIndex(UseReg, Range.getBaseReg()));
+  }
+}
+
+/// Walks \p SemanticOrder from last to first instruction and builds the live
+/// ranges, handling the defs of each instruction before its uses.
+void RegLiveRangeTracker::performLivenessScan(
+    ArrayRef<MachineInstr *> SemanticOrder, LivenessScanState &State) {
+  for (auto It = SemanticOrder.rbegin(), End = SemanticOrder.rend(); It != End;
+       ++It) {
+    MachineInstr &MI = **It;
+    // Order matters for a backward scan: defs end liveness and uses begin it,
+    // so an instruction that reads and writes the same register must have its
+    // def handled first.
+    processDefsInInstruction(MI, State);
+    processUsesInInstruction(MI, State);
+  }
+}
+
+/// First-stage filtering: replaces LiveRanges with the subset of candidate
+/// ranges that passes all safety checks. A range is dropped when
+///  - it was cleared by merging (negative ID),
+///  - isFullyDefined() rejects it,
+///  - one of its operand registers is in State.ImplicitRegs, or
+///  - hasTiedOperands() reports a tied operand.
+///
+/// \param MBB                 Block being analyzed (forwarded to
+///                            isFullyDefined).
+/// \param State               Scan state; only ImplicitRegs is read.
+/// \param LocalLiveLaneMasks  Per-register live lane masks (forwarded to
+///                            isFullyDefined).
+void RegLiveRangeTracker::applySafetyFiltering(
+    const MachineBasicBlock &MBB, const LivenessScanState &State,
+    const DenseMap<MCRegister, LaneBitmask> &LocalLiveLaneMasks) {
+  LLVM_DEBUG({ dump("CANDIDATE LIVE RANGES\n"); });
+  LLVM_DEBUG(dbgs() << "\nFirst-stage filtering: " << LiveRanges.size()
+                    << " candidate ranges\n");
+
+  // True if the operand's register is one that is also used implicitly.
+  // Loop-invariant, so it is built once rather than per range.
+  auto UsesImplicitReg = [&State](const RegOperandInfo &OperInfo) {
+    const MCRegister Reg = OperInfo.getOperand()->getReg().asMCReg();
+    return State.ImplicitRegs.count(Reg) > 0;
+  };
+
+  SmallVector<RegLiveRange, 16> SafeRanges;
+  for (RegLiveRange &LR : LiveRanges) {
+    // Skip invalid/cleared ranges from merging.
+    if (LR.getID() < 0)
+      continue;
+
+    // Filter out live ranges whose base register is not fully defined.
+    // This checks that the range doesn't read from live-in values, which
+    // would make it unsafe to virtualize (we'd be changing loop-carried
+    // values). This also implicitly handles use-before-def cases.
+    if (!isFullyDefined(LR, LocalLiveLaneMasks, MBB)) {
+      LLVM_DEBUG({
+        dbgs() << "Reject: base register not fully defined in block: ";
+        LR.dumpBrief(TRI);
+      });
+      continue;
+    }
+
+    // Filter out any live range that uses an implicit register.
+    if (llvm::any_of(LR.operands(), UsesImplicitReg)) {
+      LLVM_DEBUG({
+        dbgs() << "Reject: uses implicit register ";
+        for (const auto &OI : LR.operands()) {
+          MCRegister R = OI.getOperand()->getReg().asMCReg();
+          if (State.ImplicitRegs.count(R)) {
+            dbgs() << TRI->getName(R) << " ";
+            break;
+          }
+        }
+        dbgs() << ": ";
+        LR.dumpBrief(TRI);
+      });
+      continue;
+    }
+
+    // Reject tied operands.
+    if (hasTiedOperands(LR)) {
+      LLVM_DEBUG({
+        dbgs() << "Reject: has tied operands: ";
+        LR.dumpBrief(TRI);
+      });
+      continue;
+    }
+
+    // Note: We don't check killedBeforeEndOfBlock because:
+    // 1. Live-out is already filtered by isCarriedByLiveInOut check
+    // 2. We want to allow def-only ranges (garbage bin registers)
+
+    LLVM_DEBUG({
+      dbgs() << "Keep: ";
+      LR.dumpBrief(TRI);
+    });
+    // LiveRanges is replaced wholesale below, so the survivor can be moved
+    // out instead of deep-copied.
+    SafeRanges.push_back(std::move(LR));
+  }
+
+  LLVM_DEBUG(dbgs() << "After first-stage: " << SafeRanges.size()
+                    << " safe ranges\n");
+
+  LiveRanges = std::move(SafeRanges);
+}
+
+/// Computes the register class of every live range and keeps only the ranges
+/// that obtained one and whose class name does not equal the
+/// ExcludeLiveRangesByRegClass option (when that option is non-empty).
+void RegLiveRangeTracker::computeRegisterClassesAndFilter() {
+  LLVM_DEBUG(dbgs() << "\nRegister class computation and filtering\n");
+
+  SmallVector<RegLiveRange, 16> Kept;
+  for (RegLiveRange &Range : LiveRanges) {
+    computeRegisterClass(Range);
+    const TargetRegisterClass *RC = Range.getRegisterClass();
+
+    // A range without a register class cannot be used.
+    if (!RC) {
+      LLVM_DEBUG({
+        dbgs() << "Reject: no valid register class: ";
+        Range.dumpBrief(TRI);
+      });
+      continue;
+    }
+
+    // Optional exclusion of one register class by name.
+    const bool ExcludedByOption =
+        !ExcludeLiveRangesByRegClass.empty() &&
+        StringRef(TRI->getRegClassName(RC)) == ExcludeLiveRangesByRegClass;
+    if (ExcludedByOption) {
+      LLVM_DEBUG({
+        dbgs() << "Reject: excluded register class "
+               << TRI->getRegClassName(RC) << ": ";
+        Range.dumpBrief(TRI);
+      });
+      continue;
+    }
+
+    Kept.push_back(std::move(Range));
+  }
+
+  LiveRanges = std::move(Kept);
+
+  LLVM_DEBUG(dbgs() << "After register class filtering: " << LiveRanges.size()
+                    << " ranges\n");
+}
+
+/// Last analysis stage: prunes the ranges by full coverage, computes the set
+/// of available physical registers, marks scarce ranges and caches the most
+/// promising set of scarce ranges.
+void RegLiveRangeTracker::finalizeAvailabilityAndScarcity(
+    MachineBasicBlock &MBB, const LivenessScanState &State) {
+  // Full-coverage pruning is the second filtering stage and must come after
+  // register class filtering.
+  pruneByFullCoverage();
+
+  // Available registers: donated by the surviving ranges, then completed
+  // with derivable super-registers and (optionally) unused caller-saved
+  // registers.
+  const DenseSet<MCRegister> ReservedBases = collectReservedBaseRegs();
+  computeAvailableFromLiveRanges(ReservedBases);
+  deriveSuperRegsFromSubRegs();
+  addUnusedCallerSavedRegs(MBB, State.ImplicitRegs, ReservedBases);
+
+  // Scarcity is evaluated against the final available set.
+  markScarceRanges();
+  MostPromisingScarceRanges = findMostPromisingScarceRanges(AvailablePhysRegs);
+}
+
+/// Entry point of the analysis: resets the tracker and rebuilds all live
+/// range information for \p MBB, with the instructions taken in the order
+/// given by \p SemanticOrder (which must not be empty).
+void RegLiveRangeTracker::analyze(MachineBasicBlock &MBB,
+                                  ArrayRef<MachineInstr *> SemanticOrder) {
+  assert(!SemanticOrder.empty() && "SemanticOrder must be provided - MBB order "
+                                   "is unreliable after scheduling");
+  clear();
+
+  // Stage 1: number the instructions, collect the operands, seed liveness
+  // from the live-outs and run the backward scan.
+  LivenessScanState State;
+  buildInstructionOrderAndCollectOperands(SemanticOrder, State);
+  initLiveRegsFromLiveOuts(MBB, State);
+  performLivenessScan(SemanticOrder, State);
+
+  // The isFullyDefined check only needs the lane masks that are left in
+  // LiveRegs after the scan, not the range indices.
+  DenseMap<MCRegister, LaneBitmask> LocalLiveLaneMasks;
+  for (const auto &Entry : State.LiveRegs)
+    LocalLiveLaneMasks[Entry.first] = Entry.second.second;
+
+  // Stage 2: safety filtering, then register class filtering.
+  applySafetyFiltering(MBB, State, LocalLiveLaneMasks);
+  computeRegisterClassesAndFilter();
+
+  // Stage 3: availability and scarcity.
+  finalizeAvailabilityAndScarcity(MBB, State);
+}
+
+/// Sets the register class of this live range and resets AdmissibleRegs to
+/// the members of \p RC (empty when \p RC is null).
+void RegLiveRange::setRegisterClass(const TargetRegisterClass *RC) {
+  RegisterClass = RC;
+  AdmissibleRegs.clear();
+  if (!RC)
+    return;
+
+  // Start from plain class membership; per-range requirements (e.g. bypass
+  // constraints) may narrow the set later.
+  for (const MCPhysReg Member : *RC)
+    AdmissibleRegs.insert(Member);
+}
+
+/// Computes the register class of \p LR as the common sub-class of the
+/// constraints of all its operands and stores it via setRegisterClass().
+///
+/// The class is set to nullptr when the constraints have no common sub-class.
+/// When no operand contributes a constraint, the minimal physical register
+/// class of the base register is used. A range without a base register is
+/// left unchanged.
+void RegLiveRangeTracker::computeRegisterClass(RegLiveRange &LR) const {
+  if (LR.getBaseReg() == MCRegister::NoRegister)
+    return;
+
+  // nullptr stands for "no constraint seen yet"; the first constraint found
+  // replaces it, later ones are intersected with it.
+  const TargetRegisterClass *Common = nullptr;
+
+  for (const auto &OpInfo : LR.operands()) {
+    const MachineOperand *Op = OpInfo.getOperand();
+    const TargetRegisterClass *Constraint =
+        Op->getParent()->getRegClassConstraint(Op->getOperandNo(), TII, TRI);
+    if (!Constraint)
+      continue;
+
+    // An operand that accesses a sub-register of the base needs a class that
+    // supports that sub-register index.
+    // NOTE(review): a null result here drops the operand's constraint instead
+    // of rejecting the range - confirm that this is intended.
+    if (const unsigned SubIdx = OpInfo.getSubRegIdx()) {
+      Constraint = TRI->getSubClassWithSubReg(Constraint, SubIdx);
+      if (!Constraint)
+        continue;
+    }
+
+    if (!Common) {
+      Common = Constraint;
+      continue;
+    }
+
+    Common = TRI->getCommonSubClass(Common, Constraint);
+    if (!Common) {
+      // Contradicting constraints: no register can satisfy this range.
+      LR.setRegisterClass(nullptr);
+      return;
+    }
+  }
+
+  // No operand imposed a constraint: use the minimal class of the base.
+  if (!Common) {
+    Common = TRI->getMinimalPhysRegClass(LR.getBaseReg());
+    assert(Common && "Physical register must have a register class");
+  }
+
+  LR.setRegisterClass(Common);
+}
+
+/// Rewrite the operands of the analysed live ranges to fresh virtual
+/// registers.
+///
+/// RESERVED ranges are never rewritten. With
+/// OverlapPolicy::DisallowOverlapWithReservedBase, ranges whose base register
+/// overlaps a RESERVED base are skipped as well.
+void RegLiveRangeTracker::virtualizeFilteredPhysRegs(OverlapPolicy Policy) {
+  assert(!RegistersVirtualized && "Registers are already virtualized");
+
+  // The function is about to contain virtual registers, so drop NoVRegs.
+  MF->getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  // Gather the base registers of all RESERVED live ranges up front.
+  DenseSet<MCRegister> ReservedBases;
+  for (const RegLiveRange &Range : LiveRanges) {
+    if (Range.isReserved()) {
+      ReservedBases.insert(Range.getBaseReg());
+    }
+  }
+
+  // True if Base overlaps the base register of any RESERVED range.
+  const auto TouchesReservedBase = [&](MCRegister Base) {
+    return llvm::any_of(ReservedBases, [&](MCRegister Reserved) {
+      return TRI->regsOverlap(Base, Reserved);
+    });
+  };
+
+  // Only the Disallow policy cares about overlap with RESERVED bases.
+  const bool RejectReservedOverlap =
+      Policy == OverlapPolicy::DisallowOverlapWithReservedBase;
+
+  // Create and rewrite virtual registers. Live ranges are created in reverse,
+  // so we run this loop in reverse order to make the dumps more intuitive.
+  for (RegLiveRange &LR : reverse(LiveRanges)) {
+    // The analysis should have filtered out any live ranges without a valid
+    // register class.
+    const TargetRegisterClass *RC = LR.getRegisterClass();
+    assert(RC && "Live range must have a valid register class");
+
+    // The analysis should have assigned a base register to every live range.
+    const MCRegister Base = LR.getBaseReg();
+    assert(Base != MCRegister::NoRegister &&
+           "Live range must have a base register");
+
+    // Never virtualize RESERVED ranges themselves.
+    if (LR.isReserved()) {
+      continue;
+    }
+
+    // Under the Disallow policy, also skip ranges overlapping a RESERVED base.
+    if (RejectReservedOverlap && TouchesReservedBase(Base)) {
+      continue;
+    }
+
+    // Give the range a fresh virtual register and remember it in the range
+    // for the later mapping back to physical registers.
+    const Register VReg = MRI.createVirtualRegister(RC);
+    LR.setVReg(VReg);
+
+    // Point every operand of the range at the virtual register, carrying the
+    // recorded sub-register index along.
+    for (const RegOperandInfo &Info : LR.operands()) {
+      MachineOperand *MO = Info.getOperand();
+      MO->setReg(VReg);
+      MO->setSubReg(Info.getSubRegIdx());
+    }
+  }
+
+  // Mark as virtualized even if no live ranges were virtualized.
+  RegistersVirtualized = true;
+}
+
+void RegLiveRangeTracker::rewriteToPhysRegs(
+    const DenseMap<Register, MCRegister> &VRegToPhysMap) {
+  assert(RegistersVirtualized && "Registers are not virtualized");
+
+  for (const RegLiveRange &LR : LiveRanges) {
+    // Ranges that were not virtualized have no valid VReg; nothing to do.
+    const Register VReg = LR.getVReg();
+    if (!VReg.isValid())
+      continue;
+
+    // Every virtualized range must come with a target physical register.
+    const auto Mapping = VRegToPhysMap.find(VReg);
+    assert(Mapping != VRegToPhysMap.end() &&
+           "VReg must have a mapping in VRegToPhysMap");
+    const MCRegister PhysReg = Mapping->second;
+
+    for (const RegOperandInfo &Info : LR.operands()) {
+      // Leave alone any operand that does not currently hold this VReg.
+      MachineOperand *MO = Info.getOperand();
+      if (MO->getReg() != VReg)
+        continue;
+
+      // Operands recorded with a sub-register index get the matching
+      // sub-register of PhysReg; all others get PhysReg itself.
+      const unsigned SubIdx = Info.getSubRegIdx();
+      Register FinalReg = PhysReg;
+      if (SubIdx != 0) {
+        FinalReg = TRI->getSubReg(PhysReg, SubIdx);
+        assert(FinalReg && "Invalid subregister index for physical register");
+      }
+
+      // Install the physical register and drop the sub-register index.
+      MO->setReg(FinalReg);
+      MO->setSubReg(0);
+    }
+  }
+
+  // Clear virtual registers from MRI and restore NoVRegs property.
+  MF->getRegInfo().clearVirtRegs();
+  MF->getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+
+  // Mark as no longer virtualized.
+  RegistersVirtualized = false;
+
+  LLVM_DEBUG(dbgs() << "Rewritten virtual registers to physical registers\n");
+}
+
+void RegLiveRangeTracker::restoreOriginalPhysRegs() {
+  // Map every assigned VReg back to the base register of its live range.
+  DenseMap<Register, MCRegister> OriginalRegs;
+  for (const RegLiveRange &LR : LiveRanges) {
+    const Register VReg = LR.getVReg();
+    if (VReg.isValid())
+      OriginalRegs[VReg] = LR.getBaseReg();
+  }
+
+  // The general rewrite method does the actual operand rewriting.
+  rewriteToPhysRegs(OriginalRegs);
+  LLVM_DEBUG(dbgs() << "Restored original physical registers\n");
+}
+
+bool RegLiveRangeTracker::areRegistersVirtualized() const {
+  return RegistersVirtualized; // Set by virtualizeFilteredPhysRegs().
+}
+
+/// Remove every live range that has at most one available physical register
+/// in its register class.
+///
+/// Besides shrinking LiveRanges, this refreshes MostPromisingScarceRanges,
+/// because that cache stores pointers to elements of LiveRanges and would
+/// otherwise dangle after the removal.
+void RegLiveRangeTracker::filterByRegisterAvailability() {
+  // A live range has "no choice" when at most one register of its class is
+  // in AvailablePhysRegs.
+  const auto HasNoChoice = [this](const RegLiveRange &LR) {
+    // By this point, all live ranges should have a register class.
+    const TargetRegisterClass *RC = LR.getRegisterClass();
+    assert(RC && "Live range must have a register class");
+
+    // Stop counting as soon as a second available register is found.
+    unsigned NumAvailable = 0;
+    for (const MCPhysReg PhysReg : *RC) {
+      if (AvailablePhysRegs.count(PhysReg) && ++NumAvailable > 1) {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  // Drop the no-choice ranges in place. The relative order of the surviving
+  // ranges is preserved and no RegLiveRange has to be copied.
+  llvm::erase_if(LiveRanges, [&](const RegLiveRange &LR) {
+    if (!HasNoChoice(LR)) {
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "Filtering out live range for "
+                      << TRI->getName(LR.getBaseReg())
+                      << " - no alternative physical registers\n");
+    return true;
+  });
+
+  // The cached scarce ranges are pointers into LiveRanges. Erasing has moved
+  // or destroyed the elements they referred to, so recompute the cache
+  // instead of leaving dangling pointers behind.
+  MostPromisingScarceRanges = findMostPromisingScarceRanges(AvailablePhysRegs);
+
+  LLVM_DEBUG(dbgs() << "Register availability filtering complete: "
+                    << LiveRanges.size() << " live ranges remaining\n");
+}
+
+void RegLiveRangeTracker::clear() {
+  // Drop the cached scarce ranges first: they point into LiveRanges and
+  // would dangle once the live ranges are gone.
+  MostPromisingScarceRanges.clear();
+  AvailablePhysRegs.clear();
+  LiveRanges.clear();
+  AllPhysRegOperands.clear();
+  InstrOrder.clear();
+
+  // Reset the virtualization flag and the ID counter.
+  RegistersVirtualized = false;
+  NextLiveRangeID = 0;
+  // Note: MF, TRI, and TII are not cleared as they are set in the constructor
+  // and represent the context in which this tracker operates.
+}
+
+/// Print all live ranges and the available physical registers to dbgs().
+///
+/// Ranges are listed by base register, then by the position of their first
+/// def in InstrOrder, then by their index in LiveRanges, so that the listing
+/// is deterministic.
+///
+/// @param Header Optional text printed before the dump and repeated in a
+///               trailing "=== END" marker.
+void RegLiveRangeTracker::dump(const char *Header) const {
+  if (Header) {
+    dbgs() << Header;
+  }
+  dbgs() << "================================\n";
+  dbgs() << "Total live ranges: " << LiveRanges.size() << "\n\n";
+
+  // Position (in InstrOrder) of the instruction holding the first def of a
+  // range. Ranges without defs, or whose first def is not in InstrOrder, get
+  // the largest value so that they sort after all ranges with a known
+  // position. Giving every range a key keeps the comparator below a strict
+  // weak ordering, which llvm::sort requires.
+  const auto FirstDefOrder = [this](const RegLiveRange &LR) -> unsigned {
+    if (!LR.defs().empty()) {
+      const MachineInstr *MI = LR.defs().begin()->getOperand()->getParent();
+      const auto It = InstrOrder.find(MI);
+      if (It != InstrOrder.end()) {
+        return It->second;
+      }
+    }
+    return ~0u;
+  };
+
+  // Create a sorted index array to ensure deterministic output
+  SmallVector<size_t, 16> SortedIndices;
+  for (size_t LRIdx = 0; LRIdx < LiveRanges.size(); ++LRIdx) {
+    SortedIndices.push_back(LRIdx);
+  }
+
+  // Sort by base register, then by position of the first def, then by the
+  // original index, which makes the order total.
+  llvm::sort(SortedIndices, [&](size_t A, size_t B) {
+    const RegLiveRange &LRA = LiveRanges[A];
+    const RegLiveRange &LRB = LiveRanges[B];
+    if (LRA.getBaseReg() != LRB.getBaseReg()) {
+      return LRA.getBaseReg() < LRB.getBaseReg();
+    }
+    const unsigned OrderA = FirstDefOrder(LRA);
+    const unsigned OrderB = FirstDefOrder(LRB);
+    if (OrderA != OrderB) {
+      return OrderA < OrderB;
+    }
+    return A < B;
+  });
+
+  // Print one operand list (defs or uses); both use the same line format.
+  const auto DumpOperands = [this](auto Operands) {
+    size_t Idx = 0;
+    for (const RegOperandInfo &Info : Operands) {
+      dbgs() << "    [" << Idx++ << "] ";
+      // Physical registers are printed by name, virtual ones by index.
+      Register Reg = Info.getOperand()->getReg();
+      if (Reg.isPhysical()) {
+        dbgs() << "Register: " << TRI->getName(Reg);
+      } else {
+        dbgs() << "Register: %vreg" << Reg.virtRegIndex();
+      }
+      if (Info.getSubRegIdx() != 0) {
+        dbgs() << " (SubRegIdx: " << Info.getSubRegIdx() << ")";
+      }
+      dbgs() << " ";
+      if (MachineInstr *MI = Info.getOperand()->getParent()) {
+        dbgs() << AIE::NoDebug(*MI) << "\n";
+      } else {
+        dbgs() << "<orphaned operand>\n";
+      }
+    }
+  };
+
+  // Emit the ranges in sorted order.
+  for (const size_t LRIdx : SortedIndices) {
+    const RegLiveRange &LR = LiveRanges[LRIdx];
+
+    // Skip invalid/cleared ranges
+    if (LR.getID() < 0)
+      continue;
+
+    // Use the stored base register
+    const MCRegister BaseReg = LR.getBaseReg();
+    StringRef PrimaryReg = "unknown";
+    if (BaseReg != MCRegister::NoRegister) {
+      PrimaryReg = TRI->getName(BaseReg);
+    }
+
+    // Headline: ID, base register and RESERVED marker.
+    dbgs() << "Live Range #" << LR.getID() << " for " << PrimaryReg;
+    if (LR.isReserved()) {
+      dbgs() << " [RESERVED]";
+    }
+    dbgs() << ":\n";
+
+    dbgs() << "  Definitions (" << LR.getNumDefs() << "):\n";
+    DumpOperands(LR.defs());
+
+    dbgs() << "  Uses (" << LR.getNumUses() << "):\n";
+    DumpOperands(LR.uses());
+    dbgs() << "\n";
+  }
+
+  // Dump available physical registers if live ranges exist.
+  if (!LiveRanges.empty()) {
+    dbgs() << "Available Physical Registers for Reallocation:\n";
+    dbgs() << "==============================================\n";
+    // Sort a copy of the cached set so that the listing is deterministic.
+    SmallVector<MCRegister, 32> SortedRegs(AvailablePhysRegs.begin(),
+                                           AvailablePhysRegs.end());
+    llvm::sort(SortedRegs);
+    for (MCRegister Reg : SortedRegs) {
+      // MCRegister should always be physical, but check to be safe.
+      if (Reg.isPhysical()) {
+        dbgs() << "  " << TRI->getName(Reg) << "\n";
+      }
+    }
+    dbgs() << "Total: " << AvailablePhysRegs.size() << " registers\n\n";
+  }
+
+  // Emit end marker if header was provided
+  if (Header) {
+    dbgs() << "=== END " << Header;
+  }
+}
+
+std::vector<const RegLiveRange *>
+RegLiveRangeTracker::findMostPromisingScarceRanges(
+    const DenseSet<MCRegister> &AvailablePhysRegs) const {
+  // NOTE(review): the AvailablePhysRegs parameter is not read in this body.
+  // Bucket the scarce live ranges by base register (not register class), so
+  // that each bucket only holds ranges for the same physical register.
+  DenseMap<MCRegister, std::vector<const RegLiveRange *>> ScarceByBase;
+  for (const RegLiveRange &LR : LiveRanges) {
+    if (LR.isScarce()) {
+      assert(LR.getBaseReg() != MCRegister::NoRegister &&
+             "LiveRange must have a BaseReg after analysis");
+      ScarceByBase[LR.getBaseReg()].push_back(&LR);
+    }
+  }
+
+  // A bucket "overlaps" when some instruction carries more than one operand
+  // of the bucket's ranges.
+  // NOTE(review): two operands of the same range on one instruction also
+  // count as overlap here - confirm that this is intended.
+  const auto SharesInstruction =
+      [](const std::vector<const RegLiveRange *> &Bucket) {
+        DenseSet<const MachineInstr *> Visited;
+        for (const RegLiveRange *Range : Bucket) {
+          for (const RegOperandInfo &Op : Range->operands()) {
+            const MachineInstr *MI = Op.getOperand()->getParent();
+            if (!Visited.insert(MI).second) {
+              return true;
+            }
+          }
+        }
+        return false;
+      };
+
+  // Keep the biggest bucket that has real competition (more than one range)
+  // and no shared instruction. On equal size the earlier bucket is kept.
+  std::vector<const RegLiveRange *> Best;
+  for (const auto &Entry : ScarceByBase) {
+    const std::vector<const RegLiveRange *> &Bucket = Entry.second;
+    if (Bucket.size() <= 1 || Bucket.size() <= Best.size()) {
+      continue;
+    }
+    if (!SharesInstruction(Bucket)) {
+      Best = Bucket;
+    }
+  }
+  return Best;
+}
diff --git a/llvm/lib/Target/AIE/AIERegDefUseTracker.h b/llvm/lib/Target/AIE/AIERegDefUseTracker.h
new file mode 100644
index 000000000000..459c34fcb18d
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIERegDefUseTracker.h
@@ -0,0 +1,427 @@
+//===- AIERegDefUseTracker.h - Track Register Live Ranges ----------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations for tracking and analyzing register live
+// ranges in a MachineBasicBlock. The tracker performs the following:
+// - Identifies register definitions and uses that form live ranges
+// - Merges aliasing register accesses into unified live ranges
+// - Filters out unsafe ranges (tied operands, live-in/out, implicit uses)
+// - Computes appropriate register classes for each live range
+// - Optionally replaces physical registers with virtual registers for testing
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIEREGDEFUSETRACKER_H
+#define LLVM_LIB_TARGET_AIE_AIEREGDEFUSETRACKER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/MC/MCRegister.h"
+
+namespace llvm {
+
+struct AIEBaseInstrInfo;
+struct LaneBitmask;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+class TargetRegisterClass;
+
+/// A register operand paired with a sub-register index (0 means none).
+class RegOperandInfo {
+  MachineOperand *Operand; // The machine operand this entry refers to.
+  unsigned SubRegIdx;      // Sub-register index recorded for the operand.
+
+public:
+  RegOperandInfo(MachineOperand *Op, unsigned SubIdx = 0)
+      : Operand(Op), SubRegIdx(SubIdx) {}
+
+  MachineOperand *getOperand() const { return Operand; }
+  unsigned getSubRegIdx() const { return SubRegIdx; }
+};
+
+/// Structure representing a live range for a register.
+/// A live range can have multiple definitions (e.g., when different
+/// sub-registers are defined separately) and multiple uses.
+class RegLiveRange {
+public:
+  // Sentinel value for live-out registers not yet associated with a live range
+  static constexpr int NoLiveRange = -1;
+
+private:
+  // All definitions that contribute to this live range
+  SmallVector<RegOperandInfo, 4> Defs;
+
+  // All uses of this live range
+  SmallVector<RegOperandInfo, 4> Uses;
+
+  // Base register for this live range (a register that covers all operands;
+  // it is updated by mergeFrom() and expandBaseToInclude()).
+  MCRegister BaseReg = MCRegister::NoRegister;
+
+  // Register class that satisfies all constraints for this live range.
+  const TargetRegisterClass *RegisterClass = nullptr;
+
+  // Explicit set of admissible physical registers for this live range.
+  // This represents the semantic constraint: which registers can be used
+  // based on instruction encoding. Initially populated from RegisterClass,
+  // but can be further constrained by per-LR requirements (e.g., bypass).
+  // Note: this is separate from availability - PostRegAlloc intersects this
+  // with the global available registers set to get candidates.
+  DenseSet<MCRegister> AdmissibleRegs;
+
+  // Virtual register assigned to this live range (if virtualized)
+  Register VReg;
+
+  // Whether this live range is scarce (has exactly 1 available register)
+  bool IsScarce = false;
+
+  // Whether this live range is reserved (virtualizable but register reserved).
+  // This is used for disjoint live ranges that share a physical register with
+  // subsequent full defs. The range can be virtualized to allow pipelining,
+  // but its physical register must remain reserved for the subsequent def.
+  bool IsReserved = false;
+
+  // Unique ID for this live range (for debugging/tracking)
+  // Use -1 as sentinel for invalid/cleared ranges
+  int ID = -1;
+
+public:
+  RegLiveRange() = default;
+
+  /// Construct a live range with the given ID, base register, and reserved
+  /// status. This is the primary constructor used when creating new ranges.
+  RegLiveRange(int ID, MCRegister BaseReg, bool IsReserved = false)
+      : BaseReg(BaseReg), IsReserved(IsReserved), ID(ID) {}
+
+  void addDef(MachineOperand *DefOp, unsigned SubRegIdx);
+  void addUse(MachineOperand *UseOp, unsigned SubRegIdx);
+
+  /// Get the number of definitions.
+  size_t getNumDefs() const { return Defs.size(); }
+
+  /// Get the number of uses.
+  size_t getNumUses() const { return Uses.size(); }
+
+  /// Iterator access to definitions
+  auto defs() const { return llvm::make_range(Defs.begin(), Defs.end()); }
+
+  /// Iterator access to uses
+  auto uses() const { return llvm::make_range(Uses.begin(), Uses.end()); }
+
+  /// Range over all operands: all uses first, followed by all defs.
+  auto operands() const {
+    return llvm::concat<const RegOperandInfo>(Uses, Defs);
+  }
+
+  /// Get the base register for this live range.
+  MCRegister getBaseReg() const { return BaseReg; }
+
+  /// Get the register class for this live range.
+  const TargetRegisterClass *getRegisterClass() const { return RegisterClass; }
+
+  /// Get the admissible physical registers for this live range.
+  const DenseSet<MCRegister> &getAdmissibleRegs() const {
+    return AdmissibleRegs;
+  }
+
+  /// Check if a register is admissible for this live range.
+  bool isAdmissible(MCRegister Reg) const {
+    return AdmissibleRegs.contains(Reg);
+  }
+
+  /// Get the number of admissible registers.
+  size_t getNumAdmissibleRegs() const { return AdmissibleRegs.size(); }
+
+  /// Get the virtual register assigned to this live range
+  Register getVReg() const { return VReg; }
+
+  /// Set the virtual register for this live range
+  void setVReg(Register R) { VReg = R; }
+
+  /// Check if this live range is scarce (has exactly 1 available register)
+  bool isScarce() const { return IsScarce; }
+
+  /// Set whether this live range is scarce
+  void setIsScarce(bool Scarce) { IsScarce = Scarce; }
+
+  /// Check if this live range is reserved (virtualizable but register reserved)
+  bool isReserved() const { return IsReserved; }
+
+  /// Set whether this live range is reserved
+  void setIsReserved(bool Reserved) { IsReserved = Reserved; }
+
+  /// Get the unique ID for this live range.
+  int getID() const { return ID; }
+
+  /// Set the register class and populate AdmissibleRegs.
+  /// AdmissibleRegs is initially populated from the register class membership.
+  void setRegisterClass(const TargetRegisterClass *RC);
+
+  /// Merge another live range into this one.
+  /// Copies all defs and uses from Other into this range.
+  /// Updates BaseReg to the smallest register that contains all operands from
+  /// both ranges. This handles sibling registers (e.g., cml4 and cmh4) by
+  /// finding their common super-register (dm4).
+  /// Other is NOT cleared after the merge (caller must do that if needed).
+  /// @param Other The live range to merge from.
+  /// @param TRI Target register info for computing sub-register indices.
+  void mergeFrom(const RegLiveRange &Other, const TargetRegisterInfo *TRI);
+
+  /// Expand the base register to include an external register.
+  /// This is used for registers that affect the live range's base (e.g.,
+  /// live-out sentinels) but don't have corresponding operands.
+  /// If ExtReg is larger than BaseReg, or if they are siblings requiring
+  /// a common super-register, BaseReg is updated accordingly.
+  /// Existing operands have their SubRegIdx values recomputed.
+  /// @param ExtReg The external register to include.
+  /// @param TRI Target register info for computing sub-register indices.
+  void expandBaseToInclude(MCRegister ExtReg, const TargetRegisterInfo *TRI);
+
+  /// Clear all state, making this an invalid/empty range.
+  void clear();
+
+  /// Check if this live range is empty/invalid, i.e. its ID is negative.
+  bool isEmpty() const { return ID < 0; }
+
+  /// Dump a brief summary of this live range for debugging.
+  void dumpBrief(const TargetRegisterInfo *TRI) const;
+};
+
+/// Tracker for register live ranges in a MachineBasicBlock
+class RegLiveRangeTracker {
+  MachineFunction *MF;
+  const TargetRegisterInfo *TRI;
+  const AIEBaseInstrInfo *TII;
+
+  // List of all live ranges found in the block
+  SmallVector<RegLiveRange, 16> LiveRanges;
+
+  // All physical register operands in the block
+  SmallVector<MachineOperand *, 32> AllPhysRegOperands;
+
+  // Instruction order mapping for determining earliest operand
+  DenseMap<const MachineInstr *, unsigned> InstrOrder;
+
+  // Set by virtualizeFilteredPhysRegs(), reset by rewriteToPhysRegs()/clear()
+  mutable bool RegistersVirtualized = false;
+
+  // Cached available physical registers (computed during analyze)
+  DenseSet<MCRegister> AvailablePhysRegs;
+
+  // Cached most promising scarce range set; the pointers refer to LiveRanges
+  std::vector<const RegLiveRange *> MostPromisingScarceRanges;
+
+  // Counter for assigning unique IDs to live ranges
+  int NextLiveRangeID = 0;
+
+  /// Get the sub-register index if AccessReg is a sub-register of BaseReg
+  /// Returns 0 if AccessReg is not a sub-register of BaseReg
+  unsigned getSubRegIndex(MCRegister AccessReg, MCRegister BaseReg) const;
+
+  /// Check if a register overlaps with any register in a set
+  bool overlapsAnyInSet(MCRegister Reg,
+                        const DenseSet<MCRegister> &RegSet) const;
+
+  /// Compute the register class for a live range based on all its operands
+  void computeRegisterClass(RegLiveRange &LR) const;
+
+  /// First-stage safety filtering.
+  bool hasTiedOperands(const RegLiveRange &LR) const;
+
+  /// Check if a live range's base register is fully defined in the block.
+  /// Uses lane mask intersection with the block's live-in set to determine
+  /// if the register is truly defined within the block or comes from outside.
+  /// This can discriminate between a truly undefined register (not in live-in,
+  /// safe to virtualize) and a register defined outside the loop (in live-in,
+  /// should be rejected to preserve loop-carried values).
+  bool
+  isFullyDefined(const RegLiveRange &LR,
+                 const DenseMap<MCRegister, LaneBitmask> &LocalLiveLaneMasks,
+                 const MachineBasicBlock &MBB) const;
+
+  /// Second-stage full coverage pruning
+  void pruneByFullCoverage();
+
+  /// Merge aliasing live ranges when a definition is encountered.
+  void mergeAliasingLiveRanges(
+      unsigned DefLRIdx, MCRegister DefReg,
+      DenseMap<MCRegister, std::pair<int, LaneBitmask>> &LiveRegs,
+      DenseMap<MachineOperand *, unsigned> &OperandToLiveRange);
+
+  /// Helper to find the most promising scarce range set.
+  /// Called by analyze() to populate MostPromisingScarceRanges.
+  std::vector<const RegLiveRange *> findMostPromisingScarceRanges(
+      const DenseSet<MCRegister> &AvailablePhysRegs) const;
+
+  /// Collect base registers from RESERVED live ranges.
+  DenseSet<MCRegister> collectReservedBaseRegs() const;
+
+  /// Populate AvailablePhysRegs from non-reserved live ranges.
+  /// Adds base registers and sub-registers that don't overlap with reserved.
+  void computeAvailableFromLiveRanges(const DenseSet<MCRegister> &ReservedRegs);
+
+  /// Extend AvailablePhysRegs with super-registers whose sub-regs are all
+  /// available.
+  void deriveSuperRegsFromSubRegs();
+
+  /// Add caller-saved registers that are completely unused in the block.
+  /// Uses AllPhysRegOperands member for used registers, and iterates
+  /// MBB.liveins() and MBB.liveouts() directly (with lane mask support).
+  /// @param MBB The machine basic block (for live-in/out iteration).
+  /// @param ImplicitRegs Registers used implicitly.
+  /// @param ReservedRegs Reserved base registers.
+  void addUnusedCallerSavedRegs(MachineBasicBlock &MBB,
+                                const DenseSet<MCRegister> &ImplicitRegs,
+                                const DenseSet<MCRegister> &ReservedRegs);
+
+  /// Mark live ranges as scarce if they have exactly 1 available register.
+  void markScarceRanges();
+
+  //===--------------------------------------------------------------------===//
+  // Analyze helper methods (decomposition of analyze())
+  //===--------------------------------------------------------------------===//
+
+  /// State passed through the liveness scan.
+  /// Groups the mutable state that is threaded through the backward scan.
+  struct LivenessScanState {
+    /// Map from register to its current live range index (signed) and lane
+    /// mask. Use NoLiveRange as sentinel for live-out registers not yet
+    /// associated with a range.
+    DenseMap<MCRegister, std::pair<int, LaneBitmask>> LiveRegs;
+
+    /// Map from operand to live range index.
+    DenseMap<MachineOperand *, unsigned> OperandToLiveRange;
+
+    /// Set of registers used implicitly (invalidates explicit ranges).
+    DenseSet<MCRegister> ImplicitRegs;
+  };
+
+  /// Build instruction order map and collect physical register operands.
+  /// Also populates ImplicitRegs.
+  void buildInstructionOrderAndCollectOperands(
+      ArrayRef<MachineInstr *> SemanticOrder, LivenessScanState &State);
+
+  /// Initialize LiveRegs from live-out registers.
+  void initLiveRegsFromLiveOuts(const MachineBasicBlock &MBB,
+                                LivenessScanState &State);
+
+  /// Get or create a live range for a register operand.
+  /// Returns the live range index.
+  unsigned getOrCreateLiveRangeForOperand(MCRegister Reg, MachineOperand *MO,
+                                          LivenessScanState &State);
+
+  /// Process def operands for a single instruction (reverse pass).
+  void processDefsInInstruction(MachineInstr &MI, LivenessScanState &State);
+
+  /// Process use operands for a single instruction (reverse pass).
+  void processUsesInInstruction(MachineInstr &MI, LivenessScanState &State);
+
+  /// Perform the liveness scan over all instructions.
+  void performLivenessScan(ArrayRef<MachineInstr *> SemanticOrder,
+                           LivenessScanState &State);
+
+  /// Apply first-stage safety filtering to live ranges.
+  /// LocalLiveLaneMasks holds the lane masks meant for isFullyDefined.
+  void applySafetyFiltering(
+      const MachineBasicBlock &MBB, const LivenessScanState &State,
+      const DenseMap<MCRegister, LaneBitmask> &LocalLiveLaneMasks);
+
+  /// Compute register classes and apply register class filtering.
+  void computeRegisterClassesAndFilter();
+
+  /// Finalize available registers and scarcity after all filtering.
+  void finalizeAvailabilityAndScarcity(MachineBasicBlock &MBB,
+                                       const LivenessScanState &State);
+
+public:
+  RegLiveRangeTracker(MachineBasicBlock &MBB);
+
+  /// Process a MachineBasicBlock to find all register live ranges
+  /// @param MBB The machine basic block to analyze
+  /// @param SemanticOrder The semantic instruction order (required - must be
+  ///                      non-empty)
+  void analyze(MachineBasicBlock &MBB, ArrayRef<MachineInstr *> SemanticOrder);
+
+  /// Get all live ranges
+  ArrayRef<RegLiveRange> getLiveRanges() const { return LiveRanges; }
+
+  /// Dump the live range information for debugging
+  /// @param Header Optional header string to print before the dump
+  void dump(const char *Header = nullptr) const;
+
+  /// Overlap policy for virtualization with respect to RESERVED ranges.
+  enum class OverlapPolicy {
+    /// Do not virtualize any range that overlaps a RESERVED base register.
+    /// This is the safe default that prevents regressions.
+    DisallowOverlapWithReservedBase,
+    /// Allow virtualizing ranges that overlap RESERVED bases.
+    /// This enables the RESERVED semantics for disjoint ranges sharing a base.
+    AllowOverlapWithReservedBase
+  };
+
+  /// Replace filtered physical registers with virtual registers.
+  /// This modifies the MachineBasicBlock and updates LiveRanges with VReg info.
+  /// RESERVED ranges themselves are never virtualized.
+  /// Other ranges may be filtered based on the policy.
+  /// This is a non-destructive operation that supports partial virtualization.
+  void virtualizeFilteredPhysRegs(
+      OverlapPolicy Policy = OverlapPolicy::DisallowOverlapWithReservedBase);
+
+  /// Get the set of physical registers that would be available for reallocation
+  /// Returns the cached value computed during analyze()
+  const DenseSet<MCRegister> &getAvailablePhysRegs() const {
+    return AvailablePhysRegs;
+  }
+
+  /// Rewrite virtual registers to physical registers using the provided
+  /// mapping.
+  /// @param VRegToPhysMap Mapping from virtual registers to physical registers
+  void rewriteToPhysRegs(const DenseMap<Register, MCRegister> &VRegToPhysMap);
+
+  /// Restore original physical registers from virtual registers
+  /// Uses the LiveRanges to map VRegs back to their original PhysRegs
+  /// This is a convenience method that builds the mapping and calls
+  /// rewriteToPhysRegs
+  void restoreOriginalPhysRegs();
+
+  /// Check if registers are currently virtualized
+  bool areRegistersVirtualized() const;
+
+  /// Filter live ranges based on available physical registers.
+  /// Removes live ranges that have at most one available physical register
+  /// for their register class, as these should stay physical to avoid
+  /// pipeliner invalidation.
+  /// Uses the cached AvailablePhysRegs computed during analyze().
+  void filterByRegisterAvailability();
+
+  /// Clear all state and bring the tracker back to its default constructed
+  /// state
+  void clear();
+
+  /// Get the most promising scarce range set for packing.
+  /// Returns the cached value computed during analyze().
+  /// An empty vector signals that no such set could be found.
+  const std::vector<const RegLiveRange *> &
+  getMostPromisingScarceRanges() const {
+    return MostPromisingScarceRanges;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AIE_AIEREGDEFUSETRACKER_H
diff --git a/llvm/lib/Target/AIE/AIEScarceRegScheduling.cpp b/llvm/lib/Target/AIE/AIEScarceRegScheduling.cpp
new file mode 100644
index 000000000000..b4063e8b2440
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIEScarceRegScheduling.cpp
@@ -0,0 +1,418 @@
+//===- AIEScarceRegScheduling.cpp - Scarce Register Scheduling Strategy --===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+// This file implements a PostPipelinerStrategy that prioritizes scheduling
+// decisions based on scarce register pressure.
+//===----------------------------------------------------------------------===//
+
+#include "AIEScarceRegScheduling.h"
+#include "AIERegDefUseTracker.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+
+#define DEBUG_TYPE "scarce-reg-sched"
+
+namespace llvm::AIE {
+
+ScarceRange::ScarceRange(const RegLiveRange &LR, const ScheduleDAGInstrs &DAG)
+    : LiveRange(LR) {
+  // Gather the parent instruction of every def and use operand of LR once.
+  DenseSet<const MachineInstr *> UniqueInstrs;
+
+  for (const auto &DefInfo : LR.defs()) {
+    MachineOperand *const DefOp = DefInfo.getOperand();
+    assert(DefOp && "DefOp should be valid");
+    MachineInstr *const DefMI = DefOp->getParent();
+    assert(DefMI && "Every operand should have a parent MachineInstr");
+    UniqueInstrs.insert(DefMI);
+  }
+
+  for (const auto &UseInfo : LR.uses()) {
+    MachineOperand *const UseOp = UseInfo.getOperand();
+    assert(UseOp && "UseOp should be valid");
+    MachineInstr *const UseMI = UseOp->getParent();
+    assert(UseMI && "Every operand should have a parent MachineInstr");
+    UniqueInstrs.insert(UseMI);
+  }
+
+  // Scan the SUnits in order and record the NodeNum of each one whose
+  // instruction is in the set. NOTE(review): when several SUnits share an
+  // instruction, only the early break below keeps later ones out of Members.
+  for (const auto &SU : DAG.SUnits) {
+    const MachineInstr *const MI = SU.getInstr();
+    assert(MI && "Every SUnit should have a MachineInstr");
+    if (UniqueInstrs.count(MI)) {
+      Members.push_back(SU.NodeNum);
+      // Stop once Members has as many entries as there are instructions.
+      if (Members.size() == UniqueInstrs.size()) {
+        break;
+      }
+    }
+  }
+
+  // Members are in SUnit order, which is deterministic.
+}
+
+ScarceRegScheduling::ScarceRegScheduling(ScheduleDAGInstrs &DAG,
+                                         ScheduleInfo &Info,
+                                         RegLiveRangeTracker &RegTracker,
+                                         int II)
+    : PostPipelinerStrategy(DAG, Info, /*LatestBias=*/0),
+      RegTracker(RegTracker), II(II) {}
+
+BurstMostUrgentStrategy::BurstMostUrgentStrategy(
+    ScheduleDAGInstrs &DAG, ScheduleInfo &Info,
+    const std::vector<ScarceRange> &ScarceRanges, int LatestBias)
+    : PostPipelinerStrategy(DAG, Info, LatestBias), ScarceRanges(ScarceRanges),
+      CurrentSet(0) {
+
+  assert(!ScarceRanges.empty() &&
+         "BurstMostUrgentStrategy requires at least one scarce range");
+
+  // Flag every SUnit index that is a member of at least one scarce range.
+  const size_t NodeCount = Info.NInstr;
+  SmallVector<bool, 64> IsScarce(NodeCount, false);
+  for (const auto &Range : ScarceRanges) {
+    for (const int Member : Range.Members) {
+      assert(Member >= 0 && static_cast<size_t>(Member) < NodeCount &&
+             "Scarce range member index out of bounds");
+      IsScarce[Member] = true;
+    }
+  }
+
+  // For each range, in the order of ScarceRanges, store the unflagged
+  // ancestors of its members together with a copy of the members.
+  Predecessors.reserve(ScarceRanges.size());
+  Members.reserve(ScarceRanges.size());
+
+  for (const auto &Range : ScarceRanges) {
+    // Ancestors are kept in first-seen order, each one only once.
+    SmallVector<int, 4> NonScarceAncestors;
+    for (const int Member : Range.Members) {
+      const auto &Node = Info[Member];
+      for (const int Ancestor : Node.Ancestors) {
+        // Indices outside the flag vector are dropped, like scarce members.
+        const bool InBounds = static_cast<size_t>(Ancestor) < IsScarce.size();
+        if (!InBounds || IsScarce[Ancestor]) {
+          continue;
+        }
+        if (!llvm::is_contained(NonScarceAncestors, Ancestor)) {
+          NonScarceAncestors.push_back(Ancestor);
+        }
+      }
+    }
+
+    Predecessors.push_back(std::move(NonScarceAncestors));
+    Members.push_back(Range.Members);
+  }
+
+  // Two slots per range; init() fills them in.
+  OrderedMembers.resize(ScarceRanges.size() * 2);
+}
+
+void BurstMostUrgentStrategy::init(const SmallVector<int, 4> &RangeOrder) {
+  assert(RangeOrder.size() == ScarceRanges.size() &&
+         "RangeOrder must have the same size as ScarceRanges");
+
+  // Start again from the first set.
+  CurrentSet = 0;
+
+  // Slot pair I holds the sets of the range scheduled I-th: its non-scarce
+  // predecessors in the even slot, followed by its members in the odd slot.
+  size_t Slot = 0;
+  for (const int RangeIdx : RangeOrder) {
+    OrderedMembers[Slot++] = Predecessors[RangeIdx];
+    OrderedMembers[Slot++] = Members[RangeIdx];
+  }
+}
+
+// Comparison used to choose between two candidate nodes; a true result means
+// that A is preferred over B.
+bool BurstMostUrgentStrategy::better(const SUnit &A, const SUnit &B) {
+  const int NodeA = A.NodeNum;
+  const int NodeB = B.NodeNum;
+
+  // As long as a set is active, a node inside it beats a node outside it.
+  if (CurrentSet < OrderedMembers.size()) {
+    const auto &ActiveSet = OrderedMembers[CurrentSet];
+    const bool HasA = llvm::is_contained(ActiveSet, NodeA);
+    const bool HasB = llvm::is_contained(ActiveSet, NodeB);
+
+    if (HasA != HasB) {
+      return HasA;
+    }
+  }
+
+  // Both inside or both outside (or no set left): the node with the smaller
+  // Earliest value is preferred.
+  return Info[NodeA].Earliest < Info[NodeB].Earliest;
+}
+
+void BurstMostUrgentStrategy::selected(const SUnit &N) {
+  // N is not inspected; progress is read from the Scheduled flags in Info.
+  if (CurrentSet < OrderedMembers.size()) {
+    const auto &CurrentMembers = OrderedMembers[CurrentSet];
+
+    // The set is complete when all members are Scheduled (also when empty).
+    const bool AllMembersScheduled =
+        llvm::all_of(CurrentMembers, [this](int MemberIdx) {
+          return Info[MemberIdx].Scheduled;
+        });
+
+    // A complete set is retired; at most one set is retired per call.
+    if (AllMembersScheduled) {
+      ++CurrentSet;
+      LLVM_DEBUG(dbgs() << format("Completed set %zu, advancing to %zu\n",
+                                  CurrentSet - 1, CurrentSet));
+
+      // An odd index is a members set. NOTE(review): BurstIdx is a position
+      // in init()'s RangeOrder but is used as a ScarceRanges index; confirm.
+      if ((CurrentSet - 1) % 2 == 1) {
+        const size_t BurstIdx = (CurrentSet - 1) / 2;
+        const int RangeIdx =
+            (BurstIdx < ScarceRanges.size()) ? static_cast<int>(BurstIdx) : -1;
+        if (RangeIdx >= 0) {
+          simulateAntiDependences(RangeIdx);
+        }
+      }
+    }
+  }
+}
+
+void BurstMostUrgentStrategy::simulateAntiDependences(int CompletedRangeIdx) {
+  const auto &CompletedRange = ScarceRanges[CompletedRangeIdx];
+  const auto *const SchedModel = DAG.getSchedModel();
+
+  LLVM_DEBUG(dbgs() << format("Simulating anti-dependences for range %d\n",
+                              CompletedRangeIdx));
+
+  // For each Use in the completed range's LiveRange.
+  for (const auto &UseInfo : CompletedRange.LiveRange.uses()) {
+    MachineOperand *const UseOp = UseInfo.getOperand();
+    assert(UseOp && "UseOp should be valid");
+    MachineInstr *const UseMI = UseOp->getParent();
+    assert(UseMI && "Every operand should have a parent MachineInstr");
+
+    const unsigned UseOpIdx = UseOp->getOperandNo();
+
+    // Find the member of the completed range that wraps the use instruction.
+    int UseSUIdx = -1;
+    for (const int MemberIdx : CompletedRange.Members) {
+      if (DAG.SUnits[MemberIdx].getInstr() == UseMI) {
+        UseSUIdx = MemberIdx;
+        break;
+      }
+    }
+    assert(UseSUIdx >= 0 && "Use instruction should be in completed range");
+
+    const int UseCycle = Info[UseSUIdx].Cycle;
+
+    // For each range that follows the completed one in ScarceRanges (by index).
+    for (size_t LaterRangeIdx = CompletedRangeIdx + 1;
+         LaterRangeIdx < ScarceRanges.size(); ++LaterRangeIdx) {
+      const auto &LaterRange = ScarceRanges[LaterRangeIdx];
+
+      // For each Def in the later range's LiveRange.
+      for (const auto &DefInfo : LaterRange.LiveRange.defs()) {
+        MachineOperand *const DefOp = DefInfo.getOperand();
+        assert(DefOp && "DefOp should be valid");
+        MachineInstr *const DefMI = DefOp->getParent();
+        assert(DefMI && "Every operand should have a parent MachineInstr");
+
+        const unsigned DefOpIdx = DefOp->getOperandNo();
+
+        // Find the member of the later range that wraps the def instruction.
+        int DefSUIdx = -1;
+        for (const int MemberIdx : LaterRange.Members) {
+          if (DAG.SUnits[MemberIdx].getInstr() == DefMI) {
+            DefSUIdx = MemberIdx;
+            break;
+          }
+        }
+        assert(DefSUIdx >= 0 && "Def instruction should be in later range");
+
+        // Anti-dependence latency; note the use fills the def argument slot.
+        const unsigned Latency =
+            SchedModel->computeOperandLatency(UseMI, UseOpIdx, DefMI, DefOpIdx);
+
+        // Update Earliest[Def] = max(Earliest[Def], Cycle[Use] + L).
+        const int NewEarliest = UseCycle + static_cast<int>(Latency);
+        Info[DefSUIdx].Earliest =
+            std::max(Info[DefSUIdx].Earliest, NewEarliest);
+      }
+    }
+  }
+}
+
+void buildScarceRangeMapping(const std::vector<ScarceRange> &Ranges,
+                             const ScheduleInfo &Info,
+                             std::vector<int> &RangeOfSUnit) {
+  // Start with every SUnit unassigned (-1), then stamp the range indices.
+  RangeOfSUnit.assign(Info.NInstr, -1);
+
+  for (size_t Idx = 0, NumRanges = Ranges.size(); Idx != NumRanges; ++Idx) {
+    for (const int SUIdx : Ranges[Idx].Members) {
+      assert(SUIdx >= 0 && SUIdx < Info.NInstr &&
+             "Scarce range member index out of bounds");
+      assert(RangeOfSUnit[SUIdx] == -1 &&
+             "SUnit cannot belong to multiple scarce ranges");
+      RangeOfSUnit[SUIdx] = Idx;
+    }
+  }
+}
+
+// Derives the range-level predecessor lists: a range gets another range as
+// predecessor when one of its members has a DAG predecessor in that range.
+void buildScarceDAG(std::vector<ScarceRange> &Ranges, const ScheduleInfo &Info,
+                    const ScheduleDAGInstrs &DAG) {
+  // Look-up table from SUnit index to the index of the owning range (or -1).
+  std::vector<int> RangeOfSUnit;
+  buildScarceRangeMapping(Ranges, Info, RangeOfSUnit);
+
+  // ThisRange tracks the index of Range, to recognise edges that stay inside
+  // a single range.
+  int ThisRange = 0;
+  for (ScarceRange &Range : Ranges) {
+    // Whatever was stored before is dropped. New entries are appended in the
+    // order in which they are first met, and never twice.
+    Range.PredRanges.clear();
+
+    for (const int Member : Range.Members) {
+      assert(Member >= 0 && Member < Info.NInstr &&
+             "Scarce range member index out of bounds");
+
+      const SUnit &MemberSU = DAG.SUnits[Member];
+      for (const auto &Edge : MemberSU.Preds) {
+        const SUnit *const Pred = Edge.getSUnit();
+
+        // Null and boundary nodes are skipped.
+        if (!Pred || Pred->isBoundaryNode()) {
+          continue;
+        }
+
+        const int PredIdx = Pred->NodeNum;
+        const int Owner = RangeOfSUnit[PredIdx];
+
+        // Keep only predecessors owned by some other scarce range.
+        if (Owner == -1 || Owner == ThisRange) {
+          continue;
+        }
+
+        if (!llvm::is_contained(Range.PredRanges, Owner)) {
+          Range.PredRanges.push_back(Owner);
+        }
+      }
+    }
+
+    ++ThisRange;
+  }
+}
+
+bool checkAcyclic(const std::vector<ScarceRange> &Ranges) {
+  const size_t NumRanges = Ranges.size();
+
+  // Kahn's algorithm. Pending[I] counts the predecessors of range I that have
+  // not been processed yet; Worklist holds the ranges whose count is zero.
+  SmallVector<unsigned, 4> Pending;
+  Pending.reserve(NumRanges);
+  for (const ScarceRange &Range : Ranges) {
+    Pending.push_back(Range.PredRanges.size());
+  }
+
+  SmallVector<int, 4> Worklist;
+  for (size_t I = 0; I != NumRanges; ++I) {
+    if (Pending[I] == 0) {
+      Worklist.push_back(I);
+    }
+  }
+
+  // Take ranges off the worklist one by one, counting them as we go.
+  unsigned Visited = 0;
+  while (!Worklist.empty()) {
+    const int Done = Worklist.pop_back_val();
+    ++Visited;
+
+    // Every range that names Done as a predecessor has one fewer to wait for.
+    for (size_t J = 0; J != NumRanges; ++J) {
+      if (!llvm::is_contained(Ranges[J].PredRanges, Done)) {
+        continue;
+      }
+      --Pending[J];
+      if (Pending[J] == 0) {
+        Worklist.push_back(J);
+      }
+    }
+  }
+
+  // Ranges on a cycle never reach a count of zero and so are never visited.
+  return Visited == NumRanges;
+}
+
+bool enumerateRangeOrders(
+    const std::vector<ScarceRange> &Ranges,
+    llvm::function_ref<bool(const SmallVector<int, 4> &Order)> OnOrder) {
+
+  const size_t K = Ranges.size();
+
+  // Track which ranges have been placed in the current order.
+  SmallVector<bool, 4> Placed(K, false);
+
+  // Current partial order being built.
+  SmallVector<int, 4> Order;
+  Order.reserve(K);
+
+  // Depth-first enumeration; the lambda is handed to itself to recurse.
+  const auto Enumerate = [&](auto &EnumerateRef) -> bool {
+    // Base case: all K ranges are placed (immediate when K is zero).
+    if (Order.size() == K) {
+      LLVM_DEBUG(dbgs() << "\nEntering burst scheduling with order ";
+                 for (auto Ord : Order) { dbgs() << Ord << ", "; } dbgs()
+                 << "\n";);
+      return OnOrder(Order);
+    }
+
+    // Try every unplaced range whose predecessors are all in Order.
+    for (size_t RangeIdx = 0; RangeIdx < K; ++RangeIdx) {
+      if (Placed[RangeIdx]) {
+        continue;
+      }
+
+      const auto &Range = Ranges[RangeIdx];
+
+      // Check if all predecessors are placed.
+      const bool AllPredsPlaced = llvm::all_of(
+          Range.PredRanges, [&Placed](int PredIdx) { return Placed[PredIdx]; });
+
+      if (AllPredsPlaced) {
+        // This range is ready; add it to the order and recurse. A true result
+        // from the recursion ends the whole enumeration.
+        Order.push_back(RangeIdx);
+        Placed[RangeIdx] = true;
+
+        if (EnumerateRef(EnumerateRef)) {
+          return true;
+        }
+
+        // Backtrack.
+        Placed[RangeIdx] = false;
+        Order.pop_back();
+      }
+    }
+
+    return false;
+  };
+
+  LLVM_DEBUG(dbgs() << "Enumerating scarce ranges\n");
+
+  return Enumerate(Enumerate);
+}
+
+} // namespace llvm::AIE
diff --git a/llvm/lib/Target/AIE/AIEScarceRegScheduling.h b/llvm/lib/Target/AIE/AIEScarceRegScheduling.h
new file mode 100644
index 000000000000..ec4586495487
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIEScarceRegScheduling.h
@@ -0,0 +1,131 @@
+//===- AIEScarceRegScheduling.h - Scarce Register Scheduling Strategy ----===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+// This file contains a PostPipelinerStrategy that prioritizes scheduling
+// decisions based on scarce register pressure.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIESCARCEREGSCHEDULING_H
+#define LLVM_LIB_TARGET_AIE_AIESCARCEREGSCHEDULING_H
+
+#include "AIEPostPipeliner.h"
+#include "llvm/ADT/SmallVector.h"
+#include <vector>
+
+namespace llvm {
+class RegLiveRange;
+class RegLiveRangeTracker;
+class SUnit;
+} // namespace llvm
+
+namespace llvm::AIE {
+
+// PostPipelinerStrategy that carries RegTracker and II; overrides name() only.
+class ScarceRegScheduling : public PostPipelinerStrategy {
+  [[maybe_unused]] RegLiveRangeTracker &RegTracker;
+  [[maybe_unused]] int II;
+
+public:
+  ScarceRegScheduling(ScheduleDAGInstrs &DAG, ScheduleInfo &Info,
+                      RegLiveRangeTracker &RegTracker, int II);
+  std::string name() override { return "ScarceRegScheduling"; }
+};
+
+// Represents a scarce range to be scheduled atomically.
+struct ScarceRange {
+  // SUnit indices that are part of this scarce range.
+  SmallVector<int, 4> Members;
+
+  // Indices of the scarce ranges preceding this one; set by buildScarceDAG().
+  SmallVector<int, 4> PredRanges;
+
+  // The RegLiveRange this scarce range was built from; its def/use operands
+  // drive the anti-dependence simulation in BurstMostUrgentStrategy. Held by
+  // reference, so the RegLiveRange must outlive this object.
+  const RegLiveRange &LiveRange;
+
+  // Event-space anchor (start cycle modulo II).
+  int EventAnchor = 0;
+
+  // Issue-space anchor (converted from event-space with base normalization).
+  int IssueAnchor = 0;
+
+  // Event-space length of the MLI.
+  int EventLength = 0;
+
+  // Constructor computes Members from LiveRange's defs and uses via DAG.
+  ScarceRange(const RegLiveRange &LR, const ScheduleDAGInstrs &DAG);
+};
+
+// Strategy for burst scheduling: prioritize predecessors of the current
+// scarce range, then atomically place the scarce range members.
+class BurstMostUrgentStrategy : public PostPipelinerStrategy {
+  // The scarce ranges in their original order; init() sets the burst order.
+  const std::vector<ScarceRange> &ScarceRanges;
+
+  // Precomputed non-scarce predecessors for each range (in original order).
+  std::vector<SmallVector<int, 4>> Predecessors;
+
+  // Members for each range (in original order, copied from ScarceRanges).
+  std::vector<SmallVector<int, 4>> Members;
+
+  // Ordered sets to schedule (built by init() from Predecessors and Members).
+  // For each burst i:
+  //   OrderedMembers[2*i]   = Predecessors[RangeOrder[i]]
+  //   OrderedMembers[2*i+1] = Members[RangeOrder[i]]
+  std::vector<SmallVector<int, 4>> OrderedMembers;
+
+  // Current index into OrderedMembers (which set we're working on).
+  size_t CurrentSet = 0;
+
+public:
+  BurstMostUrgentStrategy(ScheduleDAGInstrs &DAG, ScheduleInfo &Info,
+                          const std::vector<ScarceRange> &ScarceRanges,
+                          int LatestBias);
+
+  // Rebuild OrderedMembers for the given range order and reset CurrentSet.
+  void init(const SmallVector<int, 4> &RangeOrder);
+
+  std::string name() override { return "BurstMostUrgentStrategy"; }
+
+  bool better(const SUnit &A, const SUnit &B) override;
+
+  void selected(const SUnit &N) override;
+
+  bool fromTop() override { return true; }
+
+private:
+  // Simulate anti-dependences from a completed range to all subsequent ranges.
+  void simulateAntiDependences(int CompletedRangeIdx);
+};
+
+// Build a mapping from SUnit index to scarce range index.
+// RangeOfSUnit[i] = range index if SUnit i is in a scarce range, -1 otherwise.
+void buildScarceRangeMapping(const std::vector<ScarceRange> &Ranges,
+                             const ScheduleInfo &Info,
+                             std::vector<int> &RangeOfSUnit);
+
+// Build the scarce-only DAG by populating PredRanges for each range.
+void buildScarceDAG(std::vector<ScarceRange> &Ranges, const ScheduleInfo &Info,
+                    const ScheduleDAGInstrs &DAG);
+
+// Check that the scarce-only DAG is acyclic using Kahn's algorithm.
+// Returns true if acyclic, false if a cycle is detected.
+bool checkAcyclic(const std::vector<ScarceRange> &Ranges);
+
+// Enumerate range orders compatible with the DAG.
+// OnOrder returns true to stop enumeration (success), false to continue.
+// Returns true if OnOrder returned true for any order, false otherwise.
+bool enumerateRangeOrders(
+    const std::vector<ScarceRange> &Ranges,
+    llvm::function_ref<bool(const SmallVector<int, 4> &Order)> OnOrder);
+
+} // namespace llvm::AIE
+
+#endif // LLVM_LIB_TARGET_AIE_AIESCARCEREGSCHEDULING_H
diff --git a/llvm/lib/Target/AIE/AIEScheduleInterpreter.cpp b/llvm/lib/Target/AIE/AIEScheduleInterpreter.cpp
new file mode 100644
index 000000000000..34143c26e313
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIEScheduleInterpreter.cpp
@@ -0,0 +1,517 @@
+//===- AIEScheduleInterpreter.cpp - Schedule-aware itinerary interpreter -===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a schedule-aware interpreter that computes register
+// file (RF) occupancy windows from scheduled MachineInstrs and itinerary
+// data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AIEScheduleInterpreter.h"
+#include "AIEBaseInstrInfo.h"
+#include "AIELivenessVector.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <map>
+#include <set>
+#include <vector>
+
+#define DEBUG_TYPE "aie-schedule-interpreter"
+
+using namespace llvm;
+
+AIEScheduleInterpreter::AIEScheduleInterpreter(const MachineFunction &MF)
+    : TII(*MF.getSubtarget().getInstrInfo()),
+      TRI(*MF.getSubtarget().getRegisterInfo()), MRI(MF.getRegInfo()),
+      Itin(MF.getSubtarget().getInstrItineraryData()) {
+  assert(Itin && !Itin->isEmpty() &&
+         "Instruction itinerary data must be provided");
+}
+
+int AIEScheduleInterpreter::getOperandCycle(unsigned SchedClass,
+                                            unsigned OpIdx) const {
+  // Ask the itinerary in which cycle, counted from the issue of the
+  // instruction, operand OpIdx of scheduling class SchedClass is accessed.
+  const std::optional<unsigned> Cycle =
+      Itin->getOperandCycle(SchedClass, OpIdx);
+
+  // An operand without timing information is not supported.
+  assert(Cycle.has_value() &&
+         "Itinerary must provide operand cycle information for all operands");
+
+  return *Cycle;
+}
+
+// Appends an event to the list of the given cycle, growing Schedule on demand.
+// Cycle is used as an index into Schedule and therefore must not be negative.
+static void addEvent(EventSchedule &Schedule, int Cycle, EventType Type,
+                     unsigned VReg, unsigned SubRegIdx,
+                     unsigned ForwardingClass, const MachineInstr *MI,
+                     unsigned OpIdx) {
+  assert(Cycle >= 0 && "Event cycle must be non-negative");
+  // Make sure that Schedule[Cycle] exists before it is written to.
+  if (Cycle >= static_cast<int>(Schedule.size()))
+    Schedule.resize(Cycle + 1);
+
+  Schedule[Cycle].emplace_back(Type, VReg, SubRegIdx, ForwardingClass, MI,
+                               OpIdx);
+}
+
+void AIEScheduleInterpreter::addInstructionEvents(
+    const MachineInstr &MI, int IssueCycle, EventSchedule &Schedule) const {
+  // Emits one Read or Write event per explicit virtual-register operand of MI.
+  LLVM_DEBUG(dbgs() << "Adding events for instruction at cycle " << IssueCycle
+                    << ": " << MI);
+
+  // Get scheduling class once for all operands.
+  const MCInstrDesc &Desc = MI.getDesc();
+  const unsigned SchedClass = Desc.getSchedClass();
+
+  // Process all operands
+  for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) {
+    const MachineOperand &MO = MI.getOperand(OpIdx);
+
+    // Skip non-register operands
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+
+    // Skip physical registers for now
+    if (!Register::isVirtualRegister(MO.getReg()))
+      continue;
+
+    // Skip implicit operands
+    if (MO.isImplicit())
+      continue;
+
+    const Register VReg = MO.getReg();
+    const unsigned SubRegIdx = MO.getSubReg();
+    const unsigned ForwardingClass =
+        Itin->getForwardingClass(SchedClass, OpIdx);
+
+    if (MO.isUse()) {
+      const int ReadCycleOffset = getOperandCycle(SchedClass, OpIdx);
+      const int ReadCycle = IssueCycle + ReadCycleOffset;
+
+      // Add read event.
+      // ForwardingClass != 0 marks a read that also uses a bypass, which
+      // dumpEventSchedule() places in the same cycle as the read.
+      addEvent(Schedule, ReadCycle, EventType::Read, VReg, SubRegIdx,
+               ForwardingClass, &MI, OpIdx);
+
+      LLVM_DEBUG(dbgs() << "  Read %vreg" << Register::virtReg2Index(VReg);
+                 if (SubRegIdx) dbgs()
+                 << ":" << TRI.getSubRegIndexName(SubRegIdx);
+                 dbgs() << " at cycle " << ReadCycle;
+                 if (ForwardingClass) dbgs()
+                 << " (forwarding class " << ForwardingClass << ")";
+                 dbgs() << "\n");
+    }
+
+    if (MO.isDef()) {
+      const int WriteCycleOffset = getOperandCycle(SchedClass, OpIdx);
+      const int WriteCycle = IssueCycle + WriteCycleOffset;
+
+      // Add write event. ForwardingClass != 0 marks a write that also feeds a
+      // bypass, which dumpEventSchedule() and buildLiveLanes() place one cycle
+      // before the write.
+      addEvent(Schedule, WriteCycle, EventType::Write, VReg, SubRegIdx,
+               ForwardingClass, &MI, OpIdx);
+
+      LLVM_DEBUG(dbgs() << "  Write %vreg" << Register::virtReg2Index(VReg);
+                 if (SubRegIdx) dbgs()
+                 << ":" << TRI.getSubRegIndexName(SubRegIdx);
+                 dbgs() << " at cycle " << WriteCycle;
+                 if (ForwardingClass) dbgs()
+                 << " (forwarding class " << ForwardingClass << ")";
+                 dbgs() << "\n");
+    }
+  }
+}
+
+void AIEScheduleInterpreter::dumpEventSchedule(const EventSchedule &Schedule,
+                                               raw_ostream &OS) const {
+  // Prints a table with one row per virtual register and one column per cycle.
+  // Collect all unique virtual registers
+  std::set<unsigned> AllVRegs;
+  for (const auto &CycleEvents : Schedule) {
+    for (const auto &Event : CycleEvents) {
+      AllVRegs.insert(Event.VReg);
+    }
+  }
+
+  // Helper lambda to format an event as a string
+  auto FormatEvent = [](const RFEvent &Event) -> std::string {
+    const char Action = (Event.Type == EventType::Read) ? 'R' : 'W';
+    std::string ActionStr;
+    if (Event.SubRegIdx != 0) {
+      // Include subreg info if present (format as R## or W##)
+      raw_string_ostream Stream(ActionStr);
+      Stream << format("%c%02d", Action, Event.SubRegIdx);
+    } else {
+      // No subreg, just the action with padding
+      ActionStr = Action;
+      ActionStr += "  ";
+    }
+    return ActionStr;
+  };
+
+  // Build separate maps for register and bypass events per VReg.
+  // Bypass events are derived from ForwardingClass:
+  // - Reads with ForwardingClass != 0 also read bypass at same cycle
+  // - Writes with ForwardingClass != 0 also write bypass one cycle earlier
+  std::map<unsigned, std::map<unsigned, std::string>> RegEventsByVReg;
+  std::map<unsigned, std::map<unsigned, std::string>> BypassEventsByVReg;
+  for (unsigned Cycle = 0; Cycle < Schedule.size(); ++Cycle) {
+    const auto &CycleEvents = Schedule[Cycle];
+    for (const auto &Event : CycleEvents) {
+      // Add space if there's already an event in this cycle
+      if (!RegEventsByVReg[Event.VReg][Cycle].empty()) {
+        RegEventsByVReg[Event.VReg][Cycle] += " ";
+      }
+      RegEventsByVReg[Event.VReg][Cycle] += FormatEvent(Event);
+
+      // If this event uses a bypass, add bypass event
+      if (Event.ForwardingClass != 0) {
+        const int BypassCycle = static_cast<int>(Cycle) -
+                                (Event.Type == EventType::Write ? 1 : 0);
+        if (BypassCycle >= 0) {
+          if (!BypassEventsByVReg[Event.VReg][BypassCycle].empty()) {
+            BypassEventsByVReg[Event.VReg][BypassCycle] += " ";
+          }
+          BypassEventsByVReg[Event.VReg][BypassCycle] += FormatEvent(Event);
+        }
+      }
+    }
+  }
+
+  // Print header with cycle numbers.
+  // Reserve 12 characters for register class names to handle long names.
+  OS << " RegClass    VReg  |";
+  for (unsigned Cycle = 0; Cycle < Schedule.size(); ++Cycle) {
+    OS << format(" %4u |", Cycle);
+  }
+  OS << "\n";
+
+  // Print separator.
+  OS << "-------------------+";
+  for (unsigned Cycle = 0; Cycle < Schedule.size(); ++Cycle) {
+    OS << "------+";
+  }
+  OS << "\n";
+
+  // Helper lambda to print a row of events
+  auto PrintEventRow = [&](const std::map<unsigned, std::string> &Events) {
+    for (unsigned Cycle = 0; Cycle < Schedule.size(); ++Cycle) {
+      auto It = Events.find(Cycle);
+      OS << format(" %-4s |", It != Events.end() ? It->second.c_str() : "");
+    }
+    OS << "\n";
+  };
+
+  // Print each VReg with register events and bypass events on separate lines.
+  for (unsigned VReg : AllVRegs) {
+    const auto Reg = Register::virtReg2Index(VReg);
+    const char *RCName = TRI.getRegClassName(MRI.getRegClass(VReg));
+
+    // Print register events.
+    // Use %-12.12s to left-align, pad to 12 chars, and truncate at 12 chars.
+    OS << format(" %-12.12s%5u |", RCName, Reg);
+    PrintEventRow(RegEventsByVReg[VReg]);
+
+    // Print bypass events if any exist for this VReg.
+    const auto &BypassEvents = BypassEventsByVReg[VReg];
+    if (!BypassEvents.empty()) {
+      OS << "        bypass    |";
+      PrintEventRow(BypassEvents);
+    }
+  }
+}
+
+// Lane mask covered by a register operand of VReg with sub-register index
+// SubRegIdx.
+static LaneBitmask getLaneMaskFor(const TargetRegisterInfo &TRI,
+                                  const MachineRegisterInfo &MRI,
+                                  unsigned SubRegIdx, unsigned VReg) {
+  // A non-zero index selects a sub-register, whose mask TRI knows directly.
+  if (SubRegIdx != 0) {
+    return TRI.getSubRegIndexLaneMask(SubRegIdx);
+  }
+  // Index zero is the whole register: use the mask of its register class.
+  return MRI.getRegClass(VReg)->getLaneMask();
+}
+
+DenseMap<unsigned, AIE::LivenessVector>
+AIEScheduleInterpreter::buildLiveLanes(const EventSchedule &Schedule,
+                                       int II) const {
+  // Scans cycles from last to first, folding each cycle C onto offset C % II.
+  assert(II > 0 && "Initiation interval must be positive");
+
+  DenseMap<unsigned, AIE::LivenessVector> LiveLanesByVirtReg;
+
+  if (Schedule.empty())
+    return LiveLanesByVirtReg;
+
+  // State: lanes read in a later cycle whose write has not been reached yet
+  DenseMap<unsigned /*VReg*/, LaneBitmask> ActiveMask;
+
+  // Process cycles backward
+  int MaxCycle = Schedule.size() - 1;
+  for (int C = MaxCycle; C >= 0; --C) {
+    const auto &Events = Schedule[C];
+    int ModuloCycle = C % II; // Offset of cycle C within the II window
+
+    // First, mark the lanes still pending from later cycles as live at this
+    // offset: they are read in a later cycle with no write in between
+    for (const auto &[VReg, Mask] : ActiveMask) {
+      if (Mask.any()) {
+        // Ensure the output vector is sized for this VReg
+        if (!LiveLanesByVirtReg.count(VReg)) {
+          LiveLanesByVirtReg[VReg] = AIE::LivenessVector(II);
+        }
+        LiveLanesByVirtReg[VReg][ModuloCycle] |= Mask;
+
+        LLVM_DEBUG(dbgs() << "    Lanes " << PrintLaneMask(Mask) << " for %vreg"
+                          << Register::virtReg2Index(VReg)
+                          << " live entering cycle " << C << " (offset "
+                          << ModuloCycle << ")\n");
+      }
+    }
+
+    // Collect reads for this cycle (they don't make register live in this
+    // cycle)
+    DenseMap<unsigned /*VReg*/, LaneBitmask> ReadsInCycle;
+
+    // Step 1: Process defs (writes) - they occupy the register and kill lanes
+    // going backward
+    for (const auto &Event : Events) {
+      if (Event.Type == EventType::Write) {
+        LaneBitmask M = getLaneMaskFor(TRI, MRI, Event.SubRegIdx, Event.VReg);
+
+        // Ensure the output vector exists for this VReg
+        if (!LiveLanesByVirtReg.count(Event.VReg)) {
+          LiveLanesByVirtReg[Event.VReg] = AIE::LivenessVector(II);
+        }
+
+        // RF write occupies register file at ModuloCycle
+        LiveLanesByVirtReg[Event.VReg][ModuloCycle] |= M;
+
+        // Bypassed write: record its class one cycle earlier (none if C == 0)
+        if (Event.ForwardingClass != 0) {
+          const int BypassWriteCycle = C - 1;
+          if (BypassWriteCycle >= 0) {
+            const int BypassModuloCycle = BypassWriteCycle % II;
+            LiveLanesByVirtReg[Event.VReg][BypassModuloCycle].addBypassWrite(
+                Event.ForwardingClass);
+
+            LLVM_DEBUG(dbgs()
+                       << "    Bypass write of class " << Event.ForwardingClass
+                       << " at cycle " << BypassWriteCycle << " (offset "
+                       << BypassModuloCycle << ")\n");
+          }
+        }
+
+        // Kill those lanes going backward
+        ActiveMask[Event.VReg] &= ~M;
+
+        LLVM_DEBUG(dbgs() << "  Cycle " << C << " (" << ModuloCycle
+                          << "): Write %vreg"
+                          << Register::virtReg2Index(Event.VReg);
+                   if (Event.SubRegIdx) dbgs()
+                   << ":" << TRI.getSubRegIndexName(Event.SubRegIdx);
+                   dbgs() << " occupies lanes " << PrintLaneMask(M)
+                          << " and kills them going backward\n");
+
+        // If no lanes remain active, remove from map
+        if (ActiveMask[Event.VReg].none()) {
+          ActiveMask.erase(Event.VReg);
+        }
+      }
+    }
+
+    // Step 2: Collect all reads in this cycle
+    for (const auto &Event : Events) {
+      if (Event.Type == EventType::Read) {
+        LaneBitmask M = getLaneMaskFor(TRI, MRI, Event.SubRegIdx, Event.VReg);
+
+        // Accumulate reads for this VReg in this cycle
+        ReadsInCycle[Event.VReg] |= M;
+
+        LLVM_DEBUG(dbgs() << "  Cycle " << C << " (" << ModuloCycle
+                          << "): Read %vreg"
+                          << Register::virtReg2Index(Event.VReg);
+                   if (Event.SubRegIdx) dbgs()
+                   << ":" << TRI.getSubRegIndexName(Event.SubRegIdx);
+                   dbgs() << " lanes " << PrintLaneMask(M) << "\n");
+
+        // If this read uses a bypass, mark bypass read at same cycle
+        if (Event.ForwardingClass != 0) {
+          if (!LiveLanesByVirtReg.count(Event.VReg)) {
+            LiveLanesByVirtReg[Event.VReg] = AIE::LivenessVector(II);
+          }
+          LiveLanesByVirtReg[Event.VReg][ModuloCycle].addBypassRead(
+              Event.ForwardingClass);
+
+          LLVM_DEBUG(dbgs() << "    Bypass read of class "
+                            << Event.ForwardingClass << " at cycle " << C
+                            << " (offset " << ModuloCycle << ")\n");
+        }
+      }
+    }
+
+    // Step 3: Now propagate reads to ActiveMask for previous cycles
+    // Reads don't make the register live in the current cycle
+    for (const auto &[VReg, Mask] : ReadsInCycle) {
+      // The reads make the register live going backward (but not in this cycle)
+      ActiveMask[VReg] |= Mask;
+
+      LLVM_DEBUG(dbgs() << "    %vreg" << Register::virtReg2Index(VReg)
+                        << " lanes " << PrintLaneMask(Mask)
+                        << " become live going backward from cycle " << C
+                        << "\n");
+    }
+  }
+
+  // At the end, ActiveMask should be empty (all defs should have been seen).
+  // Leftover lanes mean a use without a def in the schedule; this is only
+  // reported through debug output and does not alter the returned map.
+  for (const auto &[VReg, Mask] : ActiveMask) {
+    if (Mask.any()) {
+      LLVM_DEBUG(dbgs() << "Warning: %vreg" << Register::virtReg2Index(VReg)
+                        << " has lanes " << PrintLaneMask(Mask)
+                        << " live at beginning (use without def?)\n");
+    }
+  }
+
+  return LiveLanesByVirtReg;
+}
+
+void AIEScheduleInterpreter::dumpLiveLanes(
+    const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVirtReg, int II,
+    raw_ostream &OS) const {
+  // Prints a table with one row per virtual register and one 8-character
+  // column per modulo offset in [0, II).
+
+  // Nothing to tabulate.
+  if (LiveLanesByVirtReg.empty()) {
+    OS << "No live lanes data\n";
+    return;
+  }
+
+  // Gather the keys and sort them so rows come out in a consistent order.
+  SmallVector<unsigned, 16> SortedVRegs;
+  for (const auto &Entry : LiveLanesByVirtReg)
+    SortedVRegs.push_back(Entry.first);
+  llvm::sort(SortedVRegs);
+
+  // Table header: one "t<offset>" label per column.
+  OS << "Live Lanes (II=" << II << "):\n";
+  OS << "VReg   | ";
+  for (int Offset = 0; Offset < II; ++Offset)
+    OS << format("t%-6d ", Offset);
+  OS << "\n";
+
+  // Separator row, eight dashes per column.
+  OS << "-------+";
+  for (int Offset = 0; Offset < II; ++Offset)
+    OS << "--------";
+  OS << "\n";
+
+  for (unsigned VReg : SortedVRegs) {
+    OS << format("%-6d | ", Register::virtReg2Index(VReg));
+
+    const auto &Row = LiveLanesByVirtReg.lookup(VReg);
+    for (int Offset = 0; Offset < II; ++Offset) {
+      const AIE::Liveness &Cell = Row[Offset];
+
+      // Nothing recorded at this offset.
+      if (!Cell.any()) {
+        OS << " ..     ";
+        continue;
+      }
+
+      // Cell text: '#' when lanes are live, then 'R' and/or 'W' followed by
+      // the bypass read/write class numbers. Examples:
+      //   "#     " = lanes only
+      //   "#R1   " = lanes + bypass read class 1
+      //   "#W2   " = lanes + bypass write class 2
+      //   "R1W2  " = bypass read class 1 + bypass write class 2
+      //   "#R1W2 " = lanes + bypass read class 1 + bypass write class 2
+      std::string Text;
+      if (Cell.getLanes().any())
+        Text = "#";
+
+      // Appends Tag plus every class number, or nothing if there are none.
+      auto AppendClasses = [&Text](const char *Tag, const auto &Classes) {
+        if (Classes.empty())
+          return;
+        Text += Tag;
+        for (unsigned FC : Classes)
+          Text += std::to_string(FC);
+      };
+      AppendClasses("R", Cell.getBypassReads());
+      AppendClasses("W", Cell.getBypassWrites());
+
+      // Pad to 6 characters for alignment; longer text is left untouched.
+      if (Text.size() < 6)
+        Text.resize(6, ' ');
+
+      OS << " " << Text << " ";
+    }
+
+    OS << "\n";
+  }
+}
+
+BitVector
+AIEScheduleInterpreter::buildSubRegBitmap(ArrayRef<LaneBitmask> LaneByOffset,
+                                          unsigned SubRegIdx) const {
+  // Index 0 selects every lane; any other index only that sub-register's.
+  LaneBitmask Selected = LaneBitmask::getAll();
+  if (SubRegIdx != 0)
+    Selected = TRI.getSubRegIndexLaneMask(SubRegIdx);
+
+  // One bit per modulo offset, set where a selected lane is live.
+  const int NumOffsets = LaneByOffset.size();
+  BitVector Result(NumOffsets, false);
+  for (int Offset = 0; Offset < NumOffsets; ++Offset) {
+    if ((LaneByOffset[Offset] & Selected).any())
+      Result.set(Offset);
+  }
+  return Result;
+}
+
+BitVector AIEScheduleInterpreter::buildVRegBitmap(
+    ArrayRef<LaneBitmask> LaneByOffset) const {
+  // One bit per modulo offset; a bit is set when any lane is live there.
+  const int NumOffsets = LaneByOffset.size();
+  BitVector Result(NumOffsets, false);
+
+  for (int Offset = 0; Offset < NumOffsets; ++Offset) {
+    if (LaneByOffset[Offset].any())
+      Result.set(Offset);
+  }
+  return Result;
+}
diff --git a/llvm/lib/Target/AIE/AIEScheduleInterpreter.h b/llvm/lib/Target/AIE/AIEScheduleInterpreter.h
new file mode 100644
index 000000000000..b9932a671287
--- /dev/null
+++ b/llvm/lib/Target/AIE/AIEScheduleInterpreter.h
@@ -0,0 +1,169 @@
+//===- AIEScheduleInterpreter.h - Schedule-aware itinerary interpreter ---===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a schedule-aware interpreter that computes register
+// file (RF) occupancy windows from scheduled MachineInstrs and itinerary
+// data. It emits per-operand, per-subregister liveness segments via a
+// callback interface, enabling cycle-accurate interference computation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIESCHEDULEINTERPRETER_H
+#define LLVM_LIB_TARGET_AIE_AIESCHEDULEINTERPRETER_H
+
+#include "AIELivenessVector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include <optional>
+#include <vector>
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class InstrItineraryData;
+class ScheduleDAGInstrs;
+class SUnit;
+
+/// Key identifying a live range and its subregister.
+struct LRKey {
+  unsigned LRId;      // Live range identifier
+  unsigned SubRegIdx; // Subregister index (0 for full register)
+  /// Two keys match only when both the live range and the subregister agree.
+  bool operator==(const LRKey &Other) const {
+    return LRId == Other.LRId && SubRegIdx == Other.SubRegIdx;
+  }
+};
+
+/// Abstract sink for live range events; subclasses implement both hooks.
+class LiveRangeEventSink {
+public:
+  /// Called when a segment of the live range \p Key starts at \p Cycle.
+  virtual void startLiveRange(const LRKey &Key, int Cycle) = 0;
+
+  /// Called when a segment of the live range \p Key ends at \p Cycle.
+  virtual void endLiveRange(const LRKey &Key, int Cycle) = 0;
+
+  virtual ~LiveRangeEventSink() = default;
+};
+
+/// Map from a MachineInstr (keyed by pointer) to its scheduled cycle
+using CycleMap = DenseMap<const MachineInstr *, int>;
+
+/// Handle for a live range. Every member has a default value.
+struct LRHandle {
+  unsigned LRId = 0; // Live range identifier (0 unless set by the creator)
+  unsigned VReg = 0; // Virtual register (optional, for diagnostics)
+  const TargetRegisterClass *RC = nullptr; // Register class (optional)
+};
+
+/// Kind of register file access recorded by an RFEvent
+enum class EventType { Read, Write };
+
+/// One register file access (read or write) of a virtual register operand.
+struct RFEvent {
+  EventType Type;           // Read or Write
+  unsigned VReg;            // Virtual register
+  unsigned SubRegIdx;       // Subregister index (0 for full register)
+  unsigned ForwardingClass; // Forwarding/bypass class (0 = no bypass)
+  const MachineInstr *MI;   // Source instruction
+  unsigned OpIdx;           // Operand index
+  /// Member-wise constructor; arguments follow the field order above.
+  RFEvent(EventType T, unsigned V, unsigned S, unsigned F,
+          const MachineInstr *M, unsigned O)
+      : Type(T), VReg(V), SubRegIdx(S), ForwardingClass(F), MI(M), OpIdx(O) {}
+};
+
+/// Event schedule indexed by cycle: element C holds the events of cycle C
+using EventSchedule = std::vector<std::vector<RFEvent>>;
+
+/// Schedule interpreter that computes RF occupancy windows
+class AIEScheduleInterpreter {
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+  const MachineRegisterInfo &MRI;
+  const InstrItineraryData *Itin;
+
+  /// Get the cycle offset when an operand is accessed given a scheduling class
+  /// Returns the offset from issue cycle
+  int getOperandCycle(unsigned SchedClass, unsigned OpIdx) const;
+
+public:
+  explicit AIEScheduleInterpreter(const MachineFunction &MF);
+
+  /// Add events for a single instruction to the event schedule
+  ///
+  /// Processes all register operands of the instruction and adds their
+  /// read/write events to the schedule based on the issue cycle and
+  /// itinerary timing information.
+  ///
+  /// \param MI The machine instruction to process
+  /// \param IssueCycle The cycle when the instruction is issued
+  /// \param Schedule The event schedule to update (will be resized if needed)
+  void addInstructionEvents(const MachineInstr &MI, int IssueCycle,
+                            EventSchedule &Schedule) const;
+
+  /// Dump the event schedule in a tabular format
+  ///
+  /// Displays cycles in rows and virtual registers in aligned columns,
+  /// showing 'R' for reads and 'W' for writes.
+  ///
+  /// \param Schedule The event schedule to dump
+  /// \param OS Output stream to write to
+  void dumpEventSchedule(const EventSchedule &Schedule, raw_ostream &OS) const;
+
+  /// Build per-lane modulo-II live range masks from an event schedule
+  ///
+  /// Uses a backward scan to compute which lanes of each virtual register
+  /// are live at each modulo-II offset. The result is a map from VReg to
+  /// an AIE::LivenessVector, where LiveLanesByVirtReg[VReg][t] holds the
+  /// live lanes and bypass read/write classes at offset t (0 <= t < II).
+  ///
+  /// \param Schedule The event schedule to analyze
+  /// \param II The initiation interval for modulo scheduling (must be > 0)
+  /// \return Map of VReg to per-offset liveness (lanes and bypass classes)
+  DenseMap<unsigned, AIE::LivenessVector>
+  buildLiveLanes(const EventSchedule &Schedule, int II) const;
+
+  /// Dump the live lanes in a readable format
+  ///
+  /// \param LiveLanesByVirtReg The live lanes data to dump
+  /// \param II The initiation interval
+  /// \param OS Output stream to write to
+  void dumpLiveLanes(
+      const DenseMap<unsigned, AIE::LivenessVector> &LiveLanesByVirtReg, int II,
+      raw_ostream &OS) const;
+
+  /// Convert lane masks to a BitVector for a specific subregister
+  ///
+  /// \param LaneByOffset Array of lane masks indexed by modulo-II offset
+  /// \param SubRegIdx The subregister index (0 for full register)
+  /// \return BitVector of length II with bits set where the subregister is live
+  BitVector buildSubRegBitmap(ArrayRef<LaneBitmask> LaneByOffset,
+                              unsigned SubRegIdx) const;
+
+  /// Convert lane masks to a BitVector for the full register
+  ///
+  /// \param LaneByOffset Array of lane masks indexed by modulo-II offset
+  /// \return BitVector of length II with bits set where any lane is live
+  BitVector buildVRegBitmap(ArrayRef<LaneBitmask> LaneByOffset) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AIE_AIESCHEDULEINTERPRETER_H
diff --git a/llvm/lib/Target/AIE/CMakeLists.txt b/llvm/lib/Target/AIE/CMakeLists.txt
index b0d4fd607b34..d333902f76e7 100644
--- a/llvm/lib/Target/AIE/CMakeLists.txt
+++ b/llvm/lib/Target/AIE/CMakeLists.txt
@@ -128,7 +128,9 @@ add_llvm_target(AIECodeGen
    AIEPreISelCombiner.cpp
    AIEInterBlockScheduling.cpp
    AIEISelDAGToDAG.cpp
+   AIELivenessVector.cpp
    AIELegalizerHelper.cpp
+   AIELiveRangeUtils.cpp
    AIELiveRegs.cpp
    AIELoopClass.cpp
    AIEMachineAlignment.cpp
@@ -139,11 +141,15 @@ add_llvm_target(AIECodeGen
    AIEMIRFormatter.cpp
    AIEMultiSlotInstrMaterializer.cpp
    AIEPostPipeliner.cpp
+   AIEPostRegAlloc.cpp
    AIEPostSelectOptimize.cpp
    AIEPseudoBranchExpansion.cpp
    AIEPtrModOptimizer.cpp
    AIERegClassConstrainer.cpp
+   AIERegDefUseTracker.cpp
    AIERegMemEventTracker.cpp
+   AIEScarceRegScheduling.cpp
+   AIEScheduleInterpreter.cpp
    AIESlotCounts.cpp
    AIESpillSlotOptimization.cpp
    AIESlotStatistics.cpp
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16-v2.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16-v2.mir
index d292def1eb9a..4a973749629e 100644
--- a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16-v2.mir
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16-v2.mir
@@ -1,4 +1,3 @@
-# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 # This file is licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -17,56 +16,51 @@
   ; CHECK-LABEL: gemm:
   ; CHECK:         .p2align 4
   ; CHECK-NEXT:  // %bb.0: // %entry
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p7, #64]; vldb x2, [p6, #64]; nops ; nopx ; mov p5, p6; nopv
-  ; CHECK-NEXT:    padda [p5], m4; vldb.3d x0, [p6], d0; nopx ; mov p3, p7; nops
-  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb x6, [p5, #0]
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p7, #64]; vldb x0, [p6, #64]; nops ; nopx ; mov p5, p6; nopv
+  ; CHECK-NEXT:    padda [p5], m4; vldb.3d x6, [p6], d0; mov p3, p7
+  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb x2, [p5, #0]
   ; CHECK-NEXT:    vldb x4, [p5, #64]
   ; CHECK-NEXT:    paddb [p3], m5
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vshuffle x8, x0, x2, r0
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex2, dm4; vshuffle x9, x0, x2, r1
-  ; CHECK-NEXT:    vshuffle x0, x4, x6, r0
-  ; CHECK-NEXT:    vshuffle x1, x4, x6, r1; vmul.f dm4, y4, y5, r2
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3, #0]; vmul.f dm4, y0, y5, r2
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3, #0]
   ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; add.nc lc, r0, #-1
   ; CHECK-NEXT:    movxm ls, #.LBB0_1
+  ; CHECK-NEXT:    vshuffle x8, x6, x0, r0
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex3, dm4; vshuffle x9, x6, x0, r1
+  ; CHECK-NEXT:    vshuffle x0, x4, x2, r0
+  ; CHECK-NEXT:    vshuffle x1, x4, x2, r1; vmul.f dm4, y4, y5, r2
   ; CHECK-NEXT:    nopa ; nopb ; nops ; movxm le, #.L_LEnd0; nopv
   ; CHECK-NEXT:    .p2align 4
   ; CHECK-NEXT:  .LBB0_1: // %for.body
   ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p7, #64]; vldb x2, [p6, #64]; vconv.bfp16ebs8.fp32 ex3, dm4; nopx ; mov p5, p6; nopv
-  ; CHECK-NEXT:    padda [p5], m4; vldb.3d x0, [p6], d0; nops ; nopx ; mov p3, p7; nopv
-  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb x6, [p5, #0]; nopx ; vconv.bfp16ebs8.fp32 ex5, dm4
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p7, #64]; vldb x0, [p6, #64]; vconv.bfp16ebs8.fp32 ex5, dm4; nopx ; mov p5, p6; vmul.f dm4, y0, y5, r2
+  ; CHECK-NEXT:    padda [p5], m4; vldb.3d x6, [p6], d0; nops ; nopx ; mov p3, p7; nopv
+  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb x2, [p5, #0]; nopx
   ; CHECK-NEXT:    vldb x4, [p5, #64]
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex7, dm4; paddb [p3], m5; vmac.f dm3, dm3, ex2, ex3, r3
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vmac.f dm2, dm2, ex2, ex5, r3
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vshuffle x8, x0, x2, r0; vmac.f dm1, dm1, ex7, ex3, r3
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex2, dm4; vshuffle x9, x0, x2, r1; vmac.f dm0, dm0, ex7, ex5, r3
-  ; CHECK-NEXT:    vshuffle x0, x4, x6, r0
-  ; CHECK-NEXT:    vshuffle x1, x4, x6, r1; vmul.f dm4, y4, y5, r2
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3, #0]; vmul.f dm4, y0, y5, r2
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]
+  ; CHECK-NEXT:    paddb [p3], m5; vconv.bfp16ebs8.fp32 ex7, dm4
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3, #0]
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; vconv.bfp16ebs8.fp32 ex7, dm4
   ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vshuffle x8, x6, x0, r0; vmac.f dm3, dm3, ex3, ex7, r3
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex3, dm4; vshuffle x9, x6, x0, r1; vmac.f dm1, dm1, ex5, ex7, r3
+  ; CHECK-NEXT:    vshuffle x0, x4, x2, r0; vmac.f dm2, dm2, ex3, ex7, r3
+  ; CHECK-NEXT:    vshuffle x1, x4, x2, r1; vmul.f dm4, y4, y5, r2
   ; CHECK-NEXT:  .L_LEnd0:
-  ; CHECK-NEXT:    nopa ; nopb ; nops ; nopxm ; nopv
+  ; CHECK-NEXT:    nopa ; nopb ; nops ; nopxm ; vmac.f dm0, dm0, ex5, ex7, r3
   ; CHECK-NEXT:    .p2align 4
   ; CHECK-NEXT:  // %bb.2:
-  ; CHECK-NEXT:    nopa ; nopb ; vconv.bfp16ebs8.fp32 ex3, dm4; nopxm ; nopv
-  ; CHECK-NEXT:    nopx
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex5, dm4
+  ; CHECK-NEXT:    nopa ; nopb ; nopx ; vconv.bfp16ebs8.fp32 ex5, dm4; vmul.f dm4, y0, y5, r2
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex7, dm4
   ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex7, dm4; vmac.f dm3, dm3, ex2, ex3, r3
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex7, dm4
   ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vmac.f dm2, dm2, ex2, ex5, r3
+  ; CHECK-NEXT:    vmac.f dm3, dm3, ex3, ex7, r3
+  ; CHECK-NEXT:    vmac.f dm1, dm1, ex5, ex7, r3
+  ; CHECK-NEXT:    vmac.f dm2, dm2, ex3, ex7, r3
   ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vmac.f dm1, dm1, ex7, ex3, r3
-  ; CHECK-NEXT:    vmac.f dm0, dm0, ex7, ex5, r3
+  ; CHECK-NEXT:    vmac.f dm0, dm0, ex5, ex7, r3
   ; CHECK-NEXT:    ret lr
   ; CHECK-NEXT:    nop // Delay Slot 5
   ; CHECK-NEXT:    nop // Delay Slot 4
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16.mir
index 77c15549f7b5..094725094bdc 100644
--- a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16.mir
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/gemm-bfp16.mir
@@ -17,57 +17,53 @@
   ; CHECK-LABEL: gemm:
   ; CHECK:         .p2align 4
   ; CHECK-NEXT:  // %bb.0: // %entry
-  ; CHECK-NEXT:    nopa ; vldb.3d x2, [p6], d0; nops ; nopx ; mov p5, p6; nopv
-  ; CHECK-NEXT:    padda [p5], m4; vldb x4, [p5, #64]; mov p3, p7
-  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb x6, [p5], #64
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; vldb x8, [p5, #0]
+  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb.3d x2, [p6], d0; movs p5, p6; nopx ; mov p3, p7; nopv
+  ; CHECK-NEXT:    padda [p5], m4; vldb x11, [p5, #64]; nops ; nopxm ; nopv
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; vldb x6, [p5], #64
+  ; CHECK-NEXT:    vldb x8, [p5, #0]
   ; CHECK-NEXT:    paddb [p3], m5
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3], #64
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #0]
   ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vshuffle x4, x2, x11, r0
+  ; CHECK-NEXT:    vshuffle x5, x2, x11, r1
+  ; CHECK-NEXT:    vshuffle x10, x6, x8, r0
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex6, dm4; vshuffle x11, x6, x8, r1; vmul.f dm4, y2, y0, r2
   ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vshuffle x10, x2, x4, r0
-  ; CHECK-NEXT:    vshuffle x11, x2, x4, r1
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex2, dm4; vshuffle x4, x6, x8, r0
-  ; CHECK-NEXT:    vshuffle x5, x6, x8, r1; vmul.f dm4, y5, y0, r2
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3], #64; vmul.f dm4, y2, y0, r2
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #0]
-  ; CHECK-NEXT:    add.nc lc, r0, #-1
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex4, dm4; add.nc lc, r0, #-1; vmul.f dm4, y5, y0, r2
   ; CHECK-NEXT:    movxm ls, #.LBB0_1
-  ; CHECK-NEXT:    nopa ; nopb ; vconv.bfp16ebs8.fp32 ex6, dm4; movxm le, #.L_LEnd0; nopv
+  ; CHECK-NEXT:    nopa ; nopb ; nops ; movxm le, #.L_LEnd0; nopv
   ; CHECK-NEXT:    .p2align 4
   ; CHECK-NEXT:  .LBB0_1: // %for.body
   ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-  ; CHECK-NEXT:    vldb.3d x2, [p6], d0; mov p5, p6
-  ; CHECK-NEXT:    padda [p5], m4; vldb x4, [p5, #64]; vconv.bfp16ebs8.fp32 ex4, dm4; mov p3, p7
-  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb x6, [p5], #64
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; vldb x8, [p5, #0]; vconv.bfp16ebs8.fp32 ex8, dm4; vmac.f dm3, dm3, ex2, ex6, r3
+  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vldb.3d x2, [p6], d0; nopx ; mov p3, p7; movs p5, p6
+  ; CHECK-NEXT:    padda [p5], m4; vldb x11, [p5, #64]; vconv.bfp16ebs8.fp32 ex8, dm4
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; vldb x6, [p5], #64
+  ; CHECK-NEXT:    vldb x8, [p5, #0]; vconv.bfp16ebs8.fp32 ex8, dm4
   ; CHECK-NEXT:    paddb [p3], m5
-  ; CHECK-NEXT:    vmac.f dm2, dm2, ex2, ex4, r3
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vmac.f dm0, dm0, ex8, ex4, r3
-  ; CHECK-NEXT:    vshuffle x10, x2, x4, r0; vmac.f dm1, dm1, ex8, ex6, r3
-  ; CHECK-NEXT:    vshuffle x11, x2, x4, r1
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex2, dm4; vshuffle x4, x6, x8, r0
-  ; CHECK-NEXT:    vshuffle x5, x6, x8, r1; vmul.f dm4, y5, y0, r2
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3], #64; vmul.f dm4, y2, y0, r2
-  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #0]
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3], #64; vmac.f dm3, dm3, ex6, ex8, r3
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #0]; vmac.f dm1, dm1, ex4, ex8, r3
+  ; CHECK-NEXT:    vmac.f dm2, dm2, ex6, ex8, r3
+  ; CHECK-NEXT:    vshuffle x4, x2, x11, r0; vmac.f dm0, dm0, ex4, ex8, r3
+  ; CHECK-NEXT:    vshuffle x5, x2, x11, r1
+  ; CHECK-NEXT:    vshuffle x10, x6, x8, r0
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex6, dm4; vshuffle x11, x6, x8, r1; vmul.f dm4, y2, y0, r2
   ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex4, dm4; vmul.f dm4, y5, y0, r2
   ; CHECK-NEXT:    nop
   ; CHECK-NEXT:  .L_LEnd0:
-  ; CHECK-NEXT:    nopa ; nopb ; vconv.bfp16ebs8.fp32 ex6, dm4; nopxm ; nopv
+  ; CHECK-NEXT:    nopa ; nopb ; nops ; nopxm ; nopv
   ; CHECK-NEXT:    .p2align 4
   ; CHECK-NEXT:  // %bb.2:
-  ; CHECK-NEXT:    nopx
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex4, dm4
-  ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex8, dm4; vmac.f dm3, dm3, ex2, ex6, r3
+  ; CHECK-NEXT:    nopa ; nopx
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex8, dm4
   ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vmac.f dm2, dm2, ex2, ex4, r3
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex8, dm4
   ; CHECK-NEXT:    nop
-  ; CHECK-NEXT:    vmac.f dm0, dm0, ex8, ex4, r3
-  ; CHECK-NEXT:    vmac.f dm1, dm1, ex8, ex6, r3
+  ; CHECK-NEXT:    vmac.f dm3, dm3, ex6, ex8, r3
+  ; CHECK-NEXT:    vmac.f dm1, dm1, ex4, ex8, r3
+  ; CHECK-NEXT:    vmac.f dm2, dm2, ex6, ex8, r3
+  ; CHECK-NEXT:    vmac.f dm0, dm0, ex4, ex8, r3
   ; CHECK-NEXT:    ret lr
   ; CHECK-NEXT:    nop // Delay Slot 5
   ; CHECK-NEXT:    nop // Delay Slot 4
@@ -123,12 +119,12 @@ body:             |
     successors: %bb.2, %bb.3
     liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y0:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
 
-    $p5 = MOV_alu_mv_mv_mv_scl $p6
+    $p5 = MOV_scalar_pseudo $p6
     $x2, $p6, $dc0, $dc4 = VLD_3D_x_pseudo killed $p6, $d0_3d :: (load (<16 x s32>))
     renamable $x4 = VLD_x_idx_imm_pseudo renamable $p5, 64 :: (load (<16 x s32>))
-    $p3 = MOV_alu_mv_mv_mv_scl $p7
-    $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf killed $p7, $d1_3d :: (load (<32 x s16>))
+    $p3 = MOV_scalar_pseudo $p7
     renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm renamable $p3, 64 :: (load (<32 x s16>))
+    $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf killed $p7, $d1_3d :: (load (<32 x s16>))
     renamable $p5 = PADD_mod_pseudo killed renamable $p5, renamable $m4
     renamable $x6, renamable $p5 = VLD_x_pstm_nrm_imm_pseudo killed renamable $p5, 64 :: (load (<16 x s32>))
     renamable $x8 = VLD_x_idx_imm_pseudo killed renamable $p5, 0 :: (load (<16 x s32>))
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/postpipeliner-ore.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/postpipeliner-ore.mir
index 29c9a6f155a3..2ef9cea8f838 100644
--- a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/postpipeliner-ore.mir
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/postpipeliner-ore.mir
@@ -1,8 +1,9 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 # This file is licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
-# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
 
 # RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
 # RUN:   --start-before=postmisched %s \
@@ -13,16 +14,6 @@
 
 --- |
   define dso_local void @gemm(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
-  ; CHECK: --- !Passed
-  ; CHECK-NEXT: Pass:            postpipeliner
-  ; CHECK-NEXT: Name:            schedule
-  ; CHECK-NEXT: Function:        gemm
-  ; CHECK-NEXT: Args:
-  ; CHECK-NEXT:   - String:          Schedule found
-  ; CHECK-NEXT:   - NS:              '4'
-  ; CHECK-NEXT:   - II:              '8'
-  ; CHECK-NEXT:   - BasicBlock:              for.body
-  ; CHECK-NEXT: ...
   entry:
     %cmp5 = icmp sgt i32 %n, 0
     br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
@@ -49,13 +40,6 @@
 
 
   define dso_local void @gemm_lowitercount(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
-  ; CHECK: --- !Missed
-  ; CHECK-NEXT: Pass:            postpipeliner
-  ; CHECK-NEXT: Name:            schedule
-  ; CHECK-NEXT: Function:        gemm_lowitercount
-  ; CHECK-NEXT: Args:
-  ; CHECK-NEXT:   - String:          No schedule found.
-  ; CHECK-NEXT: ...
   entry:
     %cmp5 = icmp sgt i32 %n, 0
     br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
@@ -192,3 +176,5 @@ body:             |
     DelayedSchedBarrier
 
 ...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-exp.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-exp.mir
new file mode 100644
index 000000000000..626a3fb8460f
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-exp.mir
@@ -0,0 +1,250 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
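+# This test exercises the experimental modules AIERegDefUseTracker and AIEScheduleInterpreter
+# using the motivating GEMM example with multi-slot pseudo materialization. It checks the final live ranges and the available physical registers dumped by --debug-only=aie-reg-liverange.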
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched --stop-after=postmisched \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   -o - --debug-only=aie-reg-liverange %s 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# CHECK: FINAL LIVE RANGES
+# CHECK: ================================
+# CHECK: Total live ranges: 14
+# CHECK: Live Range #28 for dm4:
+# CHECK:   Definitions (2):
+# CHECK:     [0] Register: cmh4 (SubRegIdx: 9) renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm renamable $p3, 64 :: (load (<32 x s16>))
+# CHECK:     [1] Register: cml4 (SubRegIdx: 10) $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf killed $p7(tied-def 1), $d1_3d :: (load (<32 x s16>))
+# CHECK:   Uses (1):
+# CHECK:     [0] Register: dm4 renamable $ex2 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK: Live Range #22 for dm4:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: dm4 renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y5, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK:   Uses (1):
+# CHECK:     [0] Register: dm4 renamable $ex6 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK: Live Range #16 for dm4:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: dm4 renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y2, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK:   Uses (1):
+# CHECK:     [0] Register: dm4 renamable $ex4 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK: Live Range #13 for dm4:
+# CHECK:   Definitions (2):
+# CHECK:     [0] Register: cmh4 (SubRegIdx: 9) renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm killed renamable $p3, 0 :: (load (<32 x s16>))
+# CHECK:     [1] Register: cml4 (SubRegIdx: 10) renamable $cml4, renamable $p3 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_pstm_nrm_imm killed renamable $p3(tied-def 1), 64 :: (load (<32 x s16>))
+# CHECK:   Uses (1):
+# CHECK:     [0] Register: dm4 renamable $ex8 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK: Live Range #10 for ex2:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: ex2 renamable $ex2 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: ex2 renamable $dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm2, killed renamable $ex2, renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK:     [1] Register: ex2 renamable $dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm3, renamable $ex2, renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK: Live Range #3 for ex4:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: ex4 renamable $ex4 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: ex4 renamable $dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm0, killed renamable $ex8, killed renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK:     [1] Register: ex4 renamable $dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm2, killed renamable $ex2, renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK: Live Range #7 for ex6:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: ex6 renamable $ex6 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: ex6 renamable $dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm1, renamable $ex8, killed renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK:     [1] Register: ex6 renamable $dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm3, renamable $ex2, renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK: Live Range #2 for ex8:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: ex8 renamable $ex8 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: ex8 renamable $dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm0, killed renamable $ex8, killed renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK:     [1] Register: ex8 renamable $dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm1, renamable $ex8, killed renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK: Live Range #29 for x2:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: x2 $x2, $p6, $dc0, $dc4 = VLDB_3D_dmx_ldb_x killed $p6(tied-def 1), $d0_3d :: (load (<16 x s32>))
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: x2 renamable $x11 = VSHUFFLE_vec_shuffle_x killed renamable $x2, killed renamable $x4, renamable $r1
+# CHECK:     [1] Register: x2 renamable $x10 = VSHUFFLE_vec_shuffle_x renamable $x2, renamable $x4, renamable $r0
+# CHECK: Live Range #30 for x4:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: x4 renamable $x4 = VLDB_dmx_ldb_x_idx_imm renamable $p5, 64 :: (load (<16 x s32>))
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: x4 renamable $x11 = VSHUFFLE_vec_shuffle_x killed renamable $x2, killed renamable $x4, renamable $r1
+# CHECK:     [1] Register: x4 renamable $x10 = VSHUFFLE_vec_shuffle_x renamable $x2, renamable $x4, renamable $r0
+# CHECK: Live Range #23 for x6:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: x6 renamable $x6, renamable $p5 = VLDB_dmx_ldb_x_pstm_nrm_imm killed renamable $p5(tied-def 1), 64 :: (load (<16 x s32>))
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: x6 renamable $x5 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x8, renamable $r1
+# CHECK:     [1] Register: x6 renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x8, renamable $r0
+# CHECK: Live Range #24 for x8:
+# CHECK:   Definitions (1):
+# CHECK:     [0] Register: x8 renamable $x8 = VLDB_dmx_ldb_x_idx_imm killed renamable $p5, 0 :: (load (<16 x s32>))
+# CHECK:   Uses (2):
+# CHECK:     [0] Register: x8 renamable $x5 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x8, renamable $r1
+# CHECK:     [1] Register: x8 renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x8, renamable $r0
+# CHECK: Live Range #19 for y2:
+# CHECK:   Definitions (2):
+# CHECK:     [0] Register: x5 (SubRegIdx: 5) renamable $x5 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x8, renamable $r1
+# CHECK:     [1] Register: x4 (SubRegIdx: 8) renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x8, renamable $r0
+# CHECK:   Uses (1):
+# CHECK:     [0] Register: y2 renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y2, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+# CHECK: Live Range #27 for y5:
+# CHECK:   Definitions (2):
+# CHECK:     [0] Register: x11 (SubRegIdx: 5) renamable $x11 = VSHUFFLE_vec_shuffle_x killed renamable $x2, killed renamable $x4, renamable $r1
+# CHECK:     [1] Register: x10 (SubRegIdx: 8) renamable $x10 = VSHUFFLE_vec_shuffle_x renamable $x2, renamable $x4, renamable $r0
+# CHECK:   Uses (1):
+# CHECK:     [0] Register: y5 renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y5, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK: ==============================================
+# CHECK:   bmhh4
+# CHECK:   bmhl4
+# CHECK:   bmlh4
+# CHECK:   bmll4
+# CHECK:   cmh4
+# CHECK:   cml4
+# CHECK:   dm4
+# CHECK:   e2
+# CHECK:   e4
+# CHECK:   e6
+# CHECK:   e8
+# CHECK:   eh2
+# CHECK:   eh4
+# CHECK:   eh6
+# CHECK:   eh8
+# CHECK:   el2
+# CHECK:   el4
+# CHECK:   el6
+# CHECK:   el8
+# CHECK:   ewh2
+# CHECK:   ewh4
+# CHECK:   ewh6
+# CHECK:   ewh8
+# CHECK:   ewl2
+# CHECK:   ewl4
+# CHECK:   ewl6
+# CHECK:   ewl8
+# CHECK:   ex2
+# CHECK:   ex4
+# CHECK:   ex6
+# CHECK:   ex8
+# CHECK:   wh2
+# CHECK:   wh4
+# CHECK:   wh5
+# CHECK:   wh6
+# CHECK:   wh8
+# CHECK:   wh10
+# CHECK:   wh11
+# CHECK:   wl2
+# CHECK:   wl4
+# CHECK:   wl5
+# CHECK:   wl6
+# CHECK:   wl8
+# CHECK:   wl10
+# CHECK:   wl11
+# CHECK:   x2
+# CHECK:   x4
+# CHECK:   x5
+# CHECK:   x6
+# CHECK:   x8
+# CHECK:   x10
+# CHECK:   x11
+# CHECK:   y2
+# CHECK:   y5
+# CHECK: Total: 54 registers
+# CHECK: === END FINAL LIVE RANGES
+
+# derived from GEMM_Bfp16_opt_0
+
+--- |
+  define dso_local void @gemm(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
+  entry:
+    %cmp5 = icmp sgt i32 %n, 0
+    br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %for.body
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %d.addr.07 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %d, %for.body.preheader ]
+    %s.addr.06 = phi ptr addrspace(6) [ %incdec.ptr1, %for.body ], [ %s, %for.body.preheader ]
+    %0 = load i32, ptr addrspace(6) %s.addr.06, align 4
+    %add = add nsw i32 %0, 1
+    store i32 %add, ptr addrspace(5) %d.addr.07, align 4
+    %incdec.ptr = getelementptr inbounds i32, ptr addrspace(5) %d.addr.07, i20 1
+    %incdec.ptr1 = getelementptr inbounds i32, ptr addrspace(6) %s.addr.06, i20 1
+    %1 = call i1 @llvm.loop.decrement.i32(i32 1)
+    br i1 %1, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+
+  }
+
+  declare void @llvm.set.loop.iterations.i32(i32)
+  declare i1 @llvm.loop.decrement.i32(i32)
+
+  !0 = distinct !{!0, !1, !2}
+  !1 = !{!"llvm.loop.mustprogress"}
+  !2 = !{!"llvm.loop.itercount.range", i64 10}
+
+...
+---
+name:            gemm
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry (align 16):
+    successors: %bb.2
+    liveins: $p0, $p1, $r0
+
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.2
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.2.for.body (align 16):
+    successors: %bb.2, %bb.3
+    liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y0:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
+
+    $p5 = MOV_alu_mv_mv_mv_scl $p6
+    $x2, $p6, $dc0, $dc4 = VLD_3D_x_pseudo killed $p6, $d0_3d :: (load (<16 x s32>))
+    renamable $x4 = VLD_x_idx_imm_pseudo renamable $p5, 64 :: (load (<16 x s32>))
+    $p3 = MOV_alu_mv_mv_mv_scl $p7
+    $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf killed $p7, $d1_3d :: (load (<32 x s16>))
+    renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm renamable $p3, 64 :: (load (<32 x s16>))
+    renamable $p5 = PADD_mod_pseudo killed renamable $p5, renamable $m4
+    renamable $x6, renamable $p5 = VLD_x_pstm_nrm_imm_pseudo killed renamable $p5, 64 :: (load (<16 x s32>))
+    renamable $x8 = VLD_x_idx_imm_pseudo killed renamable $p5, 0 :: (load (<16 x s32>))
+    renamable $x10 = VSHUFFLE_vec_shuffle_x renamable $x2, renamable $x4, renamable $r0
+    renamable $x11 = VSHUFFLE_vec_shuffle_x killed renamable $x2, killed renamable $x4, renamable $r1
+    renamable $ex2 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y5, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x8, renamable $r0
+    renamable $x5 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x8, renamable $r1
+    renamable $ex6 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y2, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $p3 = PADD_mod_pseudo killed renamable $p3, renamable $m5
+    renamable $ex4 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $cml4, renamable $p3 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_pstm_nrm_imm killed renamable $p3, 64 :: (load (<32 x s16>))
+    renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm killed renamable $p3, 0 :: (load (<32 x s16>))
+    renamable $ex8 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm3, renamable $ex2, renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm2, killed renamable $ex2, renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm1, renamable $ex8, killed renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm0, killed renamable $ex8, killed renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.2
+
+  bb.3 (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# REWRITE: {{.*}}
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-ii7.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-ii7.mir
new file mode 100644
index 000000000000..f0f13d3550a3
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-ii7.mir
@@ -0,0 +1,159 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# This test exercises the experimental modules
+#   AIERegDefUseTracker
+#   AIEScheduleInterpreter
+#   AIEPostRegAlloc
+# using the motivating GEMM example, skipping WAWRegRewriter.
+# We rewrite suitable physregs to virtual regs, create the dependence graph,
+# pipeline, then reallocate the virtual regs.
+
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --aie-postpipeliner-filter-no-choice=0 \
+# RUN:   --aie-wawreg-rewrite=0 \
+# RUN:   --aie-preassign-multi-slot-instr=1 \
+# RUN:   --aie-materialize-pipeline=0 \
+# RUN:   --aie-postpipeliner-maxii=10 \
+# RUN:   -o - %s | FileCheck %s
+
+# derived from GEMM_Bfp16_opt_0
+
+--- |
+  define dso_local void @gemm(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
+  ; CHECK-LABEL: gemm:
+  ; CHECK:         .p2align 4
+  ; CHECK-NEXT:  // %bb.0: // %entry
+  ; CHECK-NEXT:    add.nc lc, r0, #0
+  ; CHECK-NEXT:    movxm ls, #.LBB0_1
+  ; CHECK-NEXT:    movxm le, #.L_LEnd0
+  ; CHECK-NEXT:    .p2align 4
+  ; CHECK-NEXT:  .LBB0_1: // %for.body
+  ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+  ; CHECK-NEXT:    nopa ; vldb.3d x2, [p6], d0; nopx ; mov p5, p6
+  ; CHECK-NEXT:    padda [p5], m4; vldb x4, [p5, #64]
+  ; CHECK-NEXT:    vldb x6, [p5], #64
+  ; CHECK-NEXT:    vldb x8, [p5, #0]
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    mov p3, p7
+  ; CHECK-NEXT:    vshuffle x10, x2, x4, r0
+  ; CHECK-NEXT:    vlda.3d.conv.fp32.bf16 cml4, [p7], d1; vshuffle x11, x2, x4, r1
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #64]; vshuffle x4, x6, x8, r0
+  ; CHECK-NEXT:    vshuffle x5, x6, x8, r1
+  ; CHECK-NEXT:    padda [p3], m5; vmul.f dm4, y5, y0, r2
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cml4, [p3], #64; vmul.f dm4, y2, y0, r2
+  ; CHECK-NEXT:    vlda.conv.fp32.bf16 cmh4, [p3, #0]
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex2, dm4
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex6, dm4
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex4, dm4
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vconv.bfp16ebs8.fp32 ex8, dm4
+  ; CHECK-NEXT:    nop
+  ; CHECK-NEXT:    vmac.f dm3, dm3, ex2, ex6, r3
+  ; CHECK-NEXT:    vmac.f dm2, dm2, ex2, ex4, r3
+  ; CHECK-NEXT:    vmac.f dm1, dm1, ex8, ex6, r3
+  ; CHECK-NEXT:  .L_LEnd0:
+  ; CHECK-NEXT:    nopa ; nopb ; nops ; nopxm ; vmac.f dm0, dm0, ex8, ex4, r3
+  ; CHECK-NEXT:    .p2align 4
+  ; CHECK-NEXT:  // %bb.2:
+  ; CHECK-NEXT:    nopa ; ret lr
+  ; CHECK-NEXT:    nop // Delay Slot 5
+  ; CHECK-NEXT:    nop // Delay Slot 4
+  ; CHECK-NEXT:    nop // Delay Slot 3
+  ; CHECK-NEXT:    nop // Delay Slot 2
+  ; CHECK-NEXT:    nop // Delay Slot 1
+  entry:
+    %cmp5 = icmp sgt i32 %n, 0
+    br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %for.body
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %d.addr.07 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %d, %for.body.preheader ]
+    %s.addr.06 = phi ptr addrspace(6) [ %incdec.ptr1, %for.body ], [ %s, %for.body.preheader ]
+    %0 = load i32, ptr addrspace(6) %s.addr.06, align 4
+    %add = add nsw i32 %0, 1
+    store i32 %add, ptr addrspace(5) %d.addr.07, align 4
+    %incdec.ptr = getelementptr inbounds i32, ptr addrspace(5) %d.addr.07, i20 1
+    %incdec.ptr1 = getelementptr inbounds i32, ptr addrspace(6) %s.addr.06, i20 1
+    %1 = call i1 @llvm.loop.decrement.i32(i32 1)
+    br i1 %1, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+
+  }
+
+  declare void @llvm.set.loop.iterations.i32(i32)
+  declare i1 @llvm.loop.decrement.i32(i32)
+
+  !0 = distinct !{!0, !1, !2}
+  !1 = !{!"llvm.loop.mustprogress"}
+  !2 = !{!"llvm.loop.itercount.range", i64 10}
+
+...
+---
+name:            gemm
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry (align 16):
+    successors: %bb.2
+    liveins: $p0, $p1, $r0
+
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.2
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.2.for.body (align 16):
+    successors: %bb.2, %bb.3
+    liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y0:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
+
+    $p5 = MOV_alu_mv_mv_mv_scl $p6
+    $x2, $p6, $dc0, $dc4 = VLD_3D_x_pseudo killed $p6, $d0_3d :: (load (<16 x s32>))
+    renamable $x4 = VLD_x_idx_imm_pseudo renamable $p5, 64 :: (load (<16 x s32>))
+    $p3 = MOV_alu_mv_mv_mv_scl $p7
+    $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf killed $p7, $d1_3d :: (load (<32 x s16>))
+    renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm renamable $p3, 64 :: (load (<32 x s16>))
+    renamable $p5 = PADD_mod_pseudo killed renamable $p5, renamable $m4
+    renamable $x6, renamable $p5 = VLD_x_pstm_nrm_imm_pseudo killed renamable $p5, 64 :: (load (<16 x s32>))
+    renamable $x8 = VLD_x_idx_imm_pseudo killed renamable $p5, 0 :: (load (<16 x s32>))
+    renamable $x10 = VSHUFFLE_vec_shuffle_x renamable $x2, renamable $x4, renamable $r0
+    renamable $x11 = VSHUFFLE_vec_shuffle_x killed renamable $x2, killed renamable $x4, renamable $r1
+    renamable $ex2 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y5, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x8, renamable $r0
+    renamable $x5 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x8, renamable $r1
+    renamable $ex6 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y2, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $p3 = PADD_mod_pseudo killed renamable $p3, renamable $m5
+    renamable $ex4 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $cml4, renamable $p3 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_pstm_nrm_imm killed renamable $p3, 64 :: (load (<32 x s16>))
+    renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm killed renamable $p3, 0 :: (load (<32 x s16>))
+    renamable $ex8 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm3, renamable $ex2, renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm2, killed renamable $ex2, renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm1, renamable $ex8, killed renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm0, killed renamable $ex8, killed renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.2
+
+  bb.3 (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-mli.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-mli.mir
new file mode 100644
index 000000000000..6e544a614433
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/gemm-bfp16-mli.mir
@@ -0,0 +1,124 @@
+
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# This test exercises the experimental modules AIERegDefUseTracker and AIEScheduleInterpreter
+# using the motivating GEMM example with multi-slot pseudo materialization. It checks the "Live Lanes" table (II=7) printed by --debug-only=aie-postregalloc.
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched --stop-after=postmisched \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --aie-postpipeliner-filter-no-choice=false \
+# RUN:   --aie-postpipeliner-maxii=7 \
+# RUN:   -o - --debug-only=aie-postregalloc %s 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# derived from GEMM_Bfp16_opt_0
+
+# CHECK: Live Lanes (II=7):
+# CHECK: VReg   | t0      t1      t2      t3      t4      t5      t6      
+# CHECK: -------+--------------------------------------------------------
+# CHECK: 0      |  ..      #       #       #R1     R1      ..      ..     
+# CHECK: 1      |  #       #       #       #R1     R1      ..      ..     
+# CHECK: 2      |  #       ..      #       #       #       #       #      
+# CHECK: 3      |  ..      ..      ..      W1      #W1     #       ..     
+# CHECK: 4      |  ..      ..      ..      #       #       #R1     R1     
+# CHECK: 5      |  ..      ..      #       #       #       #R1     R1     
+# CHECK: 6      |  ..      ..      ..      ..      #       ..      ..     
+# CHECK: 7      |  #       ..      ..      ..      ..      W1      #W1    
+# CHECK: 8      |  ..      ..      ..      ..      ..      ..      #      
+# CHECK: 9      |  #       #       ..      ..      ..      #       #      
+# CHECK: 10     |  #       #       #       #       #       #       #      
+# CHECK: 11     |  ..      #       #       ..      ..      ..      ..     
+# CHECK: 12     |  ..      ..      ..      #       #       ..      ..     
+# CHECK: 13     |  #       #       #       #       #       #       #      
+
+--- |
+  define dso_local void @gemm(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
+  entry:
+    %cmp5 = icmp sgt i32 %n, 0
+    br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:                               ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %for.body
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %d.addr.07 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %d, %for.body.preheader ]
+    %s.addr.06 = phi ptr addrspace(6) [ %incdec.ptr1, %for.body ], [ %s, %for.body.preheader ]
+    %0 = load i32, ptr addrspace(6) %s.addr.06, align 4
+    %add = add nsw i32 %0, 1
+    store i32 %add, ptr addrspace(5) %d.addr.07, align 4
+    %incdec.ptr = getelementptr inbounds i32, ptr addrspace(5) %d.addr.07, i20 1
+    %incdec.ptr1 = getelementptr inbounds i32, ptr addrspace(6) %s.addr.06, i20 1
+    %1 = call i1 @llvm.loop.decrement.i32(i32 1)
+    br i1 %1, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+
+  }
+
+  declare void @llvm.set.loop.iterations.i32(i32)
+  declare i1 @llvm.loop.decrement.i32(i32)
+
+  !0 = distinct !{!0, !1, !2}
+  !1 = !{!"llvm.loop.mustprogress"}
+  !2 = !{!"llvm.loop.itercount.range", i64 10}
+
+...
+---
+name:            gemm
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry (align 16):
+    successors: %bb.2
+    liveins: $p0, $p1, $r0
+
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.2
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.2.for.body (align 16):
+    successors: %bb.2, %bb.3
+    liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y0:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
+
+    $p5 = MOV_alu_mv_mv_mv_scl $p6
+    $x2, $p6, $dc0, $dc4 = VLD_3D_x_pseudo killed $p6, $d0_3d :: (load (<16 x s32>))
+    renamable $x4 = VLD_x_idx_imm_pseudo renamable $p5, 64 :: (load (<16 x s32>))
+    $p3 = MOV_alu_mv_mv_mv_scl $p7
+    $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf killed $p7, $d1_3d :: (load (<32 x s16>))
+    renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm renamable $p3, 64 :: (load (<32 x s16>))
+    renamable $p5 = PADD_mod_pseudo killed renamable $p5, renamable $m4
+    renamable $x6, renamable $p5 = VLD_x_pstm_nrm_imm_pseudo killed renamable $p5, 64 :: (load (<16 x s32>))
+    renamable $x8 = VLD_x_idx_imm_pseudo killed renamable $p5, 0 :: (load (<16 x s32>))
+    renamable $x10 = VSHUFFLE_vec_shuffle_x renamable $x2, renamable $x4, renamable $r0
+    renamable $x11 = VSHUFFLE_vec_shuffle_x killed renamable $x2, killed renamable $x4, renamable $r1
+    renamable $ex2 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y5, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x8, renamable $r0
+    renamable $x5 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x8, renamable $r1
+    renamable $ex6 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y killed renamable $y2, renamable $y0, renamable $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $p3 = PADD_mod_pseudo killed renamable $p3, renamable $m5
+    renamable $ex4 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $cml4, renamable $p3 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_pstm_nrm_imm killed renamable $p3, 64 :: (load (<32 x s16>))
+    renamable $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm killed renamable $p3, 0 :: (load (<32 x s16>))
+    renamable $ex8 = VCONV_bfp16ebs8_fp32 killed renamable $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    renamable $dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm3, renamable $ex2, renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm2, killed renamable $ex2, renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm1, renamable $ex8, killed renamable $ex6, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    renamable $dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX killed renamable $dm0, killed renamable $ex8, killed renamable $ex4, renamable $r3, implicit-def dead $srfpflags, implicit $crfpmask
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.2
+
+  bb.3 (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-available-phys-regs.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-available-phys-regs.mir
new file mode 100644
index 000000000000..52e47960d28c
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-available-phys-regs.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+# NOTE: Test for AIERegDefUseTracker - available physical registers tracking
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p \
+# RUN:   --start-before=postmisched --stop-after=postmisched \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o - %s 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# This test verifies that the AIERegDefUseTracker correctly tracks and dumps
+# available physical registers after rewriting them to virtual registers.
+
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK: ==============================================
+# CHECK-DAG: bmhh4
+# CHECK-DAG: bmhl4
+# CHECK-DAG: bmlh4
+# CHECK-DAG: bmll4
+# CHECK-DAG: cmh4
+# CHECK-DAG: cml4
+# CHECK-DAG: dm4
+# CHECK-DAG: l0
+# CHECK-DAG: r0
+# CHECK-DAG: r1
+# CHECK: Total: 10 registers
+
+--- |
+  define void @test_available_regs_dump() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_available_regs_dump
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p1, $p6, $y5, $y0, $r2, $r3
+
+    $lc = ADD_NC_mv_add_ri $r3, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p1, $p6, $y5, $y0, $r2
+
+    ; Simple scalar registers that should become available
+    $r0 = MOV_alu_mv_mv_mv_scl $p6
+    $r1 = ADD_NC_mv_add_rr $r0, $r2
+
+    ; Composite register dm4 with subregs cml4 and cmh4 that should all become available
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-physreg-to-vreg-rewrite-final.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-physreg-to-vreg-rewrite-final.mir
new file mode 100644
index 000000000000..ae5ca5a6d4ea
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-physreg-to-vreg-rewrite-final.mir
@@ -0,0 +1,264 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -mtriple=aie2p -run-pass=postmisched --aie-postpipeliner-vreg-mode %s -o - | FileCheck %s
+
+# This test verifies that the --aie-postpipeliner-vreg-mode option correctly
+# replaces filtered physical registers with virtual registers of the appropriate class.
+
+---
+name:            simple_scalar_def_use
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: simple_scalar_def_use
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $lc = MOV_alu_mv_mv_mv_scl $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $p0, $p6, $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $r0 = MOV_alu_mv_mv_mv_scl $p6
+  ; CHECK-NEXT:   $r1 = ADD_NC_mv_add_rr killed $r0, $r2
+  ; CHECK-NEXT:   PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   RET implicit $lr
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   DelayedSchedBarrier
+  bb.0.entry:
+    liveins: $p0
+    successors: %bb.1
+    $lc = MOV_alu_mv_mv_mv_scl $p0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p6, $r2
+
+    $r0 = MOV_alu_mv_mv_mv_scl $p6
+    $r1 = ADD_NC_mv_add_rr $r0, $r2
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2:
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
+---
+name:            composite_dm_register
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: composite_dm_register
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $lc = MOV_alu_mv_mv_mv_scl $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $p0, $p1, $y5, $y0, $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm killed $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+  ; CHECK-NEXT:   VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm killed $cmh4, $p1, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+  ; CHECK-NEXT:   PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   RET implicit $lr
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   DelayedSchedBarrier
+  bb.0.entry:
+    liveins: $p0
+    successors: %bb.1
+    $lc = MOV_alu_mv_mv_mv_scl $p0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p1, $y5, $y0, $r2
+
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2:
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
+---
+name:            partial_composite_def
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: partial_composite_def
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0, $cmh4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $lc = MOV_alu_mv_mv_mv_scl $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $p0, $p7, $cmh4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 0 :: (load (<32 x s16>))
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+  ; CHECK-NEXT:   PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   RET implicit $lr
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   DelayedSchedBarrier
+  bb.0.entry:
+    liveins: $p0, $cmh4
+    successors: %bb.1
+    $lc = MOV_alu_mv_mv_mv_scl $p0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7, $cmh4
+
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 0 :: (load (<32 x s16>))
+
+    $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2:
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
+---
+name:            tied_operands
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: tied_operands
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0, $p3, $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $lc = MOV_alu_mv_mv_mv_scl $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $p0, $p3, $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead $r1, $p3, $dc0 = LDA_2D_dms_lda killed $p3, $d0 :: (load (s32))
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   RET implicit $lr
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   DelayedSchedBarrier
+  bb.0.entry:
+    liveins: $p0, $p3, $d0
+    successors: %bb.1
+    $lc = MOV_alu_mv_mv_mv_scl $p0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p3, $d0
+
+    dead $r1, $p3, $dc0 = LDA_2D_dms_lda $p3, $d0 :: (load (s32))
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2:
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
+---
+name:            composite_with_liveout
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: composite_with_liveout
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $lc = MOV_alu_mv_mv_mv_scl $p0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $p0, $y5, $y0, $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm killed $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+  ; CHECK-NEXT:   PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $p0, $cmh4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   RET implicit $lr
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm killed $cmh4, killed $p0, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+  ; CHECK-NEXT:   NOP
+  ; CHECK-NEXT:   DelayedSchedBarrier
+  bb.0.entry:
+    liveins: $p0
+    successors: %bb.1
+    $lc = MOV_alu_mv_mv_mv_scl $p0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    liveins: $p0, $y5, $y0, $r2
+
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2:
+    liveins: $p0, $cmh4
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-vreg-metrics.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-vreg-metrics.mir
new file mode 100644
index 000000000000..27d46dcf1b63
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test-vreg-metrics.mir
@@ -0,0 +1,143 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# Test virtual register metrics dumping in AIEPostRegAlloc
+# This test verifies that the metrics are properly computed and displayed
+# REQUIRES: asserts
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --aie-postpipeliner-filter-no-choice=false \
+# RUN:   --aie-wawreg-rewrite=0 \
+# RUN:   --aie-preassign-multi-slot-instr=1 \
+# RUN:   --aie-materialize-pipeline=0 \
+# RUN:   --aie-postpipeliner-maxii=7 \
+# RUN:   --debug-only=aie-postregalloc \
+# RUN:   -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=METRICS
+
+# Based on the actual output from gemm-bfp16-ii7.mir, we expect:
+# - 14 virtual registers total
+# - Various register classes (VEC512, VEC576, eY, eDM)
+# - Pure interference degrees ranging from 1 to 3, aliasing degrees from 0 to 800
+# - Different lane widths and durations
+
+# NOTE: Status quo - II=7 (was II=6)
+# METRICS: AIEPostRegAlloc::allocate for 14 vregs, II=7
+# METRICS: === Virtual Register Metrics Dump ===
+# METRICS: Total Virtual Registers: 14
+# METRICS-EMPTY:
+# METRICS: VReg      RegClass                 Avail  Pure  Alias  TotalLanes  MaxWidth  Duration
+# METRICS-NEXT: --------  -----------------------  -----  ----  -----  ----------  --------  --------
+# METRICS-NEXT: %vreg0     VEC512                       7     3    600           6         2         4
+# METRICS-NEXT: %vreg1     VEC512                       7     3    800           8         2         5
+# METRICS-NEXT: %vreg2     eDM                          1     3      0          22         4         6 FAIL
+# METRICS-NEXT: %vreg3     eY                           2     1    700           6         4         3
+# METRICS-NEXT: %vreg4     VEC512                       7     3    700           6         2         4
+# METRICS-NEXT: %vreg5     VEC512                       7     3    800           8         2         5
+# METRICS-NEXT: %vreg6     eDM                          1     1      0           4         4         1 FAIL
+# METRICS-NEXT: %vreg7     eY                           2     1    500           6         4         3
+# METRICS-NEXT: %vreg8     eDM                          1     2      0           4         4         1 FAIL
+# METRICS-NEXT: %vreg9     eDM                          1     2      0          14         4         4 FAIL
+# METRICS-NEXT: %vreg10    VEC576                       4     3    800          28         4         7
+# METRICS-NEXT: %vreg11    VEC576                       4     2    300           8         4         2
+# METRICS-NEXT: %vreg12    VEC576                       4     2    600           8         4         2
+# METRICS-NEXT: %vreg13    VEC576                       4     3    800          28         4         7
+# METRICS-EMPTY:
+# METRICS: === Summary Statistics ===
+# METRICS: Total Lanes (sum):              156
+# METRICS: Max Width (max):                4
+# METRICS: Max Duration:                   7
+# METRICS: Max Pure Interference Degree:   3
+# METRICS: Max Aliasing Interference Deg:  800
+# METRICS: Avg Pure Interference Degree:   2.29
+# METRICS: Avg Aliasing Interference Deg:  471.43
+
+--- |
+  define dso_local void @gemm(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
+  entry:
+    %cmp5 = icmp sgt i32 %n, 0
+    br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
+
+  for.body.preheader:
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %for.body
+
+  for.body:
+    %d.addr.07 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %d, %for.body.preheader ]
+    %s.addr.06 = phi ptr addrspace(6) [ %incdec.ptr1, %for.body ], [ %s, %for.body.preheader ]
+    %0 = load i32, ptr addrspace(6) %s.addr.06, align 4
+    %add = add nsw i32 %0, 1
+    store i32 %add, ptr addrspace(5) %d.addr.07, align 4
+    %incdec.ptr = getelementptr inbounds i32, ptr addrspace(5) %d.addr.07, i20 1
+    %incdec.ptr1 = getelementptr inbounds i32, ptr addrspace(6) %s.addr.06, i20 1
+    %1 = call i1 @llvm.loop.decrement.i32(i32 1)
+    br i1 %1, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+
+  for.cond.cleanup:
+    ret void
+  }
+
+  declare void @llvm.set.loop.iterations.i32(i32)
+  declare i1 @llvm.loop.decrement.i32(i32)
+
+  !0 = distinct !{!0, !1, !2}
+  !1 = !{!"llvm.loop.mustprogress"}
+  !2 = !{!"llvm.loop.itercount.range", i64 10}
+
+...
+---
+name:            gemm
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry (align 16):
+    successors: %bb.2
+    liveins: $p0, $p1, $r0
+
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.2
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.2.for.body (align 16):
+    successors: %bb.2, %bb.3
+    liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y0:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
+
+    $p5 = MOV_alu_mv_mv_mv_scl $p6
+    $x2, $p6, $dc0, $dc4 = VLD_3D_x_pseudo $p6, $d0_3d :: (load (<16 x s32>))
+    $x4 = VLD_x_idx_imm_pseudo $p5, 64 :: (load (<16 x s32>))
+    $p3 = MOV_alu_mv_mv_mv_scl $p7
+    $cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf $p7, $d1_3d :: (load (<32 x s16>))
+    $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p3, 64 :: (load (<32 x s16>))
+    $p5 = PADD_mod_pseudo $p5, $m4
+    $x6, $p5 = VLD_x_pstm_nrm_imm_pseudo $p5, 64 :: (load (<16 x s32>))
+    $x8 = VLD_x_idx_imm_pseudo $p5, 0 :: (load (<16 x s32>))
+    $x10 = VSHUFFLE_vec_shuffle_x $x2, $x4, $r0
+    $x11 = VSHUFFLE_vec_shuffle_x $x2, $x4, $r1
+    $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    $x4 = VSHUFFLE_vec_shuffle_x $x6, $x8, $r0
+    $x5 = VSHUFFLE_vec_shuffle_x $x6, $x8, $r1
+    $ex6 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y2, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    $p3 = PADD_mod_pseudo $p3, $m5
+    $ex4 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    $cml4, $p3 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_pstm_nrm_imm $p3, 64 :: (load (<32 x s16>))
+    $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p3, 0 :: (load (<32 x s16>))
+    $ex8 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    $dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm3, $ex2, $ex6, $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    $dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm2, $ex2, $ex4, $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    $dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm1, $ex8, $ex6, $r3, implicit-def dead $srfpflags, implicit $crfpmask
+    $dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm0, $ex8, $ex4, $r3, implicit-def dead $srfpflags, implicit $crfpmask
+
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.2
+
+  bb.3 (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test1-simple-def-use.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test1-simple-def-use.mir
new file mode 100644
index 000000000000..24d930be777c
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test1-simple-def-use.mir
@@ -0,0 +1,62 @@
+# NOTE: Test for AIERegDefUseTracker - simple def-use chains
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 1: two consecutive def-use chains on the same simple register lead to two live ranges
+# CHECK-DAG: Live Range {{.*}} for r0:
+# CHECK-DAG: Live Range {{.*}} for r0:
+
+--- |
+  define void @test_simple_def_use() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_simple_def_use
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p6, $r0, $r2, $r4
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p6, $r2, $r4
+    
+    ; First def-use chain
+    $r0 = MOV_alu_mv_mv_mv_scl $p6
+    $r1 = ADD_NC_mv_add_rr $r0, $r2
+    
+    ; Second def-use chain on same register
+    $r0 = MOV_alu_mv_mv_mv_scl $p6
+    $r3 = ADD_NC_mv_add_rr $r0, $r4
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test10-reserved-subreg-loads-vmul-liveout.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test10-reserved-subreg-loads-vmul-liveout.mir
new file mode 100644
index 000000000000..f61705e81282
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test10-reserved-subreg-loads-vmul-liveout.mir
@@ -0,0 +1,117 @@
+# NOTE: Test for AIERegDefUseTracker - reserved ranges with subreg loads and VMUL composite use
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 10: Verify reserved range handling with subreg loads feeding VMUL with live-out
+# 
+# This test exercises reserved ranges with subreg defs and composite Y register use:
+# 1. Subreg loads (x10, x11) that feed into a composite Y register use (y5) in VMUL
+#    where the result feeds into a live-out should create a RESERVED range
+# 2. An additional disjoint live range on the same composite register should
+#    NOT make that register available for reallocation
+#
+# Program order (backward analysis processes in reverse):
+# - y5: def x10, def x11 → use y5 in VMUL (disjoint, early) - analyzed LAST → normal
+# - y5: def x10, def x11 → use y5 in VMUL (late, result feeds live-out) - analyzed FIRST → RESERVED
+#
+# Expected behavior:
+# - y5's late range (feeding live-out) should be marked RESERVED
+# - y5's early disjoint range should be normal (not reserved)
+# - y5 (and its subregs) should NOT appear in available physical registers
+
+# CHECK-DAG: Live Range #{{[0-9]+}} for dm4:
+# CHECK-DAG: Live Range #{{[0-9]+}} for y5 [RESERVED]:
+# CHECK-DAG: Live Range #{{[0-9]+}} for y5:
+
+# Verify available registers explicitly - dm4 and subregs should be available, but not y5/x10/x11
+# CHECK: FINAL LIVE RANGES
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK-NEXT: ==============================================
+# CHECK-DAG:   bmhh4
+# CHECK-DAG:   bmhl4
+# CHECK-DAG:   bmlh4
+# CHECK-DAG:   bmll4
+# CHECK-DAG:   cmh4
+# CHECK-DAG:   cml4
+# CHECK-DAG:   dm4
+# CHECK: Total: 7 registers
+
+# Verify scarce range set - both dm4 ranges should be identified as scarce
+# CHECK: Most promising scarce range set: 2 ranges
+# CHECK-NEXT: Register class: eDM
+# CHECK-NEXT:   [0] BaseReg=dm4 Defs=1 Uses=2
+# CHECK-NEXT:   [1] BaseReg=dm4 Defs=1 Uses=2
+
+--- |
+  define void @test_reserved_subreg_loads_vmul_liveout() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_reserved_subreg_loads_vmul_liveout
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p1, $p7, $y0, $r2, $r3
+    
+    $lc = ADD_NC_mv_add_ri $r3, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p1, $p7, $y0, $r2
+    
+    ; EARLY in program order (analyzed LAST in backward pass):
+    ; Subreg loads (x10, x11) → composite use (y5) in VMUL, disjoint from later range
+    ; This is a normal range
+    $x10, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 0 :: (load (<16 x s32>))
+    $x11, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 64 :: (load (<16 x s32>))
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; LATE in program order (analyzed FIRST in backward pass):
+    ; Subreg loads (x10, x11) → composite use (y5) in VMUL where result feeds live-out
+    ; This should be marked as RESERVED because y5 is live-out to bb.2 (it is in bb.2's liveins)
+    $x10, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 128 :: (load (<16 x s32>))
+    $x11, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 192 :: (load (<16 x s32>))
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 128, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 192, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    liveins: $y5, $y0, $r2, $p0
+    
+    ; Use y5 in another VMUL - this makes y5 live-out from bb.1
+    ; Backward analysis starts here, sees y5 is live-in to bb.2
+    ; Then traces back and finds the LATE x10/x11 → y5 chain (closest to block end)
+    ; feeds into this, so that y5 range becomes RESERVED
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 256, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test11-reserved-subreg-loads-vshuffle-vmul-liveout.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test11-reserved-subreg-loads-vshuffle-vmul-liveout.mir
new file mode 100644
index 000000000000..7b670f5f5f97
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test11-reserved-subreg-loads-vshuffle-vmul-liveout.mir
@@ -0,0 +1,130 @@
+# NOTE: Test for AIERegDefUseTracker - reserved ranges with subreg loads, VSHUFFLE, and VMUL composite use
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 11: Verify reserved range handling with subreg loads, VSHUFFLE, and VMUL with live-out
+# 
+# This test exercises reserved ranges with subreg defs via VSHUFFLE and composite Y register use:
+# 1. Subreg loads (x8, x9) that are shuffled to create (x10, x11) which feed into a composite
+#    Y register use (y5) in VMUL where the result feeds into a live-out should create a RESERVED range
+# 2. An additional disjoint live range on the same composite register should
+#    NOT make that register available for reallocation
+# 3. Since x8/x9 (forming y4) are only used as inputs to VSHUFFLE, y4 should become available
+#
+# Program order (backward analysis processes in reverse):
+# - y5: def x8, def x9 → VSHUFFLE → def x10, def x11 → use y5 in VMUL (disjoint, early) - analyzed LAST → normal
+# - y5: def x10, def x11 → use y5 in VMUL (late, result feeds live-out) - analyzed FIRST → RESERVED
+#
+# Expected behavior:
+# - y5's late range (feeding live-out) should be marked RESERVED
+# - y5's early disjoint range should be normal (not reserved)
+# - y5 (and its subregs x10/x11) should NOT appear in available physical registers
+# - y4 (and its subregs x8/x9) SHOULD appear in available physical registers
+
+# CHECK-DAG: Live Range #{{[0-9]+}} for dm4:
+# CHECK-DAG: Live Range #{{[0-9]+}} for x8:
+# CHECK-DAG: Live Range #{{[0-9]+}} for x9:
+# CHECK-DAG: Live Range #{{[0-9]+}} for y5 [RESERVED]:
+# CHECK-DAG: Live Range #{{[0-9]+}} for y5:
+
+# Verify available registers explicitly - dm4, y4, and their subregs should be available, but not y5/x10/x11
+# CHECK: FINAL LIVE RANGES
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK-DAG:   bmhh4
+# CHECK-DAG:   bmhl4
+# CHECK-DAG:   bmlh4
+# CHECK-DAG:   bmll4
+# CHECK-DAG:   cmh4
+# CHECK-DAG:   cml4
+# CHECK-DAG:   dm4
+# CHECK-DAG:   wh8
+# CHECK-DAG:   wh9
+# CHECK-DAG:   wl8
+# CHECK-DAG:   wl9
+# CHECK-DAG:   x8
+# CHECK-DAG:   x9
+# CHECK-DAG:   y4
+# CHECK: Total: 14 registers
+
+# Verify scarce range set - both dm4 ranges should be identified as scarce
+# CHECK: Most promising scarce range set: 2 ranges
+# CHECK-NEXT: Register class: eDM
+# CHECK-NEXT:   [0] BaseReg=dm4 Defs=1 Uses=2
+# CHECK-NEXT:   [1] BaseReg=dm4 Defs=1 Uses=2
+
+--- |
+  define void @test_reserved_subreg_loads_vshuffle_vmul_liveout() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_reserved_subreg_loads_vshuffle_vmul_liveout
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p1, $p7, $y0, $r2, $r3
+    
+    $lc = ADD_NC_mv_add_ri $r3, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p1, $p7, $y0, $r2, $r3
+    
+    ; EARLY in program order (analyzed LAST in backward pass):
+    ; Subreg loads (x8, x9) → VSHUFFLE → (x10, x11) → composite use (y5) in VMUL, disjoint from later range
+    ; This is a normal range
+    ; Since x8/x9 are only used as VSHUFFLE inputs, y4 should become available
+    $x8, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 0 :: (load (<16 x s32>))
+    $x9, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 64 :: (load (<16 x s32>))
+    $x10 = VSHUFFLE_vec_shuffle_x $x8, $x9, $r2
+    $x11 = VSHUFFLE_vec_shuffle_x $x8, $x9, $r3
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; LATE in program order (analyzed FIRST in backward pass):
+    ; Subreg loads (x10, x11) → composite use (y5) in VMUL where result feeds live-out
+    ; This should be marked as RESERVED because y5 is live-out to bb.2 (it is in bb.2's liveins)
+    $x10, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 128 :: (load (<16 x s32>))
+    $x11, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 192 :: (load (<16 x s32>))
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 128, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 192, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    liveins: $y5, $y0, $r2, $p0
+    
+    ; Use y5 in another VMUL - this makes y5 live-out from bb.1
+    ; Backward analysis starts here, sees y5 is live-in to bb.2
+    ; Then traces back and finds the LATE x10/x11 → y5 chain (closest to block end)
+    ; feeds into this, so that y5 range becomes RESERVED
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 256, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test12-reserved-subreg-scarce-superreg.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test12-reserved-subreg-scarce-superreg.mir
new file mode 100644
index 000000000000..8ac5aa3d25b4
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test12-reserved-subreg-scarce-superreg.mir
@@ -0,0 +1,130 @@
+# NOTE: Test for AIERegDefUseTracker - reserved subreg with scarce superreg from separate subreg liveranges
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 12: Verify scarce register creation from separate subreg liveranges
+#
+# This test exercises a complex scenario:
+# 1. We have a RESERVED live range on x0 (subreg of y0 = [x0, x1])
+# 2. We have two normal live ranges on y0 (superregister containing x0)
+# 3. We have two separate normal liveranges: one on x2, one on x3 (subregs of y1 = [x2, x3])
+# 4. The result should be y1 identified as a scarce register with two live ranges (from x2 and x3)
+#
+# Register structure:
+# - y0 = [x0, x1] where x0 = [wl0, wh0], x1 = [wl1, wh1]
+# - y1 = [x2, x3] where x2 = [wl2, wh2], x3 = [wl3, wh3]
+#
+# Program order (backward analysis processes in reverse):
+# - y0: def x0, def x1 → use y0 in VMUL (early, normal, disjoint)
+# - x2: def only, no use in the loop (normal, makes x2 available)
+# - x3: def only, no use in the loop (normal, makes x3 available)
+# - y0: def x0, def x1 → use y0 in VMUL (middle, normal, disjoint)
+# - x0: def → use (LATE, RESERVED, feeds live-out, NOT killed)
+#
+# Expected behavior:
+# - x0's late range (feeding live-out) should be marked RESERVED
+# - y0's ranges should be normal (not reserved)
+# - x0 should NOT appear in available physical registers (due to reserved range)
+# - x2 and x3 SHOULD appear in available registers
+# - y1 (composed of x2, x3) should be identified as a scarce register with 2 ranges
+
+# Verify final analysis results
+# CHECK: FINAL LIVE RANGES
+# CHECK: Total live ranges: 7
+# CHECK-DAG: Live Range #{{[0-9]+}} for x0 [RESERVED]:
+# CHECK-DAG: Live Range #{{[0-9]+}} for x2:
+# CHECK-DAG: Live Range #{{[0-9]+}} for x3:
+# CHECK-DAG: Live Range #{{[0-9]+}} for y0:
+
+# Verify available registers - x2, x3, y1 should be available, but not x0/y0
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK-NEXT: ==============================================
+# CHECK-DAG:   wh2
+# CHECK-DAG:   wh3
+# CHECK-DAG:   wl2
+# CHECK-DAG:   wl3
+# CHECK-DAG:   x2
+# CHECK-DAG:   x3
+# CHECK-DAG:   y1
+# CHECK-NOT:   x0
+# CHECK-NOT:   wl0
+# CHECK-NOT:   wh0
+# CHECK-NOT:   x1
+# CHECK-NOT:   y0
+
+# Verify scarce range set - y0 should be identified with 2 ranges
+# CHECK: Most promising scarce range set: 2 ranges
+# CHECK-NEXT: Register class: eY
+# CHECK-NEXT:   [0] BaseReg=y0 Defs=2 Uses=1
+# CHECK-NEXT:   [1] BaseReg=y0 Defs=2 Uses=1
+
+--- |
+  define void @test_reserved_subreg_scarce_superreg() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_reserved_subreg_scarce_superreg
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p7, $y4, $r2, $r3
+    
+    $lc = ADD_NC_mv_add_ri $r3, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7, $y4, $r2, $r3
+    
+    ; EARLY in program order (analyzed LAST in backward pass):
+    ; First y0 range: normal, disjoint from reserved x0
+    $x0, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 0 :: (load (<16 x s32>))
+    $x1, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 64 :: (load (<16 x s32>))
+    $dm3 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y0, $y4, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    
+    ; x2 range: normal, makes x2 available for reallocation
+    $x2, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 128 :: (load (<16 x s32>))
+    
+    ; x3 range: normal, makes x3 available for reallocation
+    $x3, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 192 :: (load (<16 x s32>))
+    
+    ; Second y0 range: normal, disjoint from reserved x0 range
+    $x0, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 256 :: (load (<16 x s32>))
+    $x1, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 320 :: (load (<16 x s32>))
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y0, $y4, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    
+    ; LATE in program order (analyzed FIRST in backward pass):
+    ; RESERVED range: x0 load that feeds into live-out
+    $x0, $p7 = VLD_x_pstm_nrm_imm_pseudo $p7, 384 :: (load (<16 x s32>))
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    ; We make x0 live to force a reserved range
+    liveins: $x0
+    
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test13-read-modify-write.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test13-read-modify-write.mir
new file mode 100644
index 000000000000..00c40a65ce05
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test13-read-modify-write.mir
@@ -0,0 +1,87 @@
+# NOTE: Test for AIERegDefUseTracker - read-modify-write pattern
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 13: Read-modify-write pattern
+#
+# This tests the backward scan handling of an instruction that both reads and writes
+# the same register:
+#   x0 = VLD... p0           ; def x0 (first def)
+#   x0 = VSHUFFLE x0, x0, r0 ; use x0, use x0, def x0 (read-modify-write)
+#   VST x0                   ; use x0 (final use)
+#
+# In the backward scan with correct def-before-use processing:
+# 1. VST makes x0 live (uses x0) - creates Live Range #0
+# 2. VSHUFFLE def KILLS x0's liveness (terminates Range #0)
+# 3. VSHUFFLE uses create a NEW live range #1 for x0
+# 4. VLD def terminates Live Range #1
+#
+# The expected result is TWO separate live ranges for x0:
+# - Live Range 1: VLD def -> VSHUFFLE uses (1 def, 2 uses)
+# - Live Range 2: VSHUFFLE def -> VST use (1 def, 1 use)
+#
+# CHECK: FINAL LIVE RANGES
+# CHECK: Live Range #{{[0-9]+}} for x0:
+# CHECK:   Definitions (1):
+# CHECK:   Uses (2):
+# CHECK: Live Range #{{[0-9]+}} for x0:
+# CHECK:   Definitions (1):
+# CHECK:   Uses (1):
+
+--- |
+  define void @test_read_modify_write() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_read_modify_write
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $r0, $r1, $r2
+    
+    $lc = ADD_NC_mv_add_ri $r1, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $r0
+    
+    ; Def x0 (first def)
+    $x0, $p0 = VLD_x_pstm_nrm_imm_pseudo $p0, 0 :: (load (<16 x s32>))
+    
+    ; Read-modify-write: use x0 twice, def x0
+    ; This instruction reads x0 and writes x0
+    $x0 = VSHUFFLE_vec_shuffle_x $x0, $x0, $r0
+    
+    ; Final use of x0 - store consumes the result
+    VST_dmx_sts_x_idx_imm $x0, $p0, 64 :: (store (<16 x s32>))
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2-subreg-defs-composite-use.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2-subreg-defs-composite-use.mir
new file mode 100644
index 000000000000..997434cc8efb
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2-subreg-defs-composite-use.mir
@@ -0,0 +1,66 @@
+# NOTE: Test for AIERegDefUseTracker - sub-register defs with composite use
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 2: def(cml4) def(cmh4) use(dm4) leads to one live range with two defs and one use
+# CHECK: Live Range {{.*}} for dm4:
+# CHECK:   Definitions (2):
+# CHECK-DAG:     Register: cml4 (SubRegIdx: 10)
+# CHECK-DAG:     Register: cmh4 (SubRegIdx: 9)
+# CHECK:   Uses (1):
+# CHECK:     Register: dm4
+
+--- |
+  define void @test_subreg_defs_composite_use() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_subreg_defs_composite_use
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p7, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7
+    
+    ; Define low half of dm4
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 0 :: (load (<32 x s16>))
+    
+    ; Define high half of dm4
+    $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 64 :: (load (<32 x s16>))
+    
+    ; Use composite register dm4
+    $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2b-missing-subreg-def.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2b-missing-subreg-def.mir
new file mode 100644
index 000000000000..8eafee971e91
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2b-missing-subreg-def.mir
@@ -0,0 +1,60 @@
+# NOTE: Test for AIERegDefUseTracker - missing sub-register def with composite use
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 2b: def(cml4) use(dm4) with cmh4 as livein - should filter out dm4 live range
+# Since cmh4 is not defined in the block but is live-in, the dm4 live range
+# should be filtered out as it's not fully defined
+# CHECK-NOT: Live Range {{.*}} for dm4:
+
+--- |
+  define void @test_missing_subreg_def() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_missing_subreg_def
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p7, $r0, $cmh4
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7, $cmh4
+    
+    ; Define only low half of dm4 (cmh4 is live-in)
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 0 :: (load (<32 x s16>))
+    
+    ; Use composite register dm4 (but cmh4 was not defined in this block)
+    $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2c-aliasing-with-unmanaged.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2c-aliasing-with-unmanaged.mir
new file mode 100644
index 000000000000..4332bc1e96c9
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test2c-aliasing-with-unmanaged.mir
@@ -0,0 +1,72 @@
+# NOTE: Test for AIERegDefUseTracker - aliasing with unmanaged live range
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 2c: def(cml4) use(dm4) with cmh4 as livein (unmanaged), followed by def/use of cml4
+# Both dm4 and the subsequent cml4 live ranges should be filtered out
+# since they alias with the unmanaged cmh4 live range
+# cmh4 should not appear as a live range since it's only live-in (not defined in block)
+# With implicit operands filtered, we should only have ex2 live range
+# CHECK: FINAL LIVE RANGES
+# CHECK: Total live ranges: 1
+# CHECK: Live Range #{{[0-9]+}} for ex2:
+# CHECK-NEXT:   Definitions (1):
+# CHECK:         Register: ex2
+# CHECK-NEXT:   Uses (0):
+# CHECK-EMPTY:
+
+--- |
+  define void @test_aliasing_with_unmanaged() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_aliasing_with_unmanaged
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p7, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7, $cmh4
+    
+    ; Define only low half of dm4 (cmh4 is explicitly live-in, unmanaged)
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 0 :: (load (<32 x s16>))
+    
+    ; Use composite register dm4 (but cmh4 was not defined in this block)
+    $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    
+    ; Now define and use cml4 again (this should also be filtered since it aliases with unmanaged cmh4)
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 32 :: (load (<32 x s16>))
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3-composite-def-subreg-uses.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3-composite-def-subreg-uses.mir
new file mode 100644
index 000000000000..3bc2015784fc
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3-composite-def-subreg-uses.mir
@@ -0,0 +1,67 @@
+# NOTE: Test for AIERegDefUseTracker - composite def with sub-register uses
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 3: def(dm4) use(cml4) use(cmh4) leads to one live range with one def and two uses
+# The uses should have sub-register indices recorded
+# CHECK: Live Range {{.*}} for dm4:
+# CHECK:   Definitions (1):
+# CHECK:     Register: dm4
+# CHECK:   Uses (2):
+# CHECK-DAG:     Register: cml4 (SubRegIdx: 10)
+# CHECK-DAG:     Register: cmh4 (SubRegIdx: 9)
+
+--- |
+  define void @test_composite_def_subreg_uses() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_composite_def_subreg_uses
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $y5, $y0, $r2, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $y5, $y0, $r2
+    
+    ; Define composite register dm4
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    
+    ; Use low half cml4
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; Use high half cmh4
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3b-subreg-use-in-successor.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3b-subreg-use-in-successor.mir
new file mode 100644
index 000000000000..dda105e24f94
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3b-subreg-use-in-successor.mir
@@ -0,0 +1,75 @@
+# NOTE: Test for AIERegDefUseTracker - composite def with subreg use in successor
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 3b: def(dm4) use(cml4) with cmh4 used in successor block (live-out)
+# Since cmh4 (a subreg of dm4) is live-out to the successor block, the dm4 live range
+# is marked as RESERVED.
+# CHECK: FINAL LIVE RANGES
+# CHECK: Total live ranges: 1
+# CHECK: Live Range #{{[0-9]+}} for dm4 [RESERVED]:
+# CHECK-NEXT:   Definitions (1):
+# CHECK-NEXT:     [0] Register: dm4
+# CHECK:   Uses (1):
+# CHECK-NEXT:     [0] Register: cml4 (SubRegIdx: 10)
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK-NEXT: ==============================================
+# CHECK-NEXT: Total: 0 registers
+
+--- |
+  define void @test_subreg_use_in_successor() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_subreg_use_in_successor
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $y5, $y0, $r2, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $y5, $y0, $r2
+    
+    ; Define composite register dm4
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    
+    ; Use low half cml4 in this block
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; cmh4 is live-out to successor block
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    liveins: $p0, $cmh4
+    
+    ; Use high half cmh4 in successor block
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3c-aliasing-with-liveout.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3c-aliasing-with-liveout.mir
new file mode 100644
index 000000000000..ff9553b93079
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test3c-aliasing-with-liveout.mir
@@ -0,0 +1,76 @@
+# NOTE: Test for AIERegDefUseTracker - aliasing with live-out range
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 3c: def(dm4) use(cml4) with cmh4 live-out to successor, followed by def/use of cmh4
+# The cmh4 def/use creates a RESERVED range (feeds live-out).
+# The dm4->cml4 range is independent (cmh4 is fully redefined after) so it's a normal range.
+# NOTE: Current status quo - no registers available for reallocation due to aliasing constraints.
+# CHECK: FINAL LIVE RANGES
+# CHECK: Total live ranges: 2
+# CHECK-DAG: Live Range #{{[0-9]+}} for cmh4 [RESERVED]:
+# CHECK-DAG: Live Range #{{[0-9]+}} for dm4:
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK: Total: 0 registers
+
+--- |
+  define void @test_aliasing_with_liveout() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_aliasing_with_liveout
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p7, $y5, $y0, $r2, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7, $y5, $y0, $r2
+    
+    ; Define composite register dm4
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    
+    ; Use low half cml4 in this block
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; cmh4 is live-out to successor block, but also define and use it here
+    ; This def/use range is kept and marked RESERVED since cmh4 is part of the live-out set
+    $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 64 :: (load (<32 x s16>))
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 32, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    liveins: $p0, $cmh4
+    
+    ; Use high half cmh4 in successor block
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test4-def-only-garbage-bin.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test4-def-only-garbage-bin.mir
new file mode 100644
index 000000000000..9f6529dd3e19
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test4-def-only-garbage-bin.mir
@@ -0,0 +1,60 @@
+# NOTE: Test for AIERegDefUseTracker - def-only live range (garbage bin register)
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test: Def-only live range should be kept (garbage bin register)
+# r0 has def only - valid live range (garbage bin)
+# CHECK: FINAL LIVE RANGES
+# CHECK: Total live ranges: 1
+# CHECK: Live Range #0 for r0:
+# CHECK:   Definitions (1):
+# CHECK:     Register: r0
+# CHECK:   Uses (0):
+
+--- |
+  define void @test_def_only_garbage_bin() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_def_only_garbage_bin
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $r4
+    
+    $lc = ADD_NC_mv_add_ri $r4, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    
+    ; r0: def-only (garbage bin register) - from immediate
+    dead $r0 = MOVA 100
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test5-two-subreg-def-chains.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test5-two-subreg-def-chains.mir
new file mode 100644
index 000000000000..45bd3285a332
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test5-two-subreg-def-chains.mir
@@ -0,0 +1,72 @@
+# NOTE: Test for AIERegDefUseTracker - two separate sub-register def chains
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 5: Two separate chains with subreg defs and composite use (like test1 but with test2 pattern)
+# First chain: def(cml4) def(cmh4) use(dm4)
+# Second chain: def(cml4) def(cmh4) use(dm4)
+# Should create two separate live ranges for dm4
+# CHECK-DAG: Live Range {{.*}} for dm4:
+# CHECK-DAG: Live Range {{.*}} for dm4:
+
+--- |
+  define void @test_two_subreg_def_chains() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_two_subreg_def_chains
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p7, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p7
+    
+    ; First chain: subreg defs -> composite use
+    ; Define low half of dm4
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 0 :: (load (<32 x s16>))
+    ; Define high half of dm4
+    $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 64 :: (load (<32 x s16>))
+    ; Use composite register dm4
+    $ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    
+    ; Second chain: subreg defs -> composite use (separate from first)
+    ; Define low half of dm4 again
+    $cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 128 :: (load (<32 x s16>))
+    ; Define high half of dm4 again
+    $cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 192 :: (load (<32 x s16>))
+    ; Use composite register dm4 again
+    $ex4 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test6-two-composite-def-chains.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test6-two-composite-def-chains.mir
new file mode 100644
index 000000000000..4ced5b1f3f11
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test6-two-composite-def-chains.mir
@@ -0,0 +1,72 @@
+# NOTE: Test for AIERegDefUseTracker - two separate composite def chains
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 6: Two separate chains with composite def and subreg uses (like test1 but with test3 pattern)
+# First chain: def(dm4) use(cml4) use(cmh4)
+# Second chain: def(dm4) use(cml4) use(cmh4)
+# Should create two separate live ranges for dm4
+# CHECK-DAG: Live Range {{.*}} for dm4:
+# CHECK-DAG: Live Range {{.*}} for dm4:
+
+--- |
+  define void @test_two_composite_def_chains() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_two_composite_def_chains
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $y5, $y0, $r2, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $y5, $y0, $r2
+    
+    ; First chain: composite def -> subreg uses
+    ; Define composite register dm4
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    ; Use low half cml4
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    ; Use high half cmh4
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; Second chain: composite def -> subreg uses (separate from first)
+    ; Define composite register dm4 again
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    ; Use low half cml4 again
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 128, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    ; Use high half cmh4 again
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p0, 192, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test7-tied-operands.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test7-tied-operands.mir
new file mode 100644
index 000000000000..6a998e344d11
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test7-tied-operands.mir
@@ -0,0 +1,82 @@
+# NOTE: Test for AIERegDefUseTracker - tied register pairs
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025-2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 7: Instructions with tied operands should have their live ranges filtered out
+# Tied operands cannot be independently allocated, so they should be rejected
+# The LDA_2D_dms_lda instruction has tied operands where the output count
+# is tied to the input mod.sub_dim_count (dc0 tied to d0.sub_dim_count)
+# and p3 is tied as well (tied-def 1)
+# p3's live range is filtered because it's used in a tied operand
+# Only r1 should remain as it's not tied
+# CHECK: FINAL LIVE RANGES
+# CHECK-NEXT: ================================
+# CHECK-NEXT: Total live ranges: 1
+# CHECK-EMPTY:
+# CHECK: Live Range #{{[0-9]+}} for r1:
+# CHECK-NEXT:   Definitions (1):
+# CHECK-NEXT:     [0] Register: r1 dead $r1, $p3, $dc0 = LDA_2D_dms_lda
+# CHECK-NEXT:   Uses (0):
+
+--- |
+  define void @test_tied_operands() {
+  entry:
+    br label %loop
+  loop:
+    br label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_tied_operands
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $r0
+    
+    $lc = ADD_NC_mv_add_ri $r0, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0
+    
+    ; Define p3 (not live-in) to use in the tied instruction
+    $p3 = MOV_alu_mv_mv_mv_scl $p0
+    
+    ; Define d0 through its four subregs
+    ; d0 consists of: m0 (sub_mod), dn0 (sub_dim_size), dj0 (sub_dim_stride), dc0 (sub_dim_count)
+    $m0 = MOV_scalar_imm11_pseudo 0
+    $dn0 = MOV_scalar_imm11_pseudo 16
+    $dj0 = MOV_scalar_imm11_pseudo 1
+    $dc0 = MOV_scalar_imm11_pseudo 256
+    
+    ; LDA_2D_dms_lda has tied operands: $count_out=$mod.sub_dim_count
+    ; The output $dc0 (count_out) is tied to the input $d0.sub_dim_count
+    ; p3 is also tied (tied-def 1)
+    ; This creates a tied register constraint that should be filtered
+    ; Both the d0 live range and the tied operands (p3, dc0) should be filtered
+    dead $r1, $p3, $dc0 = LDA_2D_dms_lda $p3, $d0 :: (load (s32))
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test8-reserved-liveout-ranges.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test8-reserved-liveout-ranges.mir
new file mode 100644
index 000000000000..9a5c60020659
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test8-reserved-liveout-ranges.mir
@@ -0,0 +1,108 @@
+# NOTE: Test for AIERegDefUseTracker - reserved ranges for live-out defs
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 8: Verify reserved range handling for defs feeding live-out
+# 
+# This test exercises the relaxed live-in/live-out restrictions:
+# 1. A def that feeds into a live-out use should create a RESERVED range
+# 2. An additional disjoint live range on the same register should NOT make
+#    that register available for reallocation
+#
+# Program order (backward analysis processes in reverse):
+# - r0: def -> use (disjoint, early) - analyzed LAST -> normal
+# - r1: def -> use (not live-out) - normal
+# - r0: def -> use (late, feeds live-out) - analyzed FIRST -> RESERVED
+#
+# Expected behavior:
+# - r0's late range (feeding live-out) should be marked RESERVED
+# - r0's early disjoint range should be normal (not reserved)
+# - r0 should NOT appear in available physical registers (due to reserved range)
+# - r1 should appear in available physical registers
+
+# CHECK-DAG: Live Range #{{[0-9]+}} for r0 [RESERVED]:
+# CHECK-DAG: Live Range #{{[0-9]+}} for r0:
+# CHECK-DAG: Live Range #{{[0-9]+}} for r1:
+
+# Verify r0 is NOT in available registers (due to reserved range)
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK-NEXT: ==============================================
+# CHECK-DAG:   l2
+# CHECK-DAG:   r1
+# CHECK-DAG:   r4
+# CHECK-DAG:   r5
+# CHECK-DAG:   r6
+# CHECK: Total: 5 registers
+
+--- |
+  define void @test_reserved_liveout() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_reserved_liveout
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p6, $r2, $r3
+    
+    $lc = ADD_NC_mv_add_ri $r3, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p6, $r2, $r3
+    
+    ; EARLY in program order (analyzed LAST in backward pass):
+    ; r0 def -> use chain, disjoint from the later r0 range
+    ; This is a normal range
+    $r0 = MOV_alu_mv_mv_mv_scl $p6
+    $r5 = ADD_NC_mv_add_rr $r0, $r3
+    
+    ; r1 live range: def -> use (not live-out)
+    ; This should be a normal, non-reserved range
+    $r1 = MOV_alu_mv_mv_mv_scl $p6
+    $r6 = ADD_NC_mv_add_rr $r1, $r2
+    
+    ; LATE in program order (analyzed FIRST in backward pass):
+    ; r0 def -> use that feeds live-out to bb.2
+    ; This should be marked as RESERVED because r0 is live-out to bb.2
+    ; and this is the last def-use of r0 before the block end
+    $r0 = MOV_alu_mv_mv_mv_scl $p6
+    $r4 = ADD_NC_mv_add_rr $r0, $r2
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    liveins: $r0, $r2
+    
+    ; Use r0 from the loop - this makes r0 live-out from bb.1
+    ; Backward analysis starts here, sees r0 is live-in to bb.2
+    ; Then traces back and finds the LATE r0 def-use (closest to block end)
+    ; feeds into this, so that range becomes RESERVED
+    $r7 = ADD_NC_mv_add_rr $r0, $r2
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test9-reserved-composite-subreg-liveout.mir b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test9-reserved-composite-subreg-liveout.mir
new file mode 100644
index 000000000000..eb6d4b4bdc3b
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/schedule/postpipeliner/regalloc/test9-reserved-composite-subreg-liveout.mir
@@ -0,0 +1,100 @@
+# NOTE: Test for AIERegDefUseTracker - reserved composite ranges with subreg live-out
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2p -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 9: Verify reserved range handling for composite registers with subreg live-out
+# 
+# This test exercises reserved ranges with composite base registers:
+# 1. A composite def (dm4) with multiple subreg uses (cml4, cmh4) that feeds
+#    into a subreg live-out (cml4) should create a RESERVED range
+# 2. An additional disjoint live range on the same composite register should
+#    NOT make that register available for reallocation
+#
+# Program order (backward analysis processes in reverse):
+# - dm4: def → use cml4, use cmh4 (disjoint, early) - analyzed LAST → normal
+# - dm4: def → use cml4, use cmh4 (late, cml4 feeds live-out) - analyzed FIRST → RESERVED
+#
+# Expected behavior:
+# - dm4's late range (feeding subreg live-out) should be marked RESERVED
+# - dm4's early disjoint range should be normal (not reserved)
+# - dm4 (and its subregs) should NOT appear in available physical registers
+
+# CHECK-DAG: Live Range #{{[0-9]+}} for dm4 [RESERVED]:
+# CHECK-DAG: Live Range #{{[0-9]+}} for dm4:
+
+# Verify dm4 and its subregs are NOT in available registers (due to reserved range)
+# CHECK: Available Physical Registers for Reallocation:
+# CHECK-NEXT: ==============================================
+# CHECK-NOT: dm4
+# CHECK-NOT: cml4
+# CHECK-NOT: cmh4
+# CHECK: Total: 0 registers
+
+--- |
+  define void @test_reserved_composite_subreg_liveout() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_reserved_composite_subreg_liveout
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $p1, $y5, $y0, $r2, $r3
+    
+    $lc = ADD_NC_mv_add_ri $r3, 0
+    $ls = MOVXM %bb.1
+    $le = MOVXM <mcsymbol .L_LEnd0>
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $p1, $y5, $y0, $r2
+    
+    ; EARLY in program order (analyzed LAST in backward pass):
+    ; dm4 composite def → subreg uses (cml4, cmh4), disjoint from later range
+    ; This is a normal range
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 0, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 64, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    ; LATE in program order (analyzed FIRST in backward pass):
+    ; dm4 composite def → subreg uses (cml4, cmh4) where cml4 feeds live-out
+    ; This should be marked as RESERVED because cml4 is live-out to bb.2
+    $dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y5, $y0, $r2, implicit-def dead $srfpflags, implicit $crfpmask
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 128, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cmh4, $p1, 192, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    liveins: $cml4, $p0
+    
+    ; Use cml4 (subreg of dm4) from the loop - this makes cml4 live-out from bb.1
+    ; Backward analysis starts here, sees cml4 is live-in to bb.2
+    ; Then traces back and finds the LATE dm4 def with cml4 use (closest to block end)
+    ; feeds into this, so that range becomes RESERVED
+    VST_CONV_bf16_fp32_dmx_sts_srs_bf_idx_imm $cml4, $p0, 256, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...
diff --git a/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/conv2d_bf16.mir b/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/conv2d_bf16.mir
index e4b6d5f4ee46..77328d1f17a2 100644
--- a/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/conv2d_bf16.mir
+++ b/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/conv2d_bf16.mir
@@ -64,11 +64,11 @@ body:             |
   ; CHECK-NEXT:   $x3, $p0, $lf0, $r24 = VLDA_POP_dmx_lda_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
   ; CHECK-NEXT:   $x6, $p0, $lf0, $r24 = VLDB_POP_dmx_ldb_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
   ; CHECK-NEXT:   BUNDLE implicit-def $x10, implicit-def $wl10, implicit-def $wh10, implicit-def $p1, implicit-def $x1, implicit-def $wl1, implicit-def $wh1, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit killed $p1, implicit killed $p0, implicit killed $lf0, implicit killed $r24, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x10, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
+  ; CHECK-NEXT:     $x10, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
   ; CHECK-NEXT:     $x1, $p0, $lf0, $r24 = VLDB_POP_dmx_ldb_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x11, implicit-def $wl11, implicit-def $wh11, implicit-def $p1, implicit-def $p0, implicit-def $dc1, implicit-def $dc5, implicit killed $p1, implicit killed $p0, implicit $d1_3d, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x11, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
+  ; CHECK-NEXT:     $x11, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
   ; CHECK-NEXT:     $p0, $dc1, $dc5 = PADDS_3D killed $p0, $d1_3d, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x5, implicit-def $wl5, implicit-def $wh5, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit-def $lfe, implicit-def $srfifo_uf, implicit-def $lc, implicit $r30, implicit killed $p0, implicit killed $lf0, implicit killed $r24, implicit killed $lfe, implicit killed $r5, debug-location !6 {
@@ -84,13 +84,13 @@ body:             |
   ; CHECK-NEXT:     MOVXM_lng_cg_le_abs <mcsymbol .L_LEnd0>, implicit-def $le, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x10, implicit-def $wl10, implicit-def $wh10, implicit-def $p1, implicit-def $x1, implicit-def $wl1, implicit-def $wh1, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit killed $p1, implicit killed $p0, implicit killed $lf0, implicit killed $r24, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x10, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
+  ; CHECK-NEXT:     $x10, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
   ; CHECK-NEXT:     $x1, $p0, $lf0, $r24 = VLDB_POP_dmx_ldb_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x11, implicit-def $wl11, implicit-def $wh11, implicit-def $p1, implicit-def $p0, implicit-def $dc1, implicit-def $dc5, implicit-def $x7, implicit-def $wl7, implicit-def $wh7, implicit killed $p1, implicit killed $p0, implicit $d1_3d, implicit killed $x5, implicit killed $x3, implicit $r18, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x11, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
+  ; CHECK-NEXT:     $x11, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
   ; CHECK-NEXT:     $p0, $dc1, $dc5 = PADDS_3D killed $p0, $d1_3d, debug-location !6
-  ; CHECK-NEXT:     renamable $x7 = VSHUFFLE_vec_shuffle_x killed renamable $x5, killed renamable $x3, renamable $r18, debug-location !6
+  ; CHECK-NEXT:     $x7 = VSHUFFLE_vec_shuffle_x killed $x5, killed $x3, renamable $r18, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.for.body266.i:
@@ -99,27 +99,27 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   BUNDLE implicit-def $x5, implicit-def $wl5, implicit-def $wh5, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit-def $lfe, implicit-def $srfifo_uf, implicit-def $x4, implicit-def $wl4, implicit-def $wh4, implicit $r30, implicit killed $p0, implicit killed $lf0, implicit killed $r24, implicit killed $lfe, implicit killed $x3, implicit $r12, debug-location !6 {
   ; CHECK-NEXT:     $x5, $p0, $lf0, $r24 = VLDB_POPX $r30, $r30, killed $p0, killed $lf0, killed $r24, implicit-def $lfe, implicit-def $srfifo_uf, implicit killed $lfe, debug-location !6 :: (load unknown-size, align 1)
-  ; CHECK-NEXT:     renamable $x4 = VSHUFFLE_vec_shuffle_x internal renamable $x5, killed renamable $x3, renamable $r12, debug-location !6
+  ; CHECK-NEXT:     $x4 = VSHUFFLE_vec_shuffle_x internal $x5, killed $x3, renamable $r12, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x3, implicit-def $wl3, implicit-def $wh3, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit-def $x8, implicit-def $wl8, implicit-def $wh8, implicit-def $cml3, implicit-def $bmll3, implicit-def $bmlh3, implicit-def dead $srfpflags, implicit killed $p0, implicit killed $lf0, implicit killed $r24, implicit killed $x6, implicit $x1, implicit $r18, implicit killed $cml3, implicit killed $x7, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
   ; CHECK-NEXT:     $x3, $p0, $lf0, $r24 = VLDA_POP_dmx_lda_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
-  ; CHECK-NEXT:     renamable $x8 = VSHUFFLE_vec_shuffle_x killed renamable $x6, renamable $x1, renamable $r18, debug-location !6
+  ; CHECK-NEXT:     $x8 = VSHUFFLE_vec_shuffle_x killed $x6, $x1, renamable $r18, debug-location !6
   ; CHECK-NEXT:     $cml3 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml3, killed $x7, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x6, implicit-def $wl6, implicit-def $wh6, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit-def $cml2, implicit-def $bmll2, implicit-def $bmlh2, implicit-def dead $srfpflags, implicit killed $p0, implicit killed $lf0, implicit killed $r24, implicit killed $x1, implicit $r12, implicit killed $cml2, implicit killed $x4, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
   ; CHECK-NEXT:     $x6, $p0, $lf0, $r24 = VLDB_POP_dmx_ldb_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
-  ; CHECK-NEXT:     renamable $x6 = VSHUFFLE_vec_shuffle_x internal renamable $x6, killed renamable $x1, renamable $r12, debug-location !6
+  ; CHECK-NEXT:     $x6 = VSHUFFLE_vec_shuffle_x internal $x6, killed $x1, renamable $r12, debug-location !6
   ; CHECK-NEXT:     $cml2 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml2, killed $x4, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x10, implicit-def $wl10, implicit-def $wh10, implicit-def $p1, implicit-def $x1, implicit-def $wl1, implicit-def $wh1, implicit-def $p0, implicit-def $lf0, implicit-def $lfl0, implicit-def $lfh0, implicit-def $r24, implicit-def $cml1, implicit-def $bmll1, implicit-def $bmlh1, implicit-def dead $srfpflags, implicit killed $p1, implicit killed $p0, implicit killed $lf0, implicit killed $r24, implicit killed $cml1, implicit killed $x8, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x10, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
+  ; CHECK-NEXT:     $x10, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
   ; CHECK-NEXT:     $x1, $p0, $lf0, $r24 = VLDB_POP_dmx_ldb_fifo_x_normal_pop killed $p0, killed $lf0, killed $r24, debug-location !6 :: (load unknown-size, align 1)
   ; CHECK-NEXT:     $cml1 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml1, killed $x8, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x11, implicit-def $wl11, implicit-def $wh11, implicit-def $p1, implicit-def $p0, implicit-def $dc1, implicit-def $dc5, implicit-def $x7, implicit-def $wl7, implicit-def $wh7, implicit-def $cml0, implicit-def $bmll0, implicit-def $bmlh0, implicit-def dead $srfpflags, implicit killed $p1, implicit killed $p0, implicit $d1_3d, implicit killed $x5, implicit killed $x3, implicit $r18, implicit killed $cml0, implicit killed $x6, implicit killed $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x11, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
+  ; CHECK-NEXT:     $x11, renamable $p1 = VLDA_dmx_lda_x_ld_pstm_nrm_imm killed renamable $p1, 64, debug-location !6 :: (load (<16 x s32>), addrspace 6)
   ; CHECK-NEXT:     $p0, $dc1, $dc5 = PADDS_3D killed $p0, $d1_3d, debug-location !6
-  ; CHECK-NEXT:     renamable $x7 = VSHUFFLE_vec_shuffle_x killed renamable $x5, killed renamable $x3, renamable $r18, debug-location !6
+  ; CHECK-NEXT:     $x7 = VSHUFFLE_vec_shuffle_x killed $x5, killed $x3, renamable $r18, debug-location !6
   ; CHECK-NEXT:     $cml0 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml0, killed $x6, killed $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.2, debug-location !6
@@ -127,27 +127,27 @@ body:             |
   ; CHECK-NEXT: bb.3.for.cond.cleanup265.i:
   ; CHECK-NEXT:   liveins: $cml0, $cml1, $cml2, $cml3
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $x4 = VSHUFFLE_vec_shuffle_x renamable $x5, renamable $x3, renamable $r12, debug-location !6
+  ; CHECK-NEXT:   $x4 = VSHUFFLE_vec_shuffle_x $x5, $x3, renamable $r12, debug-location !6
   ; CHECK-NEXT:   BUNDLE implicit-def $x8, implicit-def $wl8, implicit-def $wh8, implicit-def $cml3, implicit-def $bmll3, implicit-def $bmlh3, implicit-def dead $srfpflags, implicit $x6, implicit $x1, implicit $r18, implicit killed $cml3, implicit killed $x7, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x8 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x1, renamable $r18, debug-location !6
+  ; CHECK-NEXT:     $x8 = VSHUFFLE_vec_shuffle_x $x6, $x1, renamable $r18, debug-location !6
   ; CHECK-NEXT:     $cml3 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml3, killed $x7, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x6, implicit-def $wl6, implicit-def $wh6, implicit-def $cml2, implicit-def $bmll2, implicit-def $bmlh2, implicit-def dead $srfpflags, implicit killed $x6, implicit $x1, implicit $r12, implicit killed $cml2, implicit killed $x4, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x6 = VSHUFFLE_vec_shuffle_x killed renamable $x6, renamable $x1, renamable $r12, debug-location !6
+  ; CHECK-NEXT:     $x6 = VSHUFFLE_vec_shuffle_x killed $x6, $x1, renamable $r12, debug-location !6
   ; CHECK-NEXT:     $cml2 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml2, killed $x4, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   $cml1 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml1, killed $x8, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   BUNDLE implicit-def $x7, implicit-def $wl7, implicit-def $wh7, implicit-def $cml0, implicit-def $bmll0, implicit-def $bmlh0, implicit-def dead $srfpflags, implicit $x5, implicit $x3, implicit $r18, implicit killed $cml0, implicit $x6, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x7 = VSHUFFLE_vec_shuffle_x renamable $x5, renamable $x3, renamable $r18, debug-location !6
+  ; CHECK-NEXT:     $x7 = VSHUFFLE_vec_shuffle_x $x5, $x3, renamable $r18, debug-location !6
   ; CHECK-NEXT:     $cml0 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml0, $x6, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
-  ; CHECK-NEXT:   renamable $x4 = VSHUFFLE_vec_shuffle_x killed renamable $x5, killed renamable $x3, renamable $r12, debug-location !6
+  ; CHECK-NEXT:   $x4 = VSHUFFLE_vec_shuffle_x killed $x5, killed $x3, renamable $r12, debug-location !6
   ; CHECK-NEXT:   BUNDLE implicit-def $x8, implicit-def $wl8, implicit-def $wh8, implicit-def $cml3, implicit-def $bmll3, implicit-def $bmlh3, implicit-def dead $srfpflags, implicit $x6, implicit $x1, implicit killed $r18, implicit killed $cml3, implicit killed $x7, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x8 = VSHUFFLE_vec_shuffle_x renamable $x6, renamable $x1, killed renamable $r18, debug-location !6
+  ; CHECK-NEXT:     $x8 = VSHUFFLE_vec_shuffle_x $x6, $x1, killed renamable $r18, debug-location !6
   ; CHECK-NEXT:     $cml3 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml3, killed $x7, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   BUNDLE implicit-def $x6, implicit-def $wl6, implicit-def $wh6, implicit-def $cml2, implicit-def $bmll2, implicit-def $bmlh2, implicit-def dead $srfpflags, implicit killed $x6, implicit killed $x1, implicit killed $r12, implicit killed $cml2, implicit killed $x4, implicit $y5, implicit $r8, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6 {
-  ; CHECK-NEXT:     renamable $x6 = VSHUFFLE_vec_shuffle_x killed renamable $x6, killed renamable $x1, killed renamable $r12, debug-location !6
+  ; CHECK-NEXT:     $x6 = VSHUFFLE_vec_shuffle_x killed $x6, killed $x1, killed renamable $r12, debug-location !6
   ; CHECK-NEXT:     $cml2 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml2, killed $x4, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
   ; CHECK-NEXT:   }
   ; CHECK-NEXT:   $cml1 = VMAC_f_vmac_bf_half_vmul_bf_core_X_Y killed $cml1, killed $x8, $y5, $r8, implicit-def dead $srfpflags, implicit $crbf8conf, implicit $crfp8conf, implicit $crfpmask, debug-location !6
diff --git a/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/regalloc/test14-read-modify-write.mir b/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/regalloc/test14-read-modify-write.mir
new file mode 100644
index 000000000000..e888fe301487
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2ps/schedule/postpipeliner/regalloc/test14-read-modify-write.mir
@@ -0,0 +1,96 @@
+# NOTE: Test for AIERegDefUseTracker - composite register pattern with AIE2PS VMUL
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2026 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -verify-machineinstrs --mtriple=aie2ps -O2 \
+# RUN:   --start-before=postmisched %s \
+# RUN:   --aie-postpipeliner-vreg-mode=1 \
+# RUN:   --aie-postpipeliner-phys-mode=0 \
+# RUN:   --debug-only=aie-reg-liverange \
+# RUN:   --aie-test-regdefuse-tracker --aie-postpipeliner-filter-no-choice=false \
+# RUN:   -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test 14: Composite register pattern with VMUL using Y registers
+#
+# Pattern:
+#   x10 = VLDB_UNPACK...     ; first def of x10
+#   x0 = VMIN x10, x4...     ; local def of x0 (uses the first def of x10)
+#   x10 = VMAX x0, x4...     ; second def of x10 (read-modify-write chain: reads x0, derived from the first x10)
+#   dm1 = VMUL y0, y5, r9    ; uses y0 (x0 + x1) and y5 (x10 + x11)
+#
+# y0 is composed of: locally-defined x0 + live-in x1
+# y5 is composed of: locally-defined x10 + undefined x11 (not live-in)
+#
+# The goal is to have two separate live ranges for x10:
+# - Live Range 1: VLDB_UNPACK def -> VMIN use (base register: x10)
+# - Live Range 2: VMAX def -> VMUL use via y5 (base register: y5)
+#
+# This tests that lane-mask-based overlap checking correctly separates
+# live ranges when a subreg is redefined within a super-register range.
+#
+# CHECK: FINAL LIVE RANGES
+# CHECK: Live Range #7 for x10:
+# CHECK-NEXT:   Definitions (1):
+# CHECK-NEXT:     [0] Register: x10 $x10, $p0 = VLDB_UNPACK_dmw_ldb_unpack_pstm_nrm_imm_unpackSign1
+# CHECK-NEXT:   Uses (1):
+# CHECK-NEXT:     [0] Register: x10 $x0, dead $r16 = VMIN_GE_16_vaddSign1 $x10, $x4
+# CHECK-EMPTY:
+# CHECK-NEXT: Live Range #2 for y5:
+# CHECK-NEXT:   Definitions (1):
+# CHECK-NEXT:     [0] Register: x10 (SubRegIdx: 8) $x10, dead $r16 = VMAX_LT_16_vaddSign1 $x0, $x4
+# CHECK-NEXT:   Uses (1):
+# CHECK-NEXT:     [0] Register: y5 $dm1 = VMUL_vmul_vmul_cm_core_Y_Y $y0, $y5, $r9
+
+--- |
+  define void @test_vmul_composite_regs() {
+  entry:
+    br label %loop
+  loop:
+    br i1 undef, label %loop, label %exit
+  exit:
+    ret void
+  }
+...
+---
+name:            test_vmul_composite_regs
+alignment:       16
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+    liveins: $p0, $r1, $r9, $x1, $x4
+    
+    $lc = ADD_NC_add_lc_ri $r1, 0
+    MOVXM_lng_cg_ls_abs %bb.1, implicit-def $ls
+    MOVXM_lng_cg_le_abs <mcsymbol .L_LEnd0>, implicit-def $le
+
+  bb.1.loop (align 16):
+    successors: %bb.1, %bb.2
+    liveins: $p0, $r9, $x1, $x4
+    
+    ; First def of x10 (from load with unpack)
+    $x10, $p0 = VLDB_UNPACK_dmw_ldb_unpack_pstm_nrm_imm_unpackSign1 $p0, 32, implicit $crunpacksize, implicit $unpacksign1
+    
+    ; Local def of x0 (via min operation using x10 and x4)
+    $x0, dead $r16 = VMIN_GE_16_vaddSign1 $x10, $x4, implicit $crbf8conf, implicit $crfp8conf, implicit $vaddsign1
+    
+    ; Second def of x10 (read-modify-write: uses x0, defines x10)
+    $x10, dead $r16 = VMAX_LT_16_vaddSign1 $x0, $x4, implicit $crbf8conf, implicit $crfp8conf, implicit $vaddsign1
+    
+    ; VMUL uses y0 (x0 + x1) and y5 (x10 + x11)
+    ; y0 = locally-defined x0 + live-in x1
+    ; y5 = locally-defined x10 + undefined x11
+    $dm1 = VMUL_vmul_vmul_cm_core_Y_Y $y0, $y5, $r9
+    
+    PseudoLoopEnd <mcsymbol .L_LEnd0>, %bb.1
+
+  bb.2.exit (align 16):
+    
+    RET implicit $lr
+    DelayedSchedBarrier
+
+...