Skip to content

Commit

Permalink
[SystemZ, MachineScheduler] Improve post-RA scheduling.
Browse files Browse the repository at this point in the history
The idea of this patch is to continue the scheduler state over an MBB boundary
in the case where the successor block has only one predecessor. This means
that the scheduler will continue in the successor block (after emitting any
branch instructions) with e.g. maintained processor resource counters.
Benchmarks have been confirmed to benefit from this.

The algorithm in MachineScheduler.cpp that extracts scheduling regions of an
MBB has been extended so that the strategy may optionally reverse the order
of processing the regions themselves. This is controlled by a new method
doMBBSchedRegionsTopDown(), which defaults to false.

Handling the top-most region of an MBB first also means that a top-down
scheduler can continue the scheduler state across any scheduling boundary
between two regions inside an MBB.

Review: Ulrich Weigand, Matthias Braun, Andy Trick.
https://reviews.llvm.org/D35053

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311072 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
JonPsson committed Aug 17, 2017
1 parent 605e60b commit 59bdb88
Show file tree
Hide file tree
Showing 8 changed files with 376 additions and 79 deletions.
21 changes: 21 additions & 0 deletions include/llvm/CodeGen/MachineScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,20 @@ class MachineSchedStrategy {
/// This has to be enabled in combination with shouldTrackPressure().
virtual bool shouldTrackLaneMasks() const { return false; }

/// If this method returns true, handling of the scheduling regions
/// themselves (in case of a scheduling boundary in MBB) will be done
/// beginning with the topmost region of MBB. The default implementation
/// returns false.
virtual bool doMBBSchedRegionsTopDown() const { return false; }

/// Initialize the strategy after building the DAG for a new region.
virtual void initialize(ScheduleDAGMI *DAG) = 0;

/// Tell the strategy that MBB is about to be processed.
/// The default implementation does nothing; strategies may override it.
virtual void enterMBB(MachineBasicBlock *MBB) {}

/// Tell the strategy that current MBB is done.
/// The default implementation does nothing; strategies may override it.
virtual void leaveMBB() {}

/// Notify this strategy that all roots have been released (including those
/// that depend on EntrySU or ExitSU).
virtual void registerRoots() {}
Expand Down Expand Up @@ -284,6 +295,13 @@ class ScheduleDAGMI : public ScheduleDAGInstrs {
// Provide a vtable anchor
~ScheduleDAGMI() override;

/// If this method returns true, handling of the scheduling regions
/// themselves (in case of a scheduling boundary in MBB) will be done
/// beginning with the topmost region of MBB.
///
/// The answer is not decided here: the query is forwarded to SchedImpl.
bool doMBBSchedRegionsTopDown() const override {
return SchedImpl->doMBBSchedRegionsTopDown();
}

// Returns LiveIntervals instance for use in DAG mutators and such.
LiveIntervals *getLIS() const { return LIS; }

Expand Down Expand Up @@ -326,6 +344,9 @@ class ScheduleDAGMI : public ScheduleDAGInstrs {
/// reorderable instructions.
void schedule() override;

void startBlock(MachineBasicBlock *bb) override;
void finishBlock() override;

/// Change the position of an instruction within the basic block and update
/// live ranges and region boundary iterators.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
Expand Down
5 changes: 5 additions & 0 deletions include/llvm/CodeGen/ScheduleDAGInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ namespace llvm {
/// Returns an existing SUnit for this MI, or nullptr.
SUnit *getSUnit(MachineInstr *MI) const;

/// If this method returns true, handling of the scheduling regions
/// themselves (in case of a scheduling boundary in MBB) will be done
/// beginning with the topmost region of MBB. This base implementation
/// returns false; it is virtual so subclasses can override it.
virtual bool doMBBSchedRegionsTopDown() const { return false; }

/// Prepares to perform scheduling in the given block.
virtual void startBlock(MachineBasicBlock *BB);

Expand Down
120 changes: 84 additions & 36 deletions lib/CodeGen/MachineScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {

// Initialize the context of the pass.
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
PassConfig = &getAnalysis<TargetPassConfig>();

if (VerifyScheduling)
Expand Down Expand Up @@ -437,11 +438,63 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}

namespace {

/// A region of an MBB for scheduling.
///
/// This helper type is only used inside this translation unit, so it lives in
/// an anonymous namespace to give it internal linkage and avoid clashing with
/// an identically named type defined elsewhere.
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;

  /// Number of instructions in the region, as counted by whoever built it.
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N)
      : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};

} // end anonymous namespace

/// The list of scheduling regions found in a single MBB.
using MBBRegionsVector = SmallVector<SchedRegion, 16>;
/// Split \p MBB into scheduling regions and append them to \p Regions.
///
/// The block is walked from its end towards its beginning, so regions are
/// discovered bottom-up. If \p RegionsTopDown is true, the collected list is
/// reversed before returning so that the topmost region comes first.
static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator RegionEnd = MBB->end();
  while (RegionEnd != MBB->begin()) {
    // Step back onto the boundary instruction that closes this region. The
    // only case where we stay put is the bottom region of a block whose last
    // instruction is not a scheduling boundary (e.g. no terminator).
    const bool AtBlockEnd = (RegionEnd == MBB->end());
    if (!AtBlockEnd ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII))
      --RegionEnd;

    // Scan upwards from RegionEnd until the nearest boundary above it (or the
    // start of the block) is reached; that position is the region's begin.
    unsigned NumRegionInstrs = 0;
    MachineBasicBlock::iterator RegionBegin = RegionEnd;
    while (RegionBegin != MBB->begin()) {
      MachineInstr &Above = *std::prev(RegionBegin);
      if (isSchedBoundary(&Above, &*MBB, MF, TII))
        break;
      // Debug values are not counted. Counting is done with the bundle-aware
      // iterator (unlike MBB::size(), which uses instr_iterator) so that a
      // bundle counts as a single instruction.
      if (!Above.isDebugValue())
        ++NumRegionInstrs;
      --RegionBegin;
    }

    Regions.push_back(SchedRegion(RegionBegin, RegionEnd, NumRegionInstrs));

    // The next region ends where this one began.
    RegionEnd = RegionBegin;
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
bool FixKillFlags) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

// Visit all machine basic blocks.
//
// TODO: Visit blocks in global postorder or postorder within the bottom-up
Expand All @@ -459,39 +512,28 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
continue;
#endif

// Break the block into scheduling regions [I, RegionEnd), and schedule each
// region as soon as it is discovered. RegionEnd points the scheduling
// boundary at the bottom of the region. The DAG does not include RegionEnd,
// but the region does (i.e. the next RegionEnd is above the previous
// RegionBegin). If the current block has no terminator then RegionEnd ==
// MBB->end() for the bottom region.
// Break the block into scheduling regions [I, RegionEnd). RegionEnd
// points to the scheduling boundary at the bottom of the region. The DAG
// does not include RegionEnd, but the region does (i.e. the next
// RegionEnd is above the previous RegionBegin). If the current block has
// no terminator then RegionEnd == MBB->end() for the bottom region.
//
// All the regions of MBB are first found and stored in MBBRegions, which
// will be processed (MBB) top-down if initialized with true.
//
// The Scheduler may insert instructions during either schedule() or
// exitRegion(), even for empty regions. So the local iterators 'I' and
// 'RegionEnd' are invalid across these calls.
//
// MBB::size() uses instr_iterator to count. Here we need a bundle to count
// as a single instruction.
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {

// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end() ||
isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
--RegionEnd;
}
// 'RegionEnd' are invalid across these calls. Instructions must not be
// added to other regions than the current one without updating MBBRegions.

MBBRegionsVector MBBRegions;
getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
for (MBBRegionsVector::iterator R = MBBRegions.begin();
R != MBBRegions.end(); ++R) {
MachineBasicBlock::iterator I = R->RegionBegin;
MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
unsigned NumRegionInstrs = R->NumRegionInstrs;

// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
for (; I != MBB->begin(); --I) {
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
if (!MI.isDebugValue())
++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
Expand All @@ -517,15 +559,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
}

// Schedule a region: possibly reorder instructions.
// This invalidates 'RegionEnd' and 'I'.
// This invalidates the original region iterators.
Scheduler.schedule();

// Close the current region.
Scheduler.exitRegion();

// Scheduling has invalidated the current iterator 'I'. Ask the
// scheduler for the top of it's scheduled region.
RegionEnd = Scheduler.begin();
}
Scheduler.finishBlock();
// FIXME: Ideally, no further passes should rely on kill flags. However,
Expand Down Expand Up @@ -650,6 +688,16 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
releasePred(SU, &Pred);
}

/// Prepare to schedule block \p bb: first run the base-class
/// ScheduleDAGInstrs::startBlock(), then notify the strategy via enterMBB().
void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
ScheduleDAGInstrs::startBlock(bb);
// The strategy is told about the block only after the base class has run.
SchedImpl->enterMBB(bb);
}

/// Finish the current block: first notify the strategy via leaveMBB(), then
/// run the base-class ScheduleDAGInstrs::finishBlock(). This is the reverse of
/// the call order used in startBlock().
void ScheduleDAGMI::finishBlock() {
SchedImpl->leaveMBB();
ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
Expand Down
87 changes: 74 additions & 13 deletions lib/Target/SystemZ/SystemZHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@
// * Processor resources usage. It is beneficial to balance the use of
// resources.
//
// A goal is to consider all instructions, also those outside of any
// scheduling region. Such instructions are "advanced" past and include
// single instructions before a scheduling region, branches etc.
//
// A block that has only one predecessor continues scheduling with the state
// of that predecessor (which may be updated by emitting branches).
//
// ===---------------------------------------------------------------------===//

#include "SystemZHazardRecognizer.h"
Expand All @@ -36,13 +43,9 @@ static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
"resources during scheduling."),
cl::init(8));

SystemZHazardRecognizer::
SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
SchedModel(nullptr) {}

unsigned SystemZHazardRecognizer::
getNumDecoderSlots(SUnit *SU) const {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.

Expand Down Expand Up @@ -73,12 +76,13 @@ void SystemZHazardRecognizer::Reset() {
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
LastEmittedMI = nullptr;
DEBUG(CurGroupDbg = "";);
}

bool
SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return true;

Expand Down Expand Up @@ -125,9 +129,9 @@ void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
#ifndef NDEBUG // Debug output
void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
OS << "SU(" << SU->NodeNum << "):";
OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
OS << TII->getName(SU->getInstr()->getOpcode());

const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return;

Expand Down Expand Up @@ -200,10 +204,15 @@ void SystemZHazardRecognizer::clearProcResCounters() {
CriticalResourceIdx = UINT_MAX;
}

/// Return true if \p MI is a branch, a return, or a SystemZ::CondTrap.
static inline bool isBranchRetTrap(MachineInstr *MI) {
  if (MI->isBranch() || MI->isReturn())
    return true;
  return MI->getOpcode() == SystemZ::CondTrap;
}

// Update state with SU as the next scheduled unit.
void SystemZHazardRecognizer::
EmitInstruction(SUnit *SU) {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
const MCSchedClassDesc *SC = getSchedClass(SU);
DEBUG( dumpCurrGroup("Decode group before emission"););

// If scheduling an SU that must begin a new decoder group, move on
Expand All @@ -218,8 +227,10 @@ EmitInstruction(SUnit *SU) {
cgd << ", ";
dumpSU(SU, cgd););

LastEmittedMI = SU->getInstr();

// After returning from a call, we don't know much about the state.
if (SU->getInstr()->isCall()) {
if (SU->isCall) {
DEBUG (dbgs() << "+++ Clearing state after call.\n";);
clearProcResCounters();
LastFPdOpCycleIdx = UINT_MAX;
Expand Down Expand Up @@ -259,19 +270,22 @@ EmitInstruction(SUnit *SU) {
<< LastFPdOpCycleIdx << "\n";);
}

bool GroupEndingBranch =
(CurrGroupSize >= 1 && isBranchRetTrap(SU->getInstr()));

// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
assert (CurrGroupSize <= 3);

// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
if (CurrGroupSize == 3 || SC->EndGroup)
if (CurrGroupSize == 3 || SC->EndGroup || GroupEndingBranch)
nextGroup();
}

int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;

Expand Down Expand Up @@ -315,7 +329,7 @@ int SystemZHazardRecognizer::
resourcesCost(SUnit *SU) {
int Cost = 0;

const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
const MCSchedClassDesc *SC = getSchedClass(SU);
if (!SC->isValid())
return 0;

Expand All @@ -335,3 +349,50 @@ resourcesCost(SUnit *SU) {
return Cost;
}

/// Update the recognizer state for \p MI, an instruction that has no SUnit of
/// its own. A temporary SUnit is built for it and passed to
/// EmitInstruction(SUnit *).
///
/// \param MI           The instruction to account for.
/// \param TakenBranch  If true and the current decoder group is non-empty
///                     after emission, the group is closed.
void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
                                              bool TakenBranch) {
  // Stand-in scheduling unit for MI, with node number 0.
  SUnit TmpSU(MI, 0);

  // Populate the flags on the temporary unit: call-ness first, then the
  // resource flags derived from the buffer size of each processor resource
  // written by MI's scheduling class.
  TmpSU.isCall = MI->isCall();

  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
  for (const MCWriteProcResEntry &PRE :
       make_range(SchedModel->getWriteProcResBegin(SC),
                  SchedModel->getWriteProcResEnd(SC))) {
    const auto BufSize =
        SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize;
    if (BufSize == 0)
      TmpSU.hasReservedResource = true;
    else if (BufSize == 1)
      TmpSU.isUnbuffered = true;
    // Any other buffer size sets no flag.
  }

  EmitInstruction(&TmpSU);

  // A taken branch closes whatever decoder group is currently open.
  if (TakenBranch && CurrGroupSize > 0)
    nextGroup(false /*DbgOutput*/);

  assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
          "Scheduler: unhandled terminator!");
}

/// Overwrite this recognizer's decoder-group, processor-resource and FPd
/// bookkeeping with the values held by \p Incoming. Only the members assigned
/// below are copied.
void SystemZHazardRecognizer::
copyState(SystemZHazardRecognizer *Incoming) {
  // Decoder group currently being filled (plus its debug string).
  DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
  CurrGroupSize = Incoming->CurrGroupSize;

  // Group count and last-FPd-op cycle index.
  GrpCount = Incoming->GrpCount;
  LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;

  // Processor resource counters and the critical resource index.
  CriticalResourceIdx = Incoming->CriticalResourceIdx;
  ProcResourceCounters = Incoming->ProcResourceCounters;
}
Loading

0 comments on commit 59bdb88

Please sign in to comment.