Skip to content

Commit

Permalink
[llvm-mca] Improved support for dependency-breaking instructions.
Browse files Browse the repository at this point in the history
The tool assumes that a zero-latency instruction that doesn't consume hardware
resources is an optimizable dependency-breaking instruction. That means it
doesn't have to wait on register input operands, and it doesn't consume any
physical register. The PRF knows how to optimize it at the register renaming stage.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332249 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Andrea Di Biagio authored and Andrea Di Biagio committed May 14, 2018
1 parent 3f8354c commit 40a24a8
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 35 deletions.
49 changes: 31 additions & 18 deletions tools/llvm-mca/Dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
}
}

void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
MutableArrayRef<unsigned> UsedPhysRegs) {
void RegisterFile::allocatePhysRegs(IndexPlusCostPairTy Entry,
MutableArrayRef<unsigned> UsedPhysRegs) {
unsigned RegisterFileIndex = Entry.first;
unsigned Cost = Entry.second;
if (RegisterFileIndex) {
Expand All @@ -106,8 +106,8 @@ void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
UsedPhysRegs[0] += Cost;
}

void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
MutableArrayRef<unsigned> FreedPhysRegs) {
void RegisterFile::freePhysRegs(IndexPlusCostPairTy Entry,
MutableArrayRef<unsigned> FreedPhysRegs) {
unsigned RegisterFileIndex = Entry.first;
unsigned Cost = Entry.second;
if (RegisterFileIndex) {
Expand All @@ -121,8 +121,9 @@ void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
FreedPhysRegs[0] += Cost;
}

void RegisterFile::addRegisterMapping(WriteState &WS,
MutableArrayRef<unsigned> UsedPhysRegs) {
void RegisterFile::addRegisterWrite(WriteState &WS,
MutableArrayRef<unsigned> UsedPhysRegs,
bool ShouldAllocatePhysRegs) {
unsigned RegID = WS.getRegisterID();
assert(RegID && "Adding an invalid register definition?");

Expand All @@ -131,7 +132,11 @@ void RegisterFile::addRegisterMapping(WriteState &WS,
for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
RegisterMappings[*I].first = &WS;

createNewMappings(Mapping.second, UsedPhysRegs);
// No physical registers are allocated for instructions that are optimized in
// hardware. For example, zero-latency data-dependency breaking instructions
// don't consume physical registers.
if (ShouldAllocatePhysRegs)
allocatePhysRegs(Mapping.second, UsedPhysRegs);

// If this is a partial update, then we are done.
if (!WS.fullyUpdatesSuperRegs())
Expand All @@ -141,8 +146,9 @@ void RegisterFile::addRegisterMapping(WriteState &WS,
RegisterMappings[*I].first = &WS;
}

void RegisterFile::invalidateRegisterMapping(
const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
void RegisterFile::removeRegisterWrite(
const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs,
bool ShouldFreePhysRegs) {
unsigned RegID = WS.getRegisterID();
bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();

Expand All @@ -154,7 +160,8 @@ void RegisterFile::invalidateRegisterMapping(
if (!Mapping.first)
return;

removeMappings(Mapping.second, FreedPhysRegs);
if (ShouldFreePhysRegs)
freePhysRegs(Mapping.second, FreedPhysRegs);

if (Mapping.first == &WS)
Mapping.first = nullptr;
Expand Down Expand Up @@ -261,8 +268,10 @@ void DispatchUnit::notifyInstructionDispatched(const InstRef &IR,
void DispatchUnit::notifyInstructionRetired(const InstRef &IR) {
LLVM_DEBUG(dbgs() << "[E] Instruction Retired: " << IR << '\n');
SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
const InstrDesc &Desc = IR.getInstruction()->getDesc();

for (const std::unique_ptr<WriteState> &WS : IR.getInstruction()->getDefs())
RAT->invalidateRegisterMapping(*WS.get(), FreedRegs);
RAT->removeRegisterWrite(*WS.get(), FreedRegs, !Desc.isZeroLatency());
Owner->notifyInstructionEvent(HWInstructionRetiredEvent(IR, FreedRegs));
Owner->eraseInstruction(IR);
}
Expand Down Expand Up @@ -339,18 +348,22 @@ void DispatchUnit::dispatch(InstRef IR, const MCSubtargetInfo &STI) {
AvailableEntries -= NumMicroOps;
}

// Update RAW dependencies if this instruction is not a zero-latency
// instruction. The assumption is that a zero-latency instruction doesn't
// require to be issued to the scheduler for execution. More importantly, it
// doesn't have to wait on the register input operands.
if (Desc.MaxLatency || !Desc.Resources.empty())
// A dependency-breaking instruction doesn't have to wait on the register
// input operands, and it is often optimized at register renaming stage.
// Update RAW dependencies if this instruction is not a dependency-breaking
// instruction. A dependency-breaking instruction is a zero-latency
// instruction that doesn't consume hardware resources.
// An example of a dependency-breaking instruction on X86 is a zero-idiom XOR.
if (!Desc.isZeroLatency())
for (std::unique_ptr<ReadState> &RS : IS.getUses())
updateRAWDependencies(*RS, STI);

// Allocate new mappings.
// By default, a dependency-breaking zero-latency instruction is expected to
// be optimized at register renaming stage. That means, no physical register
// is allocated to the instruction.
SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles());
for (std::unique_ptr<WriteState> &WS : IS.getDefs())
RAT->addRegisterMapping(*WS, RegisterFiles);
RAT->addRegisterWrite(*WS, RegisterFiles, !Desc.isZeroLatency());

// Reserve slots in the RCU, and notify the instruction that it has been
// dispatched to the schedulers for execution.
Expand Down
34 changes: 19 additions & 15 deletions tools/llvm-mca/Dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,14 @@ class RegisterFile {

// Allocates register mappings in the register file specified by the
// IndexPlusCostPairTy object. This method is called from addRegisterWrite.
void createNewMappings(IndexPlusCostPairTy IPC,
llvm::MutableArrayRef<unsigned> UsedPhysRegs);
void allocatePhysRegs(IndexPlusCostPairTy IPC,
llvm::MutableArrayRef<unsigned> UsedPhysRegs);

// Removes a previously allocated mapping from the register file referenced
// by the IndexPlusCostPairTy object. This method is called from
// removeRegisterWrite.
void removeMappings(IndexPlusCostPairTy IPC,
llvm::MutableArrayRef<unsigned> FreedPhysRegs);
void freePhysRegs(IndexPlusCostPairTy IPC,
llvm::MutableArrayRef<unsigned> FreedPhysRegs);

// Create an instance of RegisterMappingTracker for every register file
// specified by the processor model.
Expand All @@ -126,17 +126,21 @@ class RegisterFile {
initialize(SM, NumRegs);
}

// Creates a new register mapping for RegID.
// This reserves a microarchitectural register in every register file that
// contains RegID.
void addRegisterMapping(WriteState &WS,
llvm::MutableArrayRef<unsigned> UsedPhysRegs);

// Invalidates register mappings associated to the input WriteState object.
// This releases previously allocated mappings for the physical register
// associated to the WriteState.
void invalidateRegisterMapping(const WriteState &WS,
llvm::MutableArrayRef<unsigned> FreedPhysRegs);
// This method updates the data dependency graph by inserting a new register
// definition. This method is also responsible for updating the number of used
// physical registers in the register file(s). The number of physical
// registers is updated only if flag ShouldAllocatePhysRegs is set.
void addRegisterWrite(WriteState &WS,
llvm::MutableArrayRef<unsigned> UsedPhysRegs,
bool ShouldAllocatePhysRegs = true);

// Updates the data dependency graph by removing a write. It also updates the
// internal state of the register file(s) by freeing physical registers.
// The number of physical registers is updated only if flag ShouldFreePhysRegs
// is set.
void removeRegisterWrite(const WriteState &WS,
llvm::MutableArrayRef<unsigned> FreedPhysRegs,
bool ShouldFreePhysRegs = true);

// Checks if there are enough microarchitectural registers in the register
// files. Returns a "response mask" where each bit is the response from a
Expand Down
3 changes: 3 additions & 0 deletions tools/llvm-mca/Instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ struct InstrDesc {
bool MayLoad;
bool MayStore;
bool HasSideEffects;

// Returns true only when MaxLatency is zero and the Resources container is
// empty. Such an instruction is treated as "zero latency": it doesn't consume
// any scheduler resources.
bool isZeroLatency() const {
  if (MaxLatency)
    return false;
  return Resources.empty();
}
};

/// An instruction dispatched to the out-of-order backend.
Expand Down
3 changes: 1 addition & 2 deletions tools/llvm-mca/Scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,13 +260,12 @@ void Scheduler::scheduleInstruction(InstRef &IR) {
// targets, zero-idiom instructions (for example: a xor that clears the value
// of a register) are treated specially, and are often eliminated at register
// renaming stage.
bool IsZeroLatency = !Desc.MaxLatency && Desc.Resources.empty();

// Instructions that use an in-order dispatch/issue processor resource must be
// issued immediately to the pipeline(s). Any other in-order buffered
// resource (i.e. BufferSize=1) is consumed.

if (!IsZeroLatency && !Resources->mustIssueImmediately(Desc)) {
if (!Desc.isZeroLatency() && !Resources->mustIssueImmediately(Desc)) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding " << IR
<< " to the Ready Queue\n");
ReadyQueue[IR.getSourceIndex()] = IR.getInstruction();
Expand Down

0 comments on commit 40a24a8

Please sign in to comment.