Skip to content

Commit

Permalink
[llvm-exegesis] Refactor how forbidden registers are computed.
Browse files Browse the repository at this point in the history
Summary:
Right now latency generation can incorrectly select the scratch register
as a dependency-carrying register.
 - Move the logic for preventing register selection from Uops
   implementation to common SnippetGenerator class.
 - Aliasing detection now takes a set of forbidden registers just like
   random register assignment does.

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68084

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373048 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
legrosbuffle committed Sep 27, 2019
1 parent 92a8cd1 commit 20508d0
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 98 deletions.
26 changes: 15 additions & 11 deletions tools/llvm-exegesis/lib/Latency.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ static constexpr size_t kMaxAliasingInstructions = 10;

static std::vector<Instruction>
computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
size_t MaxAliasingInstructions) {
size_t MaxAliasingInstructions,
const BitVector &ForbiddenRegisters) {
// Randomly iterate the set of instructions.
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
Expand All @@ -53,15 +54,16 @@ computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
if (OtherInstr.hasMemoryOperands())
continue;
if (Instr.hasAliasingRegistersThrough(OtherInstr))
AliasingInstructions.push_back(std::move(OtherInstr));
if (Instr.hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters))
AliasingInstructions.push_back(OtherInstr);
if (AliasingInstructions.size() >= MaxAliasingInstructions)
break;
}
return AliasingInstructions;
}

static ExecutionMode getExecutionModes(const Instruction &Instr) {
static ExecutionMode getExecutionModes(const Instruction &Instr,
const BitVector &ForbiddenRegisters) {
ExecutionMode EM = ExecutionMode::UNKNOWN;
if (Instr.hasAliasingImplicitRegisters())
EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
Expand All @@ -70,7 +72,7 @@ static ExecutionMode getExecutionModes(const Instruction &Instr) {
if (Instr.hasMemoryOperands())
EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
else {
if (Instr.hasAliasingRegisters())
if (Instr.hasAliasingRegisters(ForbiddenRegisters))
EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
if (Instr.hasOneUseOrOneDef())
EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
Expand All @@ -80,6 +82,7 @@ static ExecutionMode getExecutionModes(const Instruction &Instr) {

static void appendCodeTemplates(const LLVMState &State,
const Instruction &Instr,
const BitVector &ForbiddenRegisters,
ExecutionMode ExecutionModeBit,
llvm::StringRef ExecutionClassDescription,
std::vector<CodeTemplate> &CodeTemplates) {
Expand Down Expand Up @@ -122,8 +125,8 @@ static void appendCodeTemplates(const LLVMState &State,
}
case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
// Select back-to-back non-memory instruction.
for (const auto OtherInstr :
computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
for (const auto OtherInstr : computeAliasingInstructions(
State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
const AliasingConfigurations Forward(Instr, OtherInstr);
const AliasingConfigurations Back(OtherInstr, Instr);
InstructionTemplate ThisIT(Instr);
Expand All @@ -149,13 +152,14 @@ static void appendCodeTemplates(const LLVMState &State,
LatencySnippetGenerator::~LatencySnippetGenerator() = default;

llvm::Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
LatencySnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
std::vector<CodeTemplate> Results;
const ExecutionMode EM = getExecutionModes(Instr);
const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
for (const auto EC : kExecutionClasses) {
for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
Results);
appendCodeTemplates(State, Instr, ForbiddenRegisters, ExecutionModeBit,
EC.Description, Results);
if (!Results.empty())
break;
}
Expand Down
3 changes: 2 additions & 1 deletion tools/llvm-exegesis/lib/Latency.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class LatencySnippetGenerator : public SnippetGenerator {
~LatencySnippetGenerator() override;

llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};

class LatencyBenchmarkRunner : public BenchmarkRunner {
Expand Down
34 changes: 28 additions & 6 deletions tools/llvm-exegesis/lib/MCInstrDescView.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,26 +183,48 @@ bool Instruction::hasAliasingImplicitRegisters() const {
return ImplDefRegs.anyCommon(ImplUseRegs);
}

// Returns true if there are registers that are both in `A` and `B` but not in
// `Forbidden`.
static bool anyCommonExcludingForbidden(const BitVector &A, const BitVector &B,
const BitVector &Forbidden) {
assert(A.size() == B.size() && B.size() == Forbidden.size());
const auto Size = A.size();
for (int AIndex = A.find_first(); AIndex != -1;) {
const int BIndex = B.find_first_in(AIndex, Size);
if (BIndex == -1)
return false;
if (AIndex == BIndex && !Forbidden.test(AIndex))
return true;
AIndex = A.find_first_in(BIndex + 1, Size);
}
return false;
}

bool Instruction::hasAliasingRegistersThrough(
const Instruction &OtherInstr) const {
return AllDefRegs.anyCommon(OtherInstr.AllUseRegs) &&
OtherInstr.AllDefRegs.anyCommon(AllUseRegs);
const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const {
return anyCommonExcludingForbidden(AllDefRegs, OtherInstr.AllUseRegs,
ForbiddenRegisters) &&
anyCommonExcludingForbidden(OtherInstr.AllDefRegs, AllUseRegs,
ForbiddenRegisters);
}

bool Instruction::hasTiedRegisters() const {
return llvm::any_of(
Variables, [](const Variable &Var) { return Var.hasTiedOperands(); });
}

bool Instruction::hasAliasingRegisters() const {
return AllDefRegs.anyCommon(AllUseRegs);
bool Instruction::hasAliasingRegisters(
const BitVector &ForbiddenRegisters) const {
return anyCommonExcludingForbidden(AllDefRegs, AllUseRegs,
ForbiddenRegisters);
}

bool Instruction::hasOneUseOrOneDef() const {
return AllDefRegs.count() || AllUseRegs.count();
}

void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
const RegisterAliasingTrackerCache &RATC,
llvm::raw_ostream &Stream) const {
Stream << "- " << Name << "\n";
for (const auto &Op : Operands) {
Expand Down Expand Up @@ -251,7 +273,7 @@ void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
Stream << "- hasAliasingImplicitRegisters (execution is always serial)\n";
if (hasTiedRegisters())
Stream << "- hasTiedRegisters (execution is always serial)\n";
if (hasAliasingRegisters())
if (hasAliasingRegisters(RATC.emptyRegisters()))
Stream << "- hasAliasingRegisters\n";
}

Expand Down
6 changes: 4 additions & 2 deletions tools/llvm-exegesis/lib/MCInstrDescView.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,11 @@ struct Instruction {
// Repeating this instruction may execute sequentially by picking aliasing
// Use and Def registers. It may also execute in parallel by picking non
// aliasing Use and Def registers.
bool hasAliasingRegisters() const;
bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const;

// Whether this instruction's registers alias with OtherInstr's registers.
bool hasAliasingRegistersThrough(const Instruction &OtherInstr) const;
bool hasAliasingRegistersThrough(const Instruction &OtherInstr,
const BitVector &ForbiddenRegisters) const;

// Returns whether this instruction has Memory Operands.
// Repeating this instruction executes sequentially with an instruction that
Expand All @@ -129,6 +130,7 @@ struct Instruction {

// Convenient function to help with debugging.
void dump(const llvm::MCRegisterInfo &RegInfo,
const RegisterAliasingTrackerCache &RATC,
llvm::raw_ostream &Stream) const;

const llvm::MCInstrDesc *Description; // Never nullptr.
Expand Down
31 changes: 25 additions & 6 deletions tools/llvm-exegesis/lib/SnippetGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,33 @@ SnippetGenerator::~SnippetGenerator() = default;

llvm::Expected<std::vector<BenchmarkCode>>
SnippetGenerator::generateConfigurations(const Instruction &Instr) const {
if (auto E = generateCodeTemplates(Instr)) {
const auto &RATC = State.getRATC();
llvm::BitVector ForbiddenRegs = State.getRATC().reservedRegisters();

// If the instruction has memory registers, prevent the generator from
// using the scratch register and its aliasing registers.
if (Instr.hasMemoryOperands()) {
const auto &ET = State.getExegesisTarget();
unsigned ScratchSpacePointerInReg =
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
if (ScratchSpacePointerInReg == 0)
return llvm::make_error<BenchmarkFailure>(
"Infeasible : target does not support memory instructions");
const auto &ScratchRegAliases =
State.getRATC().getRegister(ScratchSpacePointerInReg).aliasedBits();
// If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
// FIXME: We could make a copy of the scratch register.
for (const auto &Op : Instr.Operands) {
if (Op.isDef() && Op.isImplicitReg() &&
ScratchRegAliases.test(Op.getImplicitReg()))
return llvm::make_error<BenchmarkFailure>(
"Infeasible : memory instruction uses scratch memory register");
}
ForbiddenRegs |= ScratchRegAliases;
}

if (auto E = generateCodeTemplates(Instr, ForbiddenRegs)) {
std::vector<BenchmarkCode> Output;
for (CodeTemplate &CT : E.get()) {
const llvm::BitVector &ForbiddenRegs =
CT.ScratchSpacePointerInReg
? RATC.getRegister(CT.ScratchSpacePointerInReg).aliasedBits()
: RATC.emptyRegisters();
// TODO: Generate as many BenchmarkCode as needed.
{
BenchmarkCode BC;
Expand Down
3 changes: 2 additions & 1 deletion tools/llvm-exegesis/lib/SnippetGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class SnippetGenerator {
private:
// API to be implemented by subclasses.
virtual llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const = 0;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const = 0;
};

// A global Random Number Generator to randomize configurations.
Expand Down
52 changes: 14 additions & 38 deletions tools/llvm-exegesis/lib/Uops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ void UopsSnippetGenerator::instantiateMemoryOperands(
static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
const LLVMState &State, const InstructionTemplate &IT,
const ArrayRef<const Variable *> TiedVariables,
const BitVector *ScratchSpaceAliasedRegs) {
const BitVector &ForbiddenRegisters) {
std::vector<InstructionTemplate> Instructions;
// Assign registers to variables in a round-robin manner. This is simple but
// ensures that the most register-constrained variable does not get starved.
Expand All @@ -135,12 +135,8 @@ static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
assert(Var);
const Operand &Op = IT.Instr.getPrimaryOperand(*Var);
assert(Op.isReg());
BitVector PossibleRegs = State.getRATC().emptyRegisters();
if (ScratchSpaceAliasedRegs) {
PossibleRegs |= *ScratchSpaceAliasedRegs;
}
PossibleRegs.flip();
PossibleRegs &= Op.getRegisterAliasing().sourceBits();
BitVector PossibleRegs = Op.getRegisterAliasing().sourceBits();
remove(PossibleRegs, ForbiddenRegisters);
PossibleRegsForVar.push_back(std::move(PossibleRegs));
}
SmallVector<int, 2> Iterators(TiedVariables.size(), 0);
Expand Down Expand Up @@ -168,28 +164,14 @@ static std::vector<InstructionTemplate> generateSnippetUsingStaticRenaming(
}

llvm::Expected<std::vector<CodeTemplate>>
UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
UopsSnippetGenerator::generateCodeTemplates(
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
CodeTemplate CT;
const llvm::BitVector *ScratchSpaceAliasedRegs = nullptr;
if (Instr.hasMemoryOperands()) {
const auto &ET = State.getExegesisTarget();
CT.ScratchSpacePointerInReg =
ET.getScratchMemoryRegister(State.getTargetMachine().getTargetTriple());
if (CT.ScratchSpacePointerInReg == 0)
return llvm::make_error<BenchmarkFailure>(
"Infeasible : target does not support memory instructions");
ScratchSpaceAliasedRegs =
&State.getRATC().getRegister(CT.ScratchSpacePointerInReg).aliasedBits();
// If the instruction implicitly writes to ScratchSpacePointerInReg , abort.
// FIXME: We could make a copy of the scratch register.
for (const auto &Op : Instr.Operands) {
if (Op.isDef() && Op.isImplicitReg() &&
ScratchSpaceAliasedRegs->test(Op.getImplicitReg()))
return llvm::make_error<BenchmarkFailure>(
"Infeasible : memory instruction uses scratch memory register");
}
}

CT.ScratchSpacePointerInReg =
Instr.hasMemoryOperands()
? State.getExegesisTarget().getScratchMemoryRegister(
State.getTargetMachine().getTargetTriple())
: 0;
const AliasingConfigurations SelfAliasing(Instr, Instr);
InstructionTemplate IT(Instr);
if (SelfAliasing.empty()) {
Expand All @@ -208,20 +190,17 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
if (!TiedVariables.empty()) {
CT.Info = "instruction has tied variables, using static renaming.";
CT.Instructions = generateSnippetUsingStaticRenaming(
State, IT, TiedVariables, ScratchSpaceAliasedRegs);
State, IT, TiedVariables, ForbiddenRegisters);
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
return getSingleton(std::move(CT));
}
const auto &ReservedRegisters = State.getRATC().reservedRegisters();
// No tied variables, we pick random values for defs.
llvm::BitVector Defs(State.getRegInfo().getNumRegs());
for (const auto &Op : Instr.Operands) {
if (Op.isReg() && Op.isExplicit() && Op.isDef() && !Op.isMemory()) {
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
remove(PossibleRegisters, ReservedRegisters);
// Do not use the scratch memory address register.
if (ScratchSpaceAliasedRegs)
remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
// Do not use forbidden registers.
remove(PossibleRegisters, ForbiddenRegisters);
assert(PossibleRegisters.any() && "No register left to choose from");
const auto RandomReg = randomBit(PossibleRegisters);
Defs.set(RandomReg);
Expand All @@ -233,10 +212,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
for (const auto &Op : Instr.Operands) {
if (Op.isReg() && Op.isExplicit() && Op.isUse() && !Op.isMemory()) {
auto PossibleRegisters = Op.getRegisterAliasing().sourceBits();
remove(PossibleRegisters, ReservedRegisters);
// Do not use the scratch memory address register.
if (ScratchSpaceAliasedRegs)
remove(PossibleRegisters, *ScratchSpaceAliasedRegs);
remove(PossibleRegisters, ForbiddenRegisters);
remove(PossibleRegisters, DefAliases);
assert(PossibleRegisters.any() && "No register left to choose from");
const auto RandomReg = randomBit(PossibleRegisters);
Expand Down
3 changes: 2 additions & 1 deletion tools/llvm-exegesis/lib/Uops.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class UopsSnippetGenerator : public SnippetGenerator {
~UopsSnippetGenerator() override;

llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;

static constexpr const size_t kMinNumDifferentAddresses = 6;

Expand Down
16 changes: 10 additions & 6 deletions tools/llvm-exegesis/lib/X86/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,19 +182,21 @@ class X86LatencySnippetGenerator : public LatencySnippetGenerator {
using LatencySnippetGenerator::LatencySnippetGenerator;

llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
} // namespace

llvm::Expected<std::vector<CodeTemplate>>
X86LatencySnippetGenerator::generateCodeTemplates(
const Instruction &Instr) const {
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr))
return std::move(E);

switch (getX86FPFlags(Instr)) {
case llvm::X86II::NotFP:
return LatencySnippetGenerator::generateCodeTemplates(Instr);
return LatencySnippetGenerator::generateCodeTemplates(Instr,
ForbiddenRegisters);
case llvm::X86II::ZeroArgFP:
case llvm::X86II::OneArgFP:
case llvm::X86II::SpecialFP:
Expand All @@ -219,19 +221,21 @@ class X86UopsSnippetGenerator : public UopsSnippetGenerator {
using UopsSnippetGenerator::UopsSnippetGenerator;

llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction &Instr) const override;
generateCodeTemplates(const Instruction &Instr,
const BitVector &ForbiddenRegisters) const override;
};
} // namespace

llvm::Expected<std::vector<CodeTemplate>>
X86UopsSnippetGenerator::generateCodeTemplates(
const Instruction &Instr) const {
const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
if (auto E = IsInvalidOpcode(Instr))
return std::move(E);

switch (getX86FPFlags(Instr)) {
case llvm::X86II::NotFP:
return UopsSnippetGenerator::generateCodeTemplates(Instr);
return UopsSnippetGenerator::generateCodeTemplates(Instr,
ForbiddenRegisters);
case llvm::X86II::ZeroArgFP:
case llvm::X86II::OneArgFP:
case llvm::X86II::SpecialFP:
Expand Down
Loading

0 comments on commit 20508d0

Please sign in to comment.