Skip to content

Commit

Permalink
[BOLT][NFC] Extract DataAggregator::parseLBRSample
Browse files Browse the repository at this point in the history
Reviewed By: #bolt, rafauler

Differential Revision: https://reviews.llvm.org/D150986
  • Loading branch information
aaupov committed May 20, 2023
1 parent b75d6a4 commit 860543d
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 73 deletions.
3 changes: 3 additions & 0 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,9 @@ class DataAggregator : public DataReader {
/// Parse a single LBR entry as output by perf script -Fbrstack
ErrorOr<LBREntry> parseLBREntry();

/// Parse a single LBR sample. Branch entries with at least one endpoint inside
/// a known function are counted in BranchLBRs, and fall-through traces between
/// consecutive entries that stay within one function are counted in
/// FallthroughLBRs. When \p NeedsSkylakeFix is set, the first two entries of
/// the sample are skipped. Returns the number of fall-through traces examined,
/// including those classified as invalid or out of range.
uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);

/// Parse and pre-aggregate branch events.
std::error_code parseBranchEvents();

Expand Down
142 changes: 69 additions & 73 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,74 @@ std::error_code DataAggregator::printLBRHeatMap() {
return std::error_code();
}

/// Process the LBR entries of one sample: update the per-branch counters in
/// BranchLBRs and the fall-through counters in FallthroughLBRs, and classify
/// fall-through traces that cannot be attributed to a single function.
///
/// \param Sample          sample whose LBR entries are walked in stored order.
/// \param NeedsSkylakeFix when true, the first two entries are skipped.
/// \returns the number of fall-through traces examined for this sample,
///          counting invalid and out-of-range ones as well.
uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
                                        bool NeedsSkylakeFix) {
  uint64_t TraceCount = 0;
  // Entries are stored in reverse execution order, so while walking them
  // SuccessorPC is the next recorded PC executed after the current entry.
  // It is seeded from the sample PC only when opts::UseEventPC is set.
  uint64_t SuccessorPC = opts::UseEventPC ? Sample.PC : 0;
  uint32_t EntryNo = 0;
  for (const LBREntry &Entry : Sample.LBR) {
    ++EntryNo;
    // Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
    // sometimes records entry 32 as an exact copy of entry 31. That would
    // likely yield an invalid trace and a stale function in BAT mode (non-BAT
    // disassembles the function and can ignore the trace at aggregation
    // time). Drop the first 2 entries (the last two, chronologically).
    if (NeedsSkylakeFix && EntryNo <= 2)
      continue;

    if (SuccessorPC != 0) {
      // Fall-through trace: from the target of this branch up to the next
      // recorded PC.
      const uint64_t FTStart = Entry.To;
      const uint64_t FTEnd = SuccessorPC;
      const BinaryFunction *StartFunc =
          getBinaryFunctionContainingAddress(FTStart);
      if (!StartFunc || !StartFunc->containsAddress(FTEnd)) {
        // The trace does not stay inside one known function.
        const BinaryFunction *EndFunc =
            getBinaryFunctionContainingAddress(FTEnd);
        if (StartFunc && EndFunc) {
          LLVM_DEBUG({
            dbgs() << "Invalid trace starting in " << StartFunc->getPrintName()
                   << formatv(" @ {0:x}", FTStart - StartFunc->getAddress())
                   << formatv(" and ending @ {0:x}\n", FTEnd);
          });
          ++NumInvalidTraces;
        } else {
          LLVM_DEBUG({
            dbgs() << "Out of range trace starting in "
                   << (StartFunc ? StartFunc->getPrintName() : "None")
                   << formatv(" @ {0:x}",
                              FTStart -
                                  (StartFunc ? StartFunc->getAddress() : 0))
                   << " and ending in "
                   << (EndFunc ? EndFunc->getPrintName() : "None")
                   << formatv(" @ {0:x}\n",
                              FTEnd - (EndFunc ? EndFunc->getAddress() : 0));
          });
          ++NumLongRangeTraces;
        }
      } else {
        // Both ends are in StartFunc; split the count by whether the branch
        // that led here originated inside the same function.
        FTInfo &FT = FallthroughLBRs[Trace(FTStart, FTEnd)];
        if (StartFunc->containsAddress(Entry.From))
          ++FT.InternCount;
        else
          ++FT.ExternCount;
      }
      ++TraceCount;
    }
    SuccessorPC = Entry.From;

    // Endpoints outside any known function are recorded as address 0; a
    // branch whose endpoints both end up as 0 is not recorded at all.
    const uint64_t BranchSrc =
        getBinaryFunctionContainingAddress(Entry.From) ? Entry.From : 0;
    const uint64_t BranchDst =
        getBinaryFunctionContainingAddress(Entry.To) ? Entry.To : 0;
    if (BranchSrc || BranchDst) {
      BranchInfo &Branch = BranchLBRs[Trace(BranchSrc, BranchDst)];
      ++Branch.TakenCount;
      Branch.MispredCount += Entry.Mispred;
    }
  }
  return TraceCount;
}

std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: parse branch events...\n";
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
Expand Down Expand Up @@ -1412,79 +1480,7 @@ std::error_code DataAggregator::parseBranchEvents() {
NeedsSkylakeFix = true;
}

// LBRs are stored in reverse execution order. NextPC refers to the next
// recorded executed PC.
uint64_t NextPC = opts::UseEventPC ? Sample.PC : 0;
uint32_t NumEntry = 0;
for (const LBREntry &LBR : Sample.LBR) {
++NumEntry;
// Hardware bug workaround: Intel Skylake (which has 32 LBR entries)
// sometimes record entry 32 as an exact copy of entry 31. This will cause
// us to likely record an invalid trace and generate a stale function for
// BAT mode (non BAT disassembles the function and is able to ignore this
// trace at aggregation time). Drop first 2 entries (last two, in
// chronological order)
if (NeedsSkylakeFix && NumEntry <= 2)
continue;
if (NextPC) {
// Record fall-through trace.
const uint64_t TraceFrom = LBR.To;
const uint64_t TraceTo = NextPC;
const BinaryFunction *TraceBF =
getBinaryFunctionContainingAddress(TraceFrom);
if (TraceBF && TraceBF->containsAddress(TraceTo)) {
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
if (TraceBF->containsAddress(LBR.From))
++Info.InternCount;
else
++Info.ExternCount;
} else {
if (TraceBF && getBinaryFunctionContainingAddress(TraceTo)) {
LLVM_DEBUG(dbgs()
<< "Invalid trace starting in "
<< TraceBF->getPrintName() << " @ "
<< Twine::utohexstr(TraceFrom - TraceBF->getAddress())
<< " and ending @ " << Twine::utohexstr(TraceTo)
<< '\n');
++NumInvalidTraces;
} else {
LLVM_DEBUG(dbgs()
<< "Out of range trace starting in "
<< (TraceBF ? TraceBF->getPrintName() : "None") << " @ "
<< Twine::utohexstr(
TraceFrom - (TraceBF ? TraceBF->getAddress() : 0))
<< " and ending in "
<< (getBinaryFunctionContainingAddress(TraceTo)
? getBinaryFunctionContainingAddress(TraceTo)
->getPrintName()
: "None")
<< " @ "
<< Twine::utohexstr(
TraceTo -
(getBinaryFunctionContainingAddress(TraceTo)
? getBinaryFunctionContainingAddress(TraceTo)
->getAddress()
: 0))
<< '\n');
++NumLongRangeTraces;
}
}
++NumTraces;
}
NextPC = LBR.From;

uint64_t From = LBR.From;
if (!getBinaryFunctionContainingAddress(From))
From = 0;
uint64_t To = LBR.To;
if (!getBinaryFunctionContainingAddress(To))
To = 0;
if (!From && !To)
continue;
BranchInfo &Info = BranchLBRs[Trace(From, To)];
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
}

for (const auto &LBR : BranchLBRs) {
Expand Down

0 comments on commit 860543d

Please sign in to comment.