Skip to content

Commit

Permalink
SamplePGO - Add initial support for inliner annotations.
Browse files Browse the repository at this point in the history
This adds two thresholds to the sample profiler to affect inlining
decisions: the concept of global hotness and coldness.

Functions that have accumulated more than a certain fraction of samples at
runtime are annotated with the InlineHint attribute. Conversely,
functions that accumulate less than a certain fraction of samples are
annotated with the Cold attribute.

This is very similar to the hints emitted by Clang when using
instrumentation profiles.

Notice that this is a very blunt instrument. A function may have
globally collected a significant fraction of samples, but that does not
necessarily mean that every callsite for that function is hot.

Ideally, we would annotate each callsite with the samples collected at
that callsite. This way, the inliner can incorporate all these weights
into its cost model.

Once the inliner offers this functionality, we can change the hints
emitted here to a more precise per-callsite annotation. For now, this is
providing some measure of speedups with our internal benchmarks. I've
observed speedups of up to 23% (though the geo mean is about 3%). I expect
these numbers to improve as the inliner gets better annotations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254212 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
dnovillo committed Nov 27, 2015
1 parent 2badfee commit c738af5
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 1 deletion.
80 changes: 79 additions & 1 deletion lib/Transforms/IPO/SampleProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ static cl::opt<double> SampleProfileHotThreshold(
"sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
cl::desc("Inlined functions that account for more than N% of all samples "
"collected in the parent function, will be inlined again."));
// Global hotness threshold, expressed as a percentage of all the samples
// collected in the profile (default: 30). SampleProfileLoader::emitInlineHints
// compares against it with '>=', so a function sitting exactly at N% is also
// given the InlineHint attribute.
static cl::opt<double> SampleProfileGlobalHotThreshold(
"sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"),
cl::desc("Top-level functions that account for more than N% of all samples "
"collected in the profile, will be marked as hot for the inliner "
"to consider."));
// Global coldness threshold, expressed as a percentage of all the samples
// collected in the profile (default: 0.5). SampleProfileLoader::emitInlineHints
// compares against it with '<=', so a function sitting exactly at N% is also
// given the Cold attribute.
static cl::opt<double> SampleProfileGlobalColdThreshold(
"sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"),
cl::desc("Top-level functions that account for less than N% of all samples "
"collected in the profile, will be marked as cold for the inliner "
"to consider."));

namespace {
typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
Expand All @@ -96,7 +106,8 @@ class SampleProfileLoader : public ModulePass {

SampleProfileLoader(StringRef Name = SampleProfileFile)
: ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
Samples(nullptr), Filename(Name), ProfileIsValid(false) {
Samples(nullptr), Filename(Name), ProfileIsValid(false),
TotalCollectedSamples(0) {
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}

Expand All @@ -121,6 +132,7 @@ class SampleProfileLoader : public ModulePass {
const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F);
bool emitInlineHints(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
Expand Down Expand Up @@ -185,6 +197,12 @@ class SampleProfileLoader : public ModulePass {

/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;

/// \brief Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
/// at runtime.
uint64_t TotalCollectedSamples;
};

class SampleCoverageTracker {
Expand Down Expand Up @@ -582,6 +600,60 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return FS;
}

/// \brief Emit an inline hint if \p F is globally hot or cold.
///
/// The share of the profile attributed to \p F is the total sample count of
/// the current function profile (Samples) divided by TotalCollectedSamples,
/// expressed as a percentage. That percentage is then classified:
///
/// - At or above SampleProfileGlobalHotThreshold, the InlineHint attribute is
///   applied for the inliner to consider the function hot.
/// - Otherwise, at or below SampleProfileGlobalColdThreshold, the Cold
///   attribute is applied for the inliner to consider the function cold.
///
/// Both comparisons are inclusive. The hot check runs first, so a function
/// never receives both attributes. An optimization remark is emitted for
/// every attribute that gets applied.
///
/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
/// function globally hot or cold, we should be annotating individual callsites.
/// This is not currently possible, but work on the inliner will eventually
/// provide this ability. See http://reviews.llvm.org/D15003 for details and
/// discussion.
///
/// \returns True if either attribute was applied to \p F. False otherwise,
/// including when the profile recorded no samples at all.
bool SampleProfileLoader::emitInlineHints(Function &F) {
  // With an empty profile there is no meaningful percentage to compute (and
  // the division below would be by zero).
  if (TotalCollectedSamples == 0)
    return false;

  // NOTE(review): Samples is dereferenced unchecked; presumably the caller has
  // already pointed it at the profile for F - confirm.
  // The local is deliberately not called 'FunctionSamples', which would shadow
  // the FunctionSamples class used throughout this file.
  const uint64_t NumFunctionSamples = Samples->getTotalSamples();
  const double SamplesPercent = static_cast<double>(NumFunctionSamples) /
                                static_cast<double>(TotalCollectedSamples) *
                                100.0;

  // Applies \p Kind to F and reports it through an optimization remark.
  // The Twine is built and consumed inside a single full expression, so the
  // temporaries it points at (the StringRef and the std::strings) outlive it.
  auto ApplyHint = [&](llvm::Attribute::AttrKind Kind, const char *RemarkPrefix,
                       double ThresholdPercent) {
    F.addFnAttr(Kind);
    emitOptimizationRemark(
        F.getContext(), DEBUG_TYPE, F, DebugLoc(),
        Twine(RemarkPrefix) + F.getName() + "' with " +
            std::to_string(SamplesPercent) + "% of samples (threshold: " +
            std::to_string(ThresholdPercent) + "%)");
  };

  // If the function collected at least as many samples as the hot threshold,
  // mark it globally hot.
  if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
    ApplyHint(llvm::Attribute::InlineHint,
              "Applied inline hint to globally hot function '",
              SampleProfileGlobalHotThreshold);
    return true;
  }

  // If the function collected no more samples than the cold threshold, mark
  // it globally cold.
  if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
    ApplyHint(llvm::Attribute::Cold,
              "Applied cold hint to globally cold function '",
              SampleProfileGlobalColdThreshold);
    return true;
  }

  return false;
}

/// \brief Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
Expand Down Expand Up @@ -1088,6 +1160,8 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");

Changed |= emitInlineHints(F);

Changed |= inlineHotFunctions(F);

// Compute basic block weights.
Expand Down Expand Up @@ -1165,6 +1239,10 @@ bool SampleProfileLoader::runOnModule(Module &M) {
if (!ProfileIsValid)
return false;

// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();

bool retval = false;
for (auto &F : M)
if (!F.isDeclaration()) {
Expand Down
3 changes: 3 additions & 0 deletions test/Transforms/SampleProfile/Inputs/inline-hint.prof
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
_Z6hot_fnRxi:700:0
_Z7cold_fnRxi:1:0
other:299:0
38 changes: 38 additions & 0 deletions test/Transforms/SampleProfile/inline-hint.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
;
; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1
; The profile input gives this function 1 of the 1000 samples recorded across
; all three entries (0.1%). That is within the default 0.5% global cold
; threshold, so the cold-hint remark is expected for it.
define void @_Z7cold_fnRxi() !dbg !4 {
entry:
ret void, !dbg !29
}

; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0
; The profile input gives this function 700 of the 1000 samples recorded across
; all three entries (70%). That is above the default 30% global hot threshold,
; so the inline-hint remark is expected for it.
define void @_Z6hot_fnRxi() #0 !dbg !10 {
entry:
ret void, !dbg !38
}

!llvm.module.flags = !{!17, !18}
!llvm.ident = !{!19}

!1 = !DIFile(filename: "inline-hint.cc", directory: ".")
!2 = !{}
!3 = !{!4, !10, !11, !14}
!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7, !9}
!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64)
!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!12 = !DISubroutineType(types: !13)
!13 = !{!8, !8}
!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!15 = !DISubroutineType(types: !16)
!16 = !{!9}
!17 = !{i32 2, !"Dwarf Version", i32 4}
!18 = !{i32 2, !"Debug Info Version", i32 3}
!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"}
!29 = !DILocation(line: 5, column: 1, scope: !4)
!38 = !DILocation(line: 9, column: 1, scope: !10)

0 comments on commit c738af5

Please sign in to comment.