diff --git a/include/llvm/Analysis/ModuleSummaryAnalysis.h b/include/llvm/Analysis/ModuleSummaryAnalysis.h index 2a9b3e902700..c8adc3bf81ac 100644 --- a/include/llvm/Analysis/ModuleSummaryAnalysis.h +++ b/include/llvm/Analysis/ModuleSummaryAnalysis.h @@ -21,6 +21,7 @@ namespace llvm { class BlockFrequencyInfo; +class ProfileSummaryInfo; /// Direct function to compute a \c ModuleSummaryIndex from a given module. /// @@ -30,8 +31,8 @@ class BlockFrequencyInfo; /// that information. ModuleSummaryIndex buildModuleSummaryIndex( const Module &M, - std::function GetBFICallback = - nullptr); + std::function GetBFICallback, + ProfileSummaryInfo *PSI); /// Analysis pass to provide the ModuleSummaryIndex object. class ModuleSummaryIndexAnalysis diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 52d4f01b7985..d7757f2e6e96 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -194,20 +194,20 @@ enum ModulePathSymtabCodes { // and combined index cases. 
enum GlobalValueSummarySymtabCodes { // PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid)] FS_PERMODULE = 1, // PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, hotness)] FS_PERMODULE_PROFILE = 2, // PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] FS_PERMODULE_GLOBALVAR_INIT_REFS = 3, // COMBINED: [valueid, modid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid)] FS_COMBINED = 4, // COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, hotness)] FS_COMBINED_PROFILE = 5, // COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] FS_COMBINED_GLOBALVAR_INIT_REFS = 6, diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 23912dad50a1..f907267a0385 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -30,18 +30,14 @@ namespace llvm { /// \brief Class to accumulate and hold information about a callee. struct CalleeInfo { - /// The static number of callsites calling corresponding function. - unsigned CallsiteCount; - /// The cumulative profile count of calls to corresponding function - /// (if using PGO, otherwise 0). 
- uint64_t ProfileCount; - CalleeInfo() : CallsiteCount(0), ProfileCount(0) {} - CalleeInfo(unsigned CallsiteCount, uint64_t ProfileCount) - : CallsiteCount(CallsiteCount), ProfileCount(ProfileCount) {} - CalleeInfo &operator+=(uint64_t RHSProfileCount) { - CallsiteCount++; - ProfileCount += RHSProfileCount; - return *this; + enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 }; + HotnessType Hotness = HotnessType::Unknown; + + CalleeInfo() = default; + explicit CalleeInfo(HotnessType Hotness) : Hotness(Hotness) {} + + void updateHotness(const HotnessType OtherHotness) { + Hotness = std::max(Hotness, OtherHotness); } }; diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 1a8872adeb8f..c736c6aa1698 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" @@ -63,8 +64,20 @@ static void findRefEdges(const User *CurUser, DenseSet &RefEdges, } } +static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount, + ProfileSummaryInfo *PSI) { + if (!PSI) + return CalleeInfo::HotnessType::Unknown; + if (PSI->isHotCount(ProfileCount)) + return CalleeInfo::HotnessType::Hot; + if (PSI->isColdCount(ProfileCount)) + return CalleeInfo::HotnessType::Cold; + return CalleeInfo::HotnessType::None; +} + static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, - const Function &F, BlockFrequencyInfo *BFI) { + const Function &F, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI) { // Summary not currently supported for anonymous functions, they must // be renamed. 
if (!F.hasName()) @@ -97,7 +110,10 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None; auto *CalleeId = M.getValueSymbolTable().lookup(CalledFunction->getName()); - CallGraphEdges[CalleeId] += (ScaledCount ? ScaledCount.getValue() : 0); + + auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) + : CalleeInfo::HotnessType::Unknown; + CallGraphEdges[CalleeId].updateHotness(Hotness); } else { const auto *CI = dyn_cast(&I); // Skip inline assembly calls. @@ -113,7 +129,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, ICallAnalysis.getPromotionCandidatesForInstruction( &I, NumVals, TotalCount, NumCandidates); for (auto &Candidate : CandidateProfileData) - IndirectCallEdges[Candidate.Value] += Candidate.Count; + IndirectCallEdges[Candidate.Value].updateHotness( + getHotness(Candidate.Count, PSI)); } } @@ -140,7 +157,8 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, ModuleSummaryIndex llvm::buildModuleSummaryIndex( const Module &M, - std::function GetBFICallback) { + std::function GetBFICallback, + ProfileSummaryInfo *PSI) { ModuleSummaryIndex Index; // Check if the module can be promoted, otherwise just disable importing from // it by not emitting any summary. 
@@ -165,7 +183,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( BFI = BFIPtr.get(); } - computeFunctionSummary(Index, M, F, BFI); + computeFunctionSummary(Index, M, F, BFI, PSI); } // Compute summaries for all variables defined in module, and save in the @@ -182,10 +200,15 @@ char ModuleSummaryIndexAnalysis::PassID; ModuleSummaryIndex ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) { + ProfileSummaryInfo &PSI = AM.getResult(M); auto &FAM = AM.getResult(M).getManager(); - return buildModuleSummaryIndex(M, [&FAM](const Function &F) { - return &FAM.getResult(*const_cast(&F)); - }); + return buildModuleSummaryIndex( + M, + [&FAM](const Function &F) { + return &FAM.getResult( + *const_cast(&F)); + }, + &PSI); } char ModuleSummaryIndexWrapperPass::ID = 0; @@ -205,11 +228,15 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass() } bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) { - Index = buildModuleSummaryIndex(M, [this](const Function &F) { - return &(this->getAnalysis( - *const_cast(&F)) - .getBFI()); - }); + auto &PSI = *getAnalysis().getPSI(M); + Index = buildModuleSummaryIndex( + M, + [this](const Function &F) { + return &(this->getAnalysis( + *const_cast(&F)) + .getBFI()); + }, + &PSI); return false; } @@ -221,6 +248,7 @@ bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) { void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); } bool llvm::moduleCanBeRenamedForThinLTO(const Module &M) { diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 49bf8136f4e1..101e8eba6b12 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -651,6 +651,9 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { std::pair getGUIDFromValueId(unsigned ValueId); + std::pair + readCallGraphEdge(const SmallVector &Record, unsigned int &I, + bool 
IsOldProfileFormat, bool HasProfile); }; } // end anonymous namespace @@ -6218,8 +6221,10 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { return error("Invalid Summary Block: version expected"); } const uint64_t Version = Record[0]; - if (Version != 1) - return error("Invalid summary version " + Twine(Version) + ", 1 expected"); + const bool IsOldProfileFormat = Version == 1; + if (!IsOldProfileFormat && Version != 2) + return error("Invalid summary version " + Twine(Version) + + ", 1 or 2 expected"); Record.clear(); // Keep around the last seen summary to be used when we see an optional @@ -6264,10 +6269,10 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { default: // Default behavior: ignore. break; // FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, - // n x (valueid, callsitecount)] + // n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // n x (valueid, hotness)] case bitc::FS_PERMODULE: case bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; @@ -6296,12 +6301,11 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; ++I) { - unsigned CalleeValueId = Record[I]; - unsigned CallsiteCount = Record[++I]; - uint64_t ProfileCount = HasProfile ? 
Record[++I] : 0; - GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; - FS->addCallGraphEdge(CalleeGUID, - CalleeInfo(CallsiteCount, ProfileCount)); + CalleeInfo::HotnessType Hotness; + GlobalValue::GUID CalleeGUID; + std::tie(CalleeGUID, Hotness) = + readCallGraphEdge(Record, I, IsOldProfileFormat, HasProfile); + FS->addCallGraphEdge(CalleeGUID, CalleeInfo(Hotness)); } auto GUID = getGUIDFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -6356,10 +6360,9 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { break; } // FS_COMBINED: [valueid, modid, flags, instcount, numrefs, - // numrefs x valueid, n x (valueid, callsitecount)] + // numrefs x valueid, n x (valueid)] // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs, - // numrefs x valueid, - // n x (valueid, callsitecount, profilecount)] + // numrefs x valueid, n x (valueid, hotness)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { unsigned ValueID = Record[0]; @@ -6385,12 +6388,11 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; ++I) { - unsigned CalleeValueId = Record[I]; - unsigned CallsiteCount = Record[++I]; - uint64_t ProfileCount = HasProfile ? 
Record[++I] : 0; - GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; - FS->addCallGraphEdge(CalleeGUID, - CalleeInfo(CallsiteCount, ProfileCount)); + CalleeInfo::HotnessType Hotness; + GlobalValue::GUID CalleeGUID; + std::tie(CalleeGUID, Hotness) = + readCallGraphEdge(Record, I, IsOldProfileFormat, HasProfile); + FS->addCallGraphEdge(CalleeGUID, CalleeInfo(Hotness)); } GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; TheIndex->addGlobalValueSummary(GUID, std::move(FS)); @@ -6456,6 +6458,23 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { llvm_unreachable("Exit infinite loop"); } +std::pair +ModuleSummaryIndexBitcodeReader::readCallGraphEdge( + const SmallVector &Record, unsigned int &I, + const bool IsOldProfileFormat, const bool HasProfile) { + + auto Hotness = CalleeInfo::HotnessType::Unknown; + unsigned CalleeValueId = Record[I]; + GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first; + if (IsOldProfileFormat) { + I += 1; // Skip old callsitecount field + if (HasProfile) + I += 1; // Skip old profilecount field + } else if (HasProfile) + Hotness = static_cast(Record[++I]); + return {CalleeGUID, Hotness}; +} + // Parse the module string table block into the Index. // This populates the ModulePathStringTable map in the index. 
std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index a353edf7aec1..af722723845b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3293,10 +3293,8 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord( bool HasProfileData = F.getEntryCount().hasValue(); for (auto &ECI : Calls) { NameVals.push_back(getValueId(ECI.first)); - assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite"); - NameVals.push_back(ECI.second.CallsiteCount); if (HasProfileData) - NameVals.push_back(ECI.second.ProfileCount); + NameVals.push_back(static_cast(ECI.second.Hotness)); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); @@ -3336,7 +3334,7 @@ void ModuleBitcodeWriter::writeModuleLevelReferences( // Current version for the summary. // This is bumped whenever we introduce changes in the way some record are // interpreted, like flags for instance. -static const uint64_t INDEX_VERSION = 1; +static const uint64_t INDEX_VERSION = 2; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. 
@@ -3357,7 +3355,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid, callsitecount) + // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv); @@ -3369,7 +3367,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid, callsitecount, profilecount) + // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv); @@ -3442,7 +3440,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid, callsitecount) + // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv); @@ -3455,7 +3453,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs - // numrefs x valueid, n x (valueid, callsitecount, profilecount) + // numrefs x valueid, n x (valueid, hotness) 
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv); @@ -3542,7 +3540,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { bool HasProfileData = false; for (auto &EI : FS->calls()) { - HasProfileData |= EI.second.ProfileCount != 0; + HasProfileData |= EI.second.Hotness != CalleeInfo::HotnessType::Unknown; if (HasProfileData) break; } @@ -3553,10 +3551,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { if (!hasValueId(EI.first.getGUID())) continue; NameVals.push_back(getValueId(EI.first.getGUID())); - assert(EI.second.CallsiteCount > 0 && "Expected at least one callsite"); - NameVals.push_back(EI.second.CallsiteCount); if (HasProfileData) - NameVals.push_back(EI.second.ProfileCount); + NameVals.push_back(static_cast(EI.second.Hotness)); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index ae646b7347aa..f4232dc2f897 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" @@ -377,7 +378,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, SmallVector OutputBuffer; { raw_svector_ostream OS(OutputBuffer); - auto Index = buildModuleSummaryIndex(TheModule); + ProfileSummaryInfo PSI(TheModule); + auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr); WriteBitcodeToFile(&TheModule, OS, true, &Index); } return make_unique(std::move(OutputBuffer)); diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 
81d9ca5638d5..6c43b7808700 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -48,6 +48,10 @@ static cl::opt cl::desc("As we import functions, multiply the " "`import-instr-limit` threshold by this factor " "before processing newly imported functions")); +static cl::opt ImportHotMultiplier( + "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"), + cl::ZeroOrMore, cl::desc("Multiply the `import-instr-limit` threshold for " + "hot callsites")); static cl::opt PrintImports("print-imports", cl::init(false), cl::Hidden, cl::desc("Print imported functions")); @@ -268,7 +272,7 @@ using EdgeInfo = std::pair; /// exported from their source module. static void computeImportForFunction( const FunctionSummary &Summary, const ModuleSummaryIndex &Index, - unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries, + const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries, SmallVectorImpl &Worklist, FunctionImporter::ImportMapTy &ImportList, StringMap *ExportLists = nullptr) { @@ -281,7 +285,12 @@ static void computeImportForFunction( continue; } - auto *CalleeSummary = selectCallee(GUID, Threshold, Index); + // FIXME: Also lower the threshold for cold callsites. + const auto NewThreshold = + Edge.second.Hotness == CalleeInfo::HotnessType::Hot + ? Threshold * ImportHotMultiplier + : Threshold; + auto *CalleeSummary = selectCallee(GUID, NewThreshold, Index); if (!CalleeSummary) { DEBUG(dbgs() << "ignored! 
No qualifying callee with summary found.\n"); continue; @@ -297,7 +306,7 @@ static void computeImportForFunction( } else ResolvedCalleeSummary = cast(CalleeSummary); - assert(ResolvedCalleeSummary->instCount() <= Threshold && + assert(ResolvedCalleeSummary->instCount() <= NewThreshold && "selectCallee() didn't honor the threshold"); auto ExportModulePath = ResolvedCalleeSummary->modulePath(); diff --git a/test/Bitcode/Inputs/thinlto-function-summary-callgraph-combined.1.bc b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-combined.1.bc new file mode 100644 index 000000000000..e6a134020054 Binary files /dev/null and b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-combined.1.bc differ diff --git a/test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc new file mode 100644 index 000000000000..11b9037e568c Binary files /dev/null and b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc differ diff --git a/test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.1.bc b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.1.bc new file mode 100644 index 000000000000..cb3380efcb8e Binary files /dev/null and b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.1.bc differ diff --git a/test/Bitcode/Inputs/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-profile-summary.ll new file mode 100644 index 000000000000..f7d2ca839b96 --- /dev/null +++ b/test/Bitcode/Inputs/thinlto-function-summary-callgraph-profile-summary.ll @@ -0,0 +1,27 @@ +; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + + +define void @hot1() #1 { + ret void +} +define void @hot2() #1 { + ret void +} +define void @hot3() #1 { + ret void +} +define void @cold() #1 { + ret void +} 
+define void @none1() #1 { + ret void +} +define void @none2() #1 { + ret void +} +define void @none3() #1 { + ret void +} + diff --git a/test/Bitcode/Inputs/thinlto-function-summary-callgraph.1.bc b/test/Bitcode/Inputs/thinlto-function-summary-callgraph.1.bc new file mode 100644 index 000000000000..d42da69fcffb Binary files /dev/null and b/test/Bitcode/Inputs/thinlto-function-summary-callgraph.1.bc differ diff --git a/test/Bitcode/summary_version.ll b/test/Bitcode/summary_version.ll index 718a0ab9bd7f..dfb9e9b15e7b 100644 --- a/test/Bitcode/summary_version.ll +++ b/test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll index 58411ca65237..cfdf8f7b0bd9 100644 --- a/test/Bitcode/thinlto-alias.ll +++ b/test/Bitcode/thinlto-alias.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: +; COMBINED-NEXT: ; Followed by the alias and aliasee ; COMBINED-NEXT: +; and hotness type, with value id matching the subsequent value symbol table. +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: +; and hotness type, with value id matching the subsequent value symbol table. +; op6=2 which is HotnessType::None. +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: +; CHECK-NEXT: +; CHECK-LABEL: + +; COMBINED: +; COMBINED_NEXT: + + +; ModuleID = 'thinlto-function-summary-callgraph.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This function has a high profile count, so its entry block is hot.
+define void @hot_function(i1 %a, i1 %a2) !prof !20 { +entry: + call void @hot1() + br i1 %a, label %Cold, label %Hot, !prof !41 +Cold: ; 1/1000 goes here + call void @cold() + call void @hot2() + call void @none1() + br label %exit +Hot: ; 999/1000 goes here + call void @hot2() + call void @hot3() + br i1 %a2, label %None1, label %None2, !prof !42 +None1: ; half goes here + call void @none1() + call void @none2() + br label %exit +None2: ; half goes here + call void @none3() + br label %exit +exit: + ret void +} + +declare void @hot1() #1 +declare void @hot2() #1 +declare void @hot3() #1 +declare void @cold() #1 +declare void @none1() #1 +declare void @none2() #1 +declare void @none3() #1 + + +!41 = !{!"branch_weights", i32 1, i32 1000} +!42 = !{!"branch_weights", i32 1, i32 1} + + + +!llvm.module.flags = !{!1} +!20 = !{!"function_entry_count", i64 110} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/test/Bitcode/thinlto-function-summary-callgraph.ll b/test/Bitcode/thinlto-function-summary-callgraph.ll index af38c3ef1217..c00907b7fb29 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph.ll @@ -1,15 +1,20 @@ ; Test to check the callgraph in summary ; RUN: opt -module-summary %s -o %t.o ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s + ; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph.ll -o %t2.o ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; Check parsing 
for old summary versions generated from this file. +; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph.1.bc | FileCheck %s --check-prefix=OLD +; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED + ; CHECK: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: -; CHECK-DAG: +; CHECK-DAG: ; Function X contains call to foo, as well as address reference to foo ; which is in the same instruction as the call: -; CHECK-DAG: +; CHECK-DAG: ; Function Y contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while earlier analyzing the phi using its ; return value: -; CHECK-DAG: +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: -; CHECK-DAG: +; CHECK-DAG: ; Variable bar initialization contains address reference to func: ; CHECK-DAG: ; CHECK: diff --git a/test/Transforms/FunctionImport/Inputs/hotness_based_import.ll b/test/Transforms/FunctionImport/Inputs/hotness_based_import.ll new file mode 100644 index 000000000000..3882cd059fda --- /dev/null +++ b/test/Transforms/FunctionImport/Inputs/hotness_based_import.ll @@ -0,0 +1,43 @@ +; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + + +define void @hot1() #1 { + ret void +} +define void @hot2() #1 { + call void @externalFunction() + call void @externalFunction() + ret void +} +define void @hot3() #1 { + call void @externalFunction() + call void @externalFunction() + call void @externalFunction() + ret void +} +define void @cold() #1 { + ret void +} +define void @cold2() #1 { + call void 
@externalFunction() + call void @externalFunction() + ret void +} + +define void @none1() #1 { + ret void +} +define void @none2() #1 { + call void @externalFunction() + ret void +} +define void @none3() #1 { + call void @externalFunction() + call void @externalFunction() + ret void +} + + +declare void @externalFunction() diff --git a/test/Transforms/FunctionImport/hotness_based_import.ll b/test/Transforms/FunctionImport/hotness_based_import.ll new file mode 100644 index 000000000000..4dbe9bb55624 --- /dev/null +++ b/test/Transforms/FunctionImport/hotness_based_import.ll @@ -0,0 +1,106 @@ +; Test to check the callgraph in summary when there is PGO +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/hotness_based_import.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc + +; Test import with default hot multiplier (3) +; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=1 --S | FileCheck %s --check-prefix=CHECK --check-prefix=HOT-DEFAULT +; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=1 --S -import-hot-multiplier=3.0 | FileCheck %s --check-prefix=CHECK --check-prefix=HOT-DEFAULT +; HOT-DEFAULT-DAG: define available_externally void @hot1() +; HOT-DEFAULT-DAG: define available_externally void @hot2() +; HOT-DEFAULT-DAG: define available_externally void @cold() +; HOT-DEFAULT-DAG: define available_externally void @none1() + +; HOT-DEFAULT-NOT: define available_externally void @hot3() +; HOT-DEFAULT-NOT: define available_externally void @none2() +; HOT-DEFAULT-NOT: define available_externally void @none3() +; HOT-DEFAULT-NOT: define available_externally void @cold2() + + +; Test import with hot multiplier 1.0 - treat hot callsites as normal. 
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=1 -import-hot-multiplier=1.0 --S | FileCheck %s --check-prefix=CHECK --check-prefix=HOT-ONE +; HOT-ONE-DAG: define available_externally void @hot1() +; HOT-ONE-DAG: define available_externally void @cold() +; HOT-ONE-DAG: define available_externally void @none1() +; HOT-ONE-NOT: define available_externally void @hot2() +; HOT-ONE-NOT: define available_externally void @hot3() +; HOT-ONE-NOT: define available_externally void @none2() +; HOT-ONE-NOT: define available_externally void @none3() +; HOT-ONE-NOT: define available_externally void @cold2() + + +; Test import with hot multiplier 0.0 and high threshold - don't import functions called from hot callsite. +; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -import-hot-multiplier=0.0 --S | FileCheck %s --check-prefix=CHECK --check-prefix=HOT-ZERO +; HOT-ZERO-DAG: define available_externally void @cold() +; HOT-ZERO-DAG: define available_externally void @none1() +; HOT-ZERO-DAG: define available_externally void @none2() +; HOT-ZERO-DAG: define available_externally void @none3() +; HOT-ZERO-DAG: define available_externally void @cold2() +; HOT-ZERO-NOT: define available_externally void @hot2() +; HOT-ZERO-NOT: define available_externally void @hot1() +; HOT-ZERO-NOT: define available_externally void @hot3() + + + +; ModuleID = 'thinlto-function-summary-callgraph.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This function has a high profile count, so its entry block is hot.
+define void @hot_function(i1 %a, i1 %a2) !prof !20 { +entry: + call void @hot1() + br i1 %a, label %Cold, label %Hot, !prof !41 +Cold: ; 1/1000 goes here + call void @cold() + call void @cold2() + call void @hot2() + call void @none1() + br label %exit +Hot: ; 999/1000 goes here + call void @hot2() + call void @hot3() + br i1 %a2, label %None1, label %None2, !prof !42 +None1: ; half goes here + call void @none1() + call void @none2() + br label %exit +None2: ; half goes here + call void @none3() + br label %exit +exit: + ret void +} + +declare void @hot1() #1 +declare void @hot2() #1 +declare void @hot3() #1 +declare void @cold() #1 +declare void @cold2() #1 +declare void @none1() #1 +declare void @none2() #1 +declare void @none3() #1 + + +!41 = !{!"branch_weights", i32 1, i32 1000} +!42 = !{!"branch_weights", i32 1, i32 1} + + + +!llvm.module.flags = !{!1} +!20 = !{!"function_entry_count", i64 110} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2}