diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 192d182a68f75b..1482b3e49983dd 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -9399,6 +9399,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * cLoopPtr, dLoopPtr : Display the blocks of a loop, including the trees, given a LoopDsc* (call * optPrintLoopInfo()). * cLoops, dLoops : Display the loop table (call optPrintLoopTable()). + * cNewLoops, dNewLoops : Display the loop table (call FlowGraphNaturalLoops::Dump()) with + * Compiler::m_loops. + * cNewLoopsA, dNewLoopsA : Display the loop table (call FlowGraphNaturalLoops::Dump()) with a given + * loops arg. + * cNewLoop, dNewLoop : Display a single loop (call FlowGraphNaturalLoop::Dump()) with given + * loop arg. * cTreeFlags, dTreeFlags : Display tree flags for a specified tree. * * The following don't require a Compiler* to work: @@ -9454,6 +9460,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma comment(linker, "/include:cLoop") #pragma comment(linker, "/include:cLoopPtr") #pragma comment(linker, "/include:cLoops") +#pragma comment(linker, "/include:cNewLoops") +#pragma comment(linker, "/include:cNewLoopsA") +#pragma comment(linker, "/include:cNewLoop") #pragma comment(linker, "/include:cTreeFlags") // Functions which call the c* functions getting Compiler* using `JitTls::GetCompiler()` @@ -9479,6 +9488,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma comment(linker, "/include:dLoop") #pragma comment(linker, "/include:dLoopPtr") #pragma comment(linker, "/include:dLoops") +#pragma comment(linker, "/include:dNewLoops") +#pragma comment(linker, "/include:dNewLoopsA") +#pragma comment(linker, "/include:dNewLoop") #pragma comment(linker, "/include:dTreeFlags") // Functions which don't require a Compiler* @@ -9725,6 +9737,27 @@ JITDBGAPI void __cdecl cLoops(Compiler* comp) 
comp->optPrintLoopTable(); } +JITDBGAPI void __cdecl cNewLoops(Compiler* comp) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== *NewLoops %u\n", sequenceNumber++); + FlowGraphNaturalLoops::Dump(comp->m_loops); +} + +JITDBGAPI void __cdecl cNewLoopsA(Compiler* comp, FlowGraphNaturalLoops* loops) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== *NewLoopsA %u\n", sequenceNumber++); + FlowGraphNaturalLoops::Dump(loops); +} + +JITDBGAPI void __cdecl cNewLoop(Compiler* comp, FlowGraphNaturalLoop* loop) +{ + static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called + printf("===================================================================== *NewLoop %u\n", sequenceNumber++); + FlowGraphNaturalLoop::Dump(loop); +} + JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called @@ -10417,6 +10450,21 @@ JITDBGAPI void __cdecl dLoops() cLoops(JitTls::GetCompiler()); } +JITDBGAPI void __cdecl dNewLoops() +{ + cNewLoops(JitTls::GetCompiler()); +} + +JITDBGAPI void __cdecl dNewLoopsA(FlowGraphNaturalLoops* loops) +{ + cNewLoopsA(JitTls::GetCompiler(), loops); +} + +JITDBGAPI void __cdecl dNewLoop(FlowGraphNaturalLoop* loop) +{ + cNewLoop(JitTls::GetCompiler(), loop); +} + JITDBGAPI void __cdecl dTreeFlags(GenTree* tree) { cTreeFlags(JitTls::GetCompiler(), tree); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 82a714b303da95..63438b08051aee 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2141,6 +2141,7 @@ class FlowGraphNaturalLoop void MatchInit(NaturalLoopIterInfo* 
info, BasicBlock* initBlock, GenTree* init); bool MatchLimit(NaturalLoopIterInfo* info, GenTree* test); + public: BasicBlock* GetHeader() const { @@ -2230,6 +2231,10 @@ class FlowGraphNaturalLoop bool AnalyzeIteration(NaturalLoopIterInfo* info); bool HasDef(unsigned lclNum); + +#ifdef DEBUG + static void Dump(FlowGraphNaturalLoop* loop); +#endif // DEBUG }; // Represents a collection of the natural loops in the flow graph. See @@ -2253,6 +2258,7 @@ class FlowGraphNaturalLoops FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, ArrayStack& worklist); + public: const FlowGraphDfsTree* GetDfsTree() { @@ -2330,6 +2336,10 @@ class FlowGraphNaturalLoops } static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); + +#ifdef DEBUG + static void Dump(FlowGraphNaturalLoops* loops); +#endif // DEBUG }; // Represents the dominator tree of the flow graph. diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index ee2f7bda9d39c8..923d3276664988 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -812,7 +812,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) // if (capped && (loop->ExitEdges().size() > 0)) { - // Figure out how much flow exits the loop with the capped probablility + // Figure out how much flow exits the loop with the capped probability // and current block frequencies and exit likelihoods. 
// weight_t cappedExitWeight = 0.0; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index aebae003262303..c9b1c16b436ca1 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4549,6 +4549,8 @@ FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfsTr { JITDUMP("Rejected %u loop headers\n", loops->m_improperLoopHeaders); } + + JITDUMPEXEC(Dump(loops)); #endif return loops; @@ -4624,6 +4626,261 @@ bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, Ar return true; } +#ifdef DEBUG + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::Dump: print one loop to the JitDump +// +/* static */ +void FlowGraphNaturalLoop::Dump(FlowGraphNaturalLoop* loop) +{ + if (loop == nullptr) + { + printf("loop is nullptr"); + return; + } + + // Display: LOOP# (old LOOP#) / header / parent loop# / blocks / entry edges / exit edges / back edges + // Blocks can be compacted to "[top .. bottom]" if lexically adjacent and no non-loop blocks in + // the range. Otherwise, print a verbose list of blocks. + + Compiler* comp = loop->GetDfsTree()->GetCompiler(); + printf(FMT_LP, loop->GetIndex()); + + // We might want to print out the old loop number using something like: + // + // if (comp->m_newToOldLoop[loop->GetIndex()] != nullptr) + // { + // printf(" (old: " FMT_LP ")", (unsigned)(comp->m_newToOldLoop[loop->GetIndex()] - comp->optLoopTable)); + // } + // + // However, not all callers of FlowGraphNaturalLoops::Find update m_newToOldLoop -- only + // Compiler::optFindNewLoops() does that. This dumper should work with any construction of + // FlowGraphNaturalLoops. + + printf(" header: " FMT_BB, loop->GetHeader()->bbNum); + if (loop->GetParent() != nullptr) + { + printf(" parent: " FMT_LP, loop->GetParent()->GetIndex()); + } + + // Dump the set of blocks in the loop. There are four cases: + // 1. 
If there is only one block in the loop, display it. + // 2. If the blocks happen to be lexically dense and without non-loop blocks in the range, + // then use a shortcut of `[BBtop .. BBbottom]`. Note that "lexically dense" is defined + // in terms of the "bbNext" ordering of blocks, which is the default used by the basic + // block dumper fgDispBasicBlocks. However, setting JitDumpFgBlockOrder can change the + // basic block dump order. + // 3. If all the loop blocks are found when traversing from the lexical top to lexical + // bottom block (as defined by `bbNum` ordering, not `bbNext` ordering), then display + // a set of ranges, with the non-loop blocks in the range breaking up the continuous range. + // 4. Otherwise, display the entire list of blocks individually. + // + // Lexicality depends on properly renumbered blocks, which we might not have when dumping. + + bool first; + const unsigned numBlocks = loop->NumLoopBlocks(); + printf("\n Members (%u): ", numBlocks); + + if (numBlocks == 0) + { + // This should never happen + printf("NONE?"); + } + else if (numBlocks == 1) + { + // If there's exactly one block, it must be the header. + printf(FMT_BB, loop->GetHeader()->bbNum); + } + else + { + BasicBlock* const lexicalTopBlock = loop->GetLexicallyTopMostBlock(); + BasicBlock* const lexicalBottomBlock = loop->GetLexicallyBottomMostBlock(); + BasicBlock* const lexicalEndIteration = lexicalBottomBlock->Next(); + unsigned numLexicalBlocks = 0; + + // Count the number of loop blocks found in the identified lexical range. If there are non-loop blocks + // found, or if we don't find all the loop blocks in the lexical walk (meaning the bbNums might not be + // properly ordered), we fail. 
+ bool lexicallyDense = true; // assume the best + bool lexicalRangeContainsAllLoopBlocks = true; // assume the best + for (BasicBlock* block = lexicalTopBlock; (block != nullptr) && (block != lexicalEndIteration); + block = block->Next()) + { + if (!loop->ContainsBlock(block)) + { + lexicallyDense = false; + } + else + { + ++numLexicalBlocks; + } + } + if (numBlocks != numLexicalBlocks) + { + lexicalRangeContainsAllLoopBlocks = false; + } + + if (lexicallyDense && lexicalRangeContainsAllLoopBlocks) + { + // This is just an optimization over the next case (`!lexicallyDense`) as there's no need to + // loop over the blocks again. + printf("[" FMT_BB ".." FMT_BB "]", lexicalTopBlock->bbNum, lexicalBottomBlock->bbNum); + } + else if (lexicalRangeContainsAllLoopBlocks) + { + // The lexical range from top to bottom contains all the loop blocks, but also contains some + // non-loop blocks. Try to display the blocks in groups of ranges, to avoid dumping all the + // blocks individually. + BasicBlock* firstInRange = nullptr; + BasicBlock* lastInRange = nullptr; + first = true; + auto printRange = [&]() { + // Dump current range if there is one; reset firstInRange. + if (firstInRange == nullptr) + { + return; + } + if (!first) + { + printf(";"); + } + if (firstInRange == lastInRange) + { + // Just one block in range + printf(FMT_BB, firstInRange->bbNum); + } + else + { + printf("[" FMT_BB ".." 
FMT_BB "]", firstInRange->bbNum, lastInRange->bbNum); + } + firstInRange = lastInRange = nullptr; + first = false; + }; + for (BasicBlock* block = lexicalTopBlock; block != lexicalEndIteration; block = block->Next()) + { + if (!loop->ContainsBlock(block)) + { + printRange(); + } + else + { + if (firstInRange == nullptr) + { + firstInRange = block; + } + lastInRange = block; + } + } + printRange(); + } + else + { + // We didn't see all the loop blocks in the lexical range; maybe the `bbNum` order is + // not well ordered such that `top` and `bottom` are not first/last in `bbNext` order. + // Just dump all the blocks individually using the loop block visitor. + first = true; + loop->VisitLoopBlocksReversePostOrder([&first](BasicBlock* block) { + printf("%s" FMT_BB, first ? "" : ";", block->bbNum); + first = false; + return BasicBlockVisit::Continue; + }); + + // Print out the lexical top and bottom blocks, which will explain why we didn't print ranges. + printf("\n Lexical top: " FMT_BB, lexicalTopBlock->bbNum); + printf("\n Lexical bottom: " FMT_BB, lexicalBottomBlock->bbNum); + } + } + + // Dump Entry Edges, Back Edges, Exit Edges + + printf("\n Entry: "); + if (loop->EntryEdges().size() == 0) + { + printf("NONE"); + } + else + { + first = true; + for (FlowEdge* const edge : loop->EntryEdges()) + { + printf("%s" FMT_BB " -> " FMT_BB, first ? "" : "; ", edge->getSourceBlock()->bbNum, + loop->GetHeader()->bbNum); + first = false; + } + } + + printf("\n Exit: "); + if (loop->ExitEdges().size() == 0) + { + printf("NONE"); + } + else + { + first = true; + for (FlowEdge* const edge : loop->ExitEdges()) + { + BasicBlock* const exitingBlock = edge->getSourceBlock(); + printf("%s" FMT_BB " ->", first ? 
"" : "; ", exitingBlock->bbNum); + exitingBlock->VisitRegularSuccs(comp, [=](BasicBlock* succ) { + if (comp->fgGetPredForBlock(succ, exitingBlock) == edge) + { + printf(" " FMT_BB, succ->bbNum); + } + return BasicBlockVisit::Continue; + }); + first = false; + } + } + + printf("\n Back: "); + if (loop->BackEdges().size() == 0) + { + printf("NONE"); + } + else + { + first = true; + for (FlowEdge* const edge : loop->BackEdges()) + { + printf("%s" FMT_BB " -> " FMT_BB, first ? "" : "; ", edge->getSourceBlock()->bbNum, + loop->GetHeader()->bbNum); + first = false; + } + } + + printf("\n"); +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::Dump: print the loops to the JitDump +// +/* static */ +void FlowGraphNaturalLoops::Dump(FlowGraphNaturalLoops* loops) +{ + printf("\n*************** (New) Natural loop graph\n"); + + if (loops == nullptr) + { + printf("loops is nullptr\n"); + } + else if (loops->NumLoops() == 0) + { + printf("No loops\n"); + } + else + { + for (FlowGraphNaturalLoop* loop : loops->InReversePostOrder()) + { + FlowGraphNaturalLoop::Dump(loop); + } + } + + printf("\n"); +} + +#endif // DEBUG + //------------------------------------------------------------------------ // FlowGraphNaturalLoop::VisitDefs: Visit all definitions contained in the // loop. diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 30218fd0da0cda..5557d299a964d0 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5495,7 +5495,7 @@ void Compiler::optFindNewLoops() { m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); - m_newToOldLoop = m_loops->NumLoops() == 0 ? nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); + m_newToOldLoop = (m_loops->NumLoops() == 0) ? 
nullptr : (new (this, CMK_Loops) LoopDsc*[m_loops->NumLoops()]{}); m_oldToNewLoop = new (this, CMK_Loops) FlowGraphNaturalLoop*[BasicBlock::MAX_LOOP_NUM]{}; // Unnatural loops can quickly become natural if we manage to remove some