Skip to content

Commit

Permalink
JIT: Extend jump tables (dotnet#107831)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo authored Sep 19, 2024
1 parent d6094d2 commit a35f722
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 27 deletions.
8 changes: 4 additions & 4 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5170,14 +5170,14 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);

// Optimize block order
//
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);

// Conditional to Switch conversion
//
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition);

// Optimize block order
//
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);

// Determine start of cold region if we are hot/cold splitting
//
DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7053,6 +7053,7 @@ class Compiler
PhaseStatus optSwitchRecognition();
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
bool optExtendSwitch(BasicBlock* block);

PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication
Expand Down Expand Up @@ -10509,6 +10510,7 @@ class Compiler
STRESS_MODE(IF_CONVERSION_INNER_LOOPS) \
STRESS_MODE(POISON_IMPLICIT_BYREFS) \
STRESS_MODE(STORE_BLOCK_UNROLLING) \
STRESS_MODE(DONT_LIMIT_JUMP_TABLE) \
STRESS_MODE(COUNT)

enum compStressArea
Expand Down
265 changes: 242 additions & 23 deletions src/coreclr/jit/switchrecognition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,33 @@
//
PhaseStatus Compiler::optSwitchRecognition()
{
// Limit to XARCH, ARM is already doing a great job with such comparisons using
// a series of ccmp instruction (see ifConvert phase).
#ifdef TARGET_XARCH
bool modified = false;
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
{
// block->KindIs(BBJ_COND) check is for better throughput.
#ifdef TARGET_XARCH
// Limit to XARCH, ARM is already doing a great job with such comparisons using
// a series of ccmp instruction (see ifConvert phase).
if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block))
{
JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum)
modified = true;
}
#endif

// See if we can merge BBJ_COND into an existing switch block
while (block->KindIs(BBJ_SWITCH) && optExtendSwitch(block))
{
modified = true;
}
}

if (modified)
{
// Remove unreachable blocks
fgUpdateFlowGraph();
fgRenumberBlocks();
return PhaseStatus::MODIFIED_EVERYTHING;
}
#endif
return PhaseStatus::MODIFIED_NOTHING;
}

Expand All @@ -63,8 +70,8 @@ PhaseStatus Compiler::optSwitchRecognition()
//
bool IsConstantTestCondBlock(const BasicBlock* block,
bool allowSideEffects,
BasicBlock** trueTarget,
BasicBlock** falseTarget,
FlowEdge** trueEdge,
FlowEdge** falseEdge,
bool* isReversed,
GenTree** variableNode = nullptr,
ssize_t* cns = nullptr)
Expand Down Expand Up @@ -102,9 +109,9 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
return false;
}

*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
*trueTarget = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
*falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
*trueEdge = *isReversed ? block->GetFalseEdge() : block->GetTrueEdge();
*falseEdge = *isReversed ? block->GetTrueEdge() : block->GetFalseEdge();

if (block->FalseTargetIs(block) || block->TrueTargetIs(block))
{
Expand Down Expand Up @@ -132,6 +139,215 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
return false;
}

//------------------------------------------------------------------------------
// GetSwitchValueOp: Try to extract BBJ_SWITCH's variable (and the offset)
//
// Arguments:
// switchBlock - BBJ_SWITCH block to get the switch value from
// variableNode - [out] Switch value node
// offset - [out] Offset of the switch value
//
// Return Value:
// True if switch value was successfully extracted, false otherwise
//
static bool GetSwitchValueOp(const BasicBlock* switchBlock, GenTree** variableNode, ssize_t* offset)
{
assert(switchBlock->KindIs(BBJ_SWITCH));

GenTree* switchNode = switchBlock->lastStmt()->GetRootNode()->gtEffectiveVal();
assert(switchNode->OperIs(GT_SWITCH));

GenTree* switchOp = switchNode->gtGetOp1();

// In most cases the switch value is either ADD(LCL_VAR, CNS) or LCL_VAR
if (switchOp->OperIs(GT_ADD))
{
// We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before
// any other case node in the switch block anyway.
GenTree* op1 = switchOp->gtGetOp1()->gtEffectiveVal();
GenTree* op2 = switchOp->gtGetOp2();

// TODO-JumpTable: We should be able to accept any side-effect-free op1 here
if (op1->OperIs(GT_LCL_VAR) && op2->IsCnsIntOrI())
{
*variableNode = op1;
*offset = op2->AsIntCon()->IconValue();
return true;
}
}
else
{
// We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before
// any other case node in the switch block anyway.
GenTree* op1 = switchOp->gtEffectiveVal();
if (op1->OperIs(GT_LCL_VAR))
{
*variableNode = op1;
*offset = 0;
return true;
}
}
return false;
}

//------------------------------------------------------------------------------
// optExtendSwitch: If one of the switch's targets is a value test block,
// and the switch's variable is the same as the value test's variable - try to merge
// that test (BBJ_COND) into the switch block. It's useful to assist Roslyn, which
// uses fairly conservative heuristics to generate switch blocks and may give up on
// some tests that could be merged into the switch.
//
// Arguments:
// block - BBJ_SWITCH block to extend
//
// Return Value:
// True if switch was successfully extended, false otherwise
//
bool Compiler::optExtendSwitch(BasicBlock* block)
{
assert(block->KindIs(BBJ_SWITCH));
JITDUMP("Considering expanding switch " FMT_BB "\n", block->bbNum);

if (opts.IsInstrumented())
{
JITDUMP("Instrumentation is enabled - bail out.\n");
return false;
}

assert(opts.OptimizationEnabled());

GenTree* switchOp = nullptr;
ssize_t switchTargetOffset = 0;
if (!GetSwitchValueOp(block, &switchOp, &switchTargetOffset))
{
JITDUMP("Couldn't extract the switch's variable - bail out.\n");
return false;
}

// TODO-JumpTable: Currently, we only expand the default target, but it should be possible
// to traverse all non-default edges as well (if they point to similar value tests).
// It shouldn't be too hard to implement (might be TP costly, though).

BBswtDesc* switchTargets = block->GetSwitchTargets();
if (!switchTargets->bbsHasDefault)
{
JITDUMP("Switch doesn't have a default target - bail out.\n");
return false;
}

// Now let's see if we can merge the value test in the edgeToExpand's destination block
// with the switch block.
FlowEdge* edgeToExpand = switchTargets->getDefault();
BasicBlock* edgeToExpandDst = edgeToExpand->getDestinationBlock();

// IsConstantTestCondBlock will tell us if the default target is a value test block (e.g. "x == cns")
// TODO-JumpTable: Add support for more complex tests like "x > 10 && x < 20"
// Also, we should be able to merge two BBJ_SWITCH on the same variable. Roslyn may create such shapes.
FlowEdge* testPasses;
FlowEdge* testFails;
bool isReversed;
GenTree* variableNode;
ssize_t cns;
if (!IsConstantTestCondBlock(edgeToExpandDst, false, &testPasses, &testFails, &isReversed, &variableNode, &cns) ||
!edgeToExpandDst->hasSingleStmt())
{
JITDUMP("The default target is not performing a value test - bail out.\n");
return false;
}

BasicBlock* testPassesBb = testPasses->getDestinationBlock();
BasicBlock* testFailsBb = testFails->getDestinationBlock();

if (!GenTree::Compare(variableNode, switchOp))
{
JITDUMP("The default target is performing a value test on a different variable - bail out.\n");
return false;
}

// We're less conservative than Roslyn, but we still have some limits (unless stress mode is enabled)

if ((static_cast<size_t>(cns + switchTargetOffset) > SWITCH_MAX_DISTANCE) &&
!compStressCompile(STRESS_DONT_LIMIT_JUMP_TABLE, 50))
{
JITDUMP("Switch value is out of range - bail out.\n");
return false;
}

// We need to normalize the test value to the switch's range
const ssize_t newTestValue = cns + switchTargetOffset;

unsigned oldJumpCnt = switchTargets->bbsCount;
unsigned newJumpCnt = max(oldJumpCnt, (unsigned)(newTestValue + 1 + (switchTargets->bbsHasDefault ? 1 : 0)));
FlowEdge** oldJumpTab = switchTargets->bbsDstTab;
FlowEdge** newJumpTab = new (this, CMK_FlowEdge) FlowEdge*[newJumpCnt];

//
// Update likelihoods
//
weight_t edgeToExpandLikelihood = edgeToExpand->getLikelihood();
weight_t newTestLikelihood = edgeToExpandLikelihood * testPasses->getLikelihood();
weight_t newDefLikelihood = edgeToExpandLikelihood * testFails->getLikelihood();

// However, it is tricky if testPasses or testFails are already presented in the jump table.
// Switch block currently doesn't support different likelihoods for edges pointing to the same block.
// So we need to adjust the existing likelihoods.
for (unsigned i = 0; i < oldJumpCnt; i++)
{
BasicBlock* edgeDstBb = oldJumpTab[i]->getDestinationBlock();
weight_t edgeLikelihood = oldJumpTab[i]->getLikelihood();

if (edgeDstBb == testPassesBb)
{
newTestLikelihood = edgeLikelihood + edgeToExpandLikelihood * testPasses->getLikelihood();
}
if (edgeDstBb == testFailsBb)
{
newDefLikelihood = edgeLikelihood + edgeToExpandLikelihood * testFails->getLikelihood();
}
}

//
// Create the new jump table
//
for (unsigned i = 0; i < newJumpCnt; i++)
{
if (i < oldJumpCnt)
{
// Not only the default case can point to edgeToExpandDst
if (oldJumpTab[i]->getDestinationBlock() == edgeToExpandDst)
{
fgRemoveRefPred(oldJumpTab[i]);
}
else
{
// Just copy the old edge
newJumpTab[i] = oldJumpTab[i];
continue;
}
}

if (i == (unsigned)newTestValue)
{
newJumpTab[i] = fgAddRefPred(testPassesBb, block);
newJumpTab[i]->setLikelihood(newTestLikelihood);
}
else
{
newJumpTab[i] = fgAddRefPred(testFailsBb, block);
newJumpTab[i]->setLikelihood(newDefLikelihood);
}
}

switchTargets->bbsCount = newJumpCnt;
switchTargets->bbsDstTab = newJumpTab;

// m_switchDescMap cache is invalidated by this change, so it has to be recomputed
InvalidateUniqueSwitchSuccMap();

JITDUMP("Successfully merged the default target's value test with the switch.\n");
return true;
}

//------------------------------------------------------------------------------
// optSwitchDetectAndConvert : Try to detect a series of conditional blocks which
// can be converted into a switch (jump-table) construct. See optSwitchConvert
Expand All @@ -147,10 +363,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
{
assert(firstBlock->KindIs(BBJ_COND));

GenTree* variableNode = nullptr;
ssize_t cns = 0;
BasicBlock* trueTarget = nullptr;
BasicBlock* falseTarget = nullptr;
GenTree* variableNode = nullptr;
ssize_t cns = 0;
FlowEdge* trueTarget = nullptr;
FlowEdge* falseTarget = nullptr;

// The algorithm is simple - we check that the given block is a constant test block
// and then try to accumulate as many constant test blocks as possible. Once we hit
Expand Down Expand Up @@ -180,10 +396,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
// Now walk the next blocks and see if they are basically the same type of test
for (const BasicBlock* currBb = firstBlock->Next(); currBb != nullptr; currBb = currBb->Next())
{
GenTree* currVariableNode = nullptr;
ssize_t currCns = 0;
BasicBlock* currTrueTarget = nullptr;
BasicBlock* currFalseTarget = nullptr;
GenTree* currVariableNode = nullptr;
ssize_t currCns = 0;
FlowEdge* currTrueTarget = nullptr;
FlowEdge* currFalseTarget = nullptr;

if (!currBb->hasSingleStmt())
{
Expand All @@ -196,7 +412,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
&currVariableNode, &currCns))
{
if (currTrueTarget != trueTarget)
if (currTrueTarget->getDestinationBlock() != trueTarget->getDestinationBlock())
{
// This blocks jumps to a different target, stop searching and process what we already have.
return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
Expand Down Expand Up @@ -329,12 +545,15 @@ bool Compiler::optSwitchConvert(
lastBlock = lastBlock->Next();
}

BasicBlock* blockIfTrue = nullptr;
BasicBlock* blockIfFalse = nullptr;
bool isReversed = false;
const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
FlowEdge* blockIfTrueEdge = nullptr;
FlowEdge* blockIfFalseEdge = nullptr;
bool isReversed = false;
const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrueEdge, &blockIfFalseEdge, &isReversed);
assert(isTest);

BasicBlock* blockIfTrue = blockIfTrueEdge->getDestinationBlock();
BasicBlock* blockIfFalse = blockIfFalseEdge->getDestinationBlock();

assert(firstBlock->TrueTargetIs(blockIfTrue));
FlowEdge* const trueEdge = firstBlock->GetTrueEdge();
FlowEdge* const falseEdge = firstBlock->GetFalseEdge();
Expand Down

0 comments on commit a35f722

Please sign in to comment.