Skip to content

Commit

Permalink
StoreIND/Store_BLK/Store_OBJ improvements. (dotnet#38316)
Browse files Browse the repository at this point in the history
* Arm64: support contained `GT_LCL_VAR/FLD_ADDR` in indir.

Support contained `GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR` under `IND, STOREIND`.

* XARCH: Support contained `GT_LCL_FLD_ADDR` in IND.

Support contained `GT_LCL_FLD_ADDR` under `GT_STORE_IND, GT_IND`.

* Clear `GTF_IND_ASG_LHS` after Rationalize for STORE_OBJ/BLK.

We were doing this for `STOREIND`, but forgetting for `STORE_OBJ/BLK`.

* Extract `LowerStoreIndirCommon`, `LowerIndir`.

Extract without changes, I will need to add additional calls to them later.

* Call the extracted functions.

Fix a few regressions with `NoRetyping`.

* Create `LEA` of complex addr expr.

Gives a few improvements when addr has an index.

* Extract `LowerBlockStoreCommon`.

* Transform STORE_BLK/OBJ into STOREIND.

When it is possible and profitable.

* Don't transform for GC and small types.

For GC types we were not trying to contain addr (why?) when a barrier was needed, and it looked too dangerous to me to make such a change (for 5.0).

For small types we were generating `movzx rax` for `IND byte` instead of `mov ax`.

* Update src/coreclr/src/jit/lower.cpp

Co-authored-by: Carol Eidt <[email protected]>

* Use `TODO-CQ`.

* Extract `Rationalizer::RewriteIndir`.

* Fix code review.

Co-authored-by: Carol Eidt <[email protected]>
  • Loading branch information
Sergey Andreenko and CarolEidt authored Jun 28, 2020
1 parent 4ef07c2 commit a2f1b58
Show file tree
Hide file tree
Showing 8 changed files with 309 additions and 118 deletions.
20 changes: 19 additions & 1 deletion src/coreclr/src/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13244,7 +13244,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR

if (addr->isContained())
{
assert(addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA));

int offset = 0;
DWORD lsl = 0;
Expand Down Expand Up @@ -13327,6 +13327,24 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
regNumber addrReg = indir->GetSingleTempReg();
emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0);
}
else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR))
{
GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
unsigned lclNum = varNode->GetLclNum();
unsigned offset = 0;
if (addr->OperIs(GT_LCL_FLD_ADDR))
{
offset = varNode->AsLclFld()->GetLclOffs();
}
if (emitInsIsStore(ins))
{
emitIns_S_R(ins, attr, dataReg, lclNum, offset);
}
else
{
emitIns_R_S(ins, attr, dataReg, lclNum, offset);
}
}
else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet())))
{
// Then load/store dataReg from/to [memBase + offset]
Expand Down
20 changes: 15 additions & 5 deletions src/coreclr/src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2954,10 +2954,15 @@ void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, G
return;
}

if (addr->OperGet() == GT_LCL_VAR_ADDR)
if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR))
{
GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), 0);
unsigned offset = 0;
if (addr->OperIs(GT_LCL_FLD_ADDR))
{
offset = varNode->AsLclFld()->GetLclOffs();
}
emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), offset);

// Updating variable liveness after instruction was emitted
codeGen->genUpdateLife(varNode);
Expand Down Expand Up @@ -3006,17 +3011,22 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m
return;
}

if (addr->OperGet() == GT_LCL_VAR_ADDR)
if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR))
{
GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
unsigned offset = 0;
if (addr->OperIs(GT_LCL_FLD_ADDR))
{
offset = varNode->AsLclFld()->GetLclOffs();
}
if (data->isContainedIntOrIImmed())
{
emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
emitIns_S_I(ins, attr, varNode->GetLclNum(), offset, (int)data->AsIntConCommon()->IconValue());
}
else
{
assert(!data->isContained());
emitIns_S_R(ins, attr, data->GetRegNum(), varNode->GetLclNum(), 0);
emitIns_S_R(ins, attr, data->GetRegNum(), varNode->GetLclNum(), offset);
}

// Updating variable liveness after instruction was emitted
Expand Down
254 changes: 195 additions & 59 deletions src/coreclr/src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,64 +113,11 @@ GenTree* Lowering::LowerNode(GenTree* node)
{
case GT_NULLCHECK:
case GT_IND:
// Process struct typed indirs separately unless they are unused;
// they only appear as the source of a block copy operation or a return node.
if (!node->TypeIs(TYP_STRUCT) || node->IsUnusedValue())
{
// TODO-Cleanup: We're passing isContainable = true but ContainCheckIndir rejects
// address containment in some cases so we end up creating trivial (reg + offfset)
// or (reg + reg) LEAs that are not necessary.
TryCreateAddrMode(node->AsIndir()->Addr(), true);
ContainCheckIndir(node->AsIndir());

if (node->OperIs(GT_NULLCHECK) || node->IsUnusedValue())
{
// A nullcheck is essentially the same as an indirection with no use.
// The difference lies in whether a target register must be allocated.
// On XARCH we can generate a compare with no target register as long as the addresss
// is not contained.
// On ARM64 we can generate a load to REG_ZR in all cases.
// However, on ARM we must always generate a load to a register.
// In the case where we require a target register, it is better to use GT_IND, since
// GT_NULLCHECK is a non-value node and would therefore require an internal register
// to use as the target. That is non-optimal because it will be modeled as conflicting
// with the source register(s).
// So, to summarize:
// - On ARM64, always use GT_NULLCHECK for a dead indirection.
// - On ARM, always use GT_IND.
// - On XARCH, use GT_IND if we have a contained address, and GT_NULLCHECK otherwise.
// In all cases, change the type to TYP_INT.
//
node->gtType = TYP_INT;
#ifdef TARGET_ARM64
bool useNullCheck = true;
#elif TARGET_ARM
bool useNullCheck = false;
#else // TARGET_XARCH
bool useNullCheck = !node->AsIndir()->Addr()->isContained();
#endif // !TARGET_XARCH

if (useNullCheck && node->OperIs(GT_IND))
{
node->ChangeOper(GT_NULLCHECK);
node->ClearUnusedValue();
}
else if (!useNullCheck && node->OperIs(GT_NULLCHECK))
{
node->ChangeOper(GT_IND);
node->SetUnusedValue();
}
}
}
LowerIndir(node->AsIndir());
break;

case GT_STOREIND:
assert(node->TypeGet() != TYP_STRUCT);
TryCreateAddrMode(node->AsIndir()->Addr(), true);
if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(node))
{
LowerStoreIndir(node->AsIndir());
}
LowerStoreIndirCommon(node->AsIndir());
break;

case GT_ADD:
Expand Down Expand Up @@ -303,7 +250,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
}
__fallthrough;
case GT_STORE_DYN_BLK:
LowerBlockStore(node->AsBlk());
LowerBlockStoreCommon(node->AsBlk());
break;

case GT_LCLHEAP:
Expand Down Expand Up @@ -3168,7 +3115,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
objStore->SetAddr(addr);
objStore->SetData(src);
BlockRange().InsertBefore(objStore, addr);
LowerBlockStore(objStore);
LowerBlockStoreCommon(objStore);
return;
}
}
Expand Down Expand Up @@ -3245,6 +3192,7 @@ void Lowering::LowerRetStruct(GenTreeUnOp* ret)
__fallthrough;
case GT_IND:
retVal->ChangeType(nativeReturnType);
LowerIndir(retVal->AsIndir());
break;

case GT_LCL_VAR:
Expand Down Expand Up @@ -3475,7 +3423,8 @@ void Lowering::LowerStoreCallStruct(GenTreeBlk* store)
{
store->ChangeType(regType);
store->SetOper(GT_STOREIND);
LowerStoreIndir(store->AsIndir());
LowerStoreIndirCommon(store);
return;
}
else
{
Expand All @@ -3492,7 +3441,7 @@ void Lowering::LowerStoreCallStruct(GenTreeBlk* store)

GenTreeLclVar* spilledCall = SpillStructCallResult(call);
store->SetData(spilledCall);
LowerBlockStore(store);
LowerBlockStoreCommon(store);
#endif // WINDOWS_AMD64_ABI
}
}
Expand Down Expand Up @@ -6351,3 +6300,190 @@ void Lowering::ContainCheckBitCast(GenTree* node)
op1->SetContained();
}
}

//------------------------------------------------------------------------
// LowerStoreIndirCommon: shared lowering logic for a GT_STOREIND node.
//
// Tries to fold the destination address into an addressing mode, then runs
// the target-specific store-indirect lowering — unless the store requires a
// GC write barrier, in which case further lowering is skipped here.
//
// Arguments:
//    ind - the GT_STOREIND node we are lowering.
//
void Lowering::LowerStoreIndirCommon(GenTreeIndir* ind)
{
    assert(ind->OperIs(GT_STOREIND));
    assert(ind->TypeGet() != TYP_STRUCT);

    TryCreateAddrMode(ind->Addr(), true);

    // Write-barrier stores are not lowered further on this path.
    const bool needsWriteBarrier = comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind);
    if (needsWriteBarrier)
    {
        return;
    }

    LowerStoreIndir(ind);
}

//------------------------------------------------------------------------
// LowerIndir: a common logic to lower IND load or NullCheck.
//
// Folds the address into an addressing mode where possible and decides,
// per target, whether an unused indirection should be represented as
// GT_IND or GT_NULLCHECK.
//
// Arguments:
//    ind - the ind node we are lowering.
//
void Lowering::LowerIndir(GenTreeIndir* ind)
{
    assert(ind->OperIs(GT_IND, GT_NULLCHECK));
    // Process struct typed indirs separately unless they are unused;
    // they only appear as the source of a block copy operation or a return node.
    if (!ind->TypeIs(TYP_STRUCT) || ind->IsUnusedValue())
    {
        // TODO-Cleanup: We're passing isContainable = true but ContainCheckIndir rejects
        // address containment in some cases so we end up creating trivial (reg + offset)
        // or (reg + reg) LEAs that are not necessary.
        TryCreateAddrMode(ind->Addr(), true);
        ContainCheckIndir(ind);

        if (ind->OperIs(GT_NULLCHECK) || ind->IsUnusedValue())
        {
            // A nullcheck is essentially the same as an indirection with no use.
            // The difference lies in whether a target register must be allocated.
            // On XARCH we can generate a compare with no target register as long as the address
            // is not contained.
            // On ARM64 we can generate a load to REG_ZR in all cases.
            // However, on ARM we must always generate a load to a register.
            // In the case where we require a target register, it is better to use GT_IND, since
            // GT_NULLCHECK is a non-value node and would therefore require an internal register
            // to use as the target. That is non-optimal because it will be modeled as conflicting
            // with the source register(s).
            // So, to summarize:
            // - On ARM64, always use GT_NULLCHECK for a dead indirection.
            // - On ARM, always use GT_IND.
            // - On XARCH, use GT_IND if we have a contained address, and GT_NULLCHECK otherwise.
            // In all cases, change the type to TYP_INT.
            //
            ind->gtType = TYP_INT;
#ifdef TARGET_ARM64
            bool useNullCheck = true;
#elif TARGET_ARM
            bool useNullCheck = false;
#else  // TARGET_XARCH
            bool useNullCheck = !ind->Addr()->isContained();
#endif // !TARGET_XARCH

            if (useNullCheck && ind->OperIs(GT_IND))
            {
                ind->ChangeOper(GT_NULLCHECK);
                ind->ClearUnusedValue();
            }
            else if (!useNullCheck && ind->OperIs(GT_NULLCHECK))
            {
                ind->ChangeOper(GT_IND);
                ind->SetUnusedValue();
            }
        }
    }
    else
    {
        // If the `ADDR` node under `STORE_OBJ(dstAddr, IND(struct(ADDR))`
        // is a complex one it could benefit from an `LEA` that is not contained.
        const bool isContainable = false;
        TryCreateAddrMode(ind->Addr(), isContainable);
    }
}

//------------------------------------------------------------------------
// LowerBlockStoreCommon: shared lowering logic for block store nodes.
//
// First attempts to retype the block store as a simple GT_STOREIND; when
// that transformation does not apply, falls back to the regular
// block-store lowering.
//
// Arguments:
//    blkNode - the GT_STORE_BLK/GT_STORE_OBJ/GT_STORE_DYN_BLK node.
//
void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode)
{
    assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK, GT_STORE_OBJ));

    if (!TryTransformStoreObjAsStoreInd(blkNode))
    {
        LowerBlockStore(blkNode);
    }
}

//------------------------------------------------------------------------
// TryTransformStoreObjAsStoreInd: try to replace STORE_OBJ/BLK as STOREIND.
//
// Arguments:
//    blkNode - the store node.
//
// Return value:
//    true if the replacement was made, false otherwise.
//
// Notes:
//    TODO-CQ: this method should do the transformation when possible
//    and STOREIND should always generate better or the same code as
//    STORE_OBJ/BLK for the same copy.
//
bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
{
    assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK, GT_STORE_OBJ));

    // A dynamically-sized block store has no fixed layout to retype from.
    if (blkNode->OperIs(GT_STORE_DYN_BLK))
    {
        return false;
    }

    ClassLayout* layout = blkNode->GetLayout();
    if (layout == nullptr)
    {
        return false;
    }

    // The transformation only applies when the whole block fits in a single
    // register; TYP_UNDEF means it does not.
    var_types regType = layout->GetRegisterType();
    if (regType == TYP_UNDEF)
    {
        return false;
    }
    if (varTypeIsSIMD(regType))
    {
        // TODO-CQ: support STORE_IND SIMD16(SIMD16, CNT_INT 0).
        return false;
    }

    if (varTypeIsGC(regType))
    {
        // TODO-CQ: STOREIND does not try to contain src if we need a barrier,
        // STORE_OBJ generates better code currently.
        return false;
    }

    GenTree* src = blkNode->Data();
    // Only constant init values can be folded into a STOREIND.
    if (src->OperIsInitVal() && !src->IsConstInitVal())
    {
        return false;
    }

    if (varTypeIsSmall(regType) && !src->IsConstInitVal())
    {
        // source operand INDIR will use a widening instruction
        // and generate worse code, like `movzx` instead of `mov`
        // on x64.
        return false;
    }

    // Retype the block store as a plain indirect store of the register type.
    blkNode->ChangeOper(GT_STOREIND);
    blkNode->ChangeType(regType);

    // NOTE(review): presumably this conservatively marks the new STOREIND as
    // potentially targeting the heap unless the block store was already known
    // not to — confirm against the GTF_IND_* flag documentation.
    if ((blkNode->gtFlags & GTF_IND_TGT_NOT_HEAP) == 0)
    {
        blkNode->gtFlags |= GTF_IND_TGTANYWHERE;
    }

    if (varTypeIsStruct(src))
    {
        // Retype the struct-typed source and re-lower it under the new type.
        src->ChangeType(regType);
        LowerNode(blkNode->Data());
    }
    else if (src->OperIsInitVal())
    {
        // Replace the INIT_VAL wrapper with its (constant) operand.
        GenTreeUnOp* initVal = src->AsUnOp();
        src                  = src->gtGetOp1();
        assert(src->IsCnsIntOrI());
        src->AsIntCon()->FixupInitBlkValue(regType);
        blkNode->SetData(src);
        BlockRange().Remove(initVal);
    }
    // Finish lowering the node as an ordinary STOREIND.
    LowerStoreIndirCommon(blkNode);
    return true;
}
Loading

0 comments on commit a2f1b58

Please sign in to comment.