From d1352213b9e74f290cb001328ec9dc248575d2aa Mon Sep 17 00:00:00 2001 From: Eugene Rozenfeld Date: Fri, 4 Jan 2019 21:27:20 -0800 Subject: [PATCH] Improvements for object stack allocation. This change enables object stack allocation for more cases. 1. Objects with gc fields can now be stack-allocated. 2. Object stack allocation is enabled for x86. ObjectAllocator updates the types of trees containing references to possibly-stack-allocated objects to TYP_BYREF or TYP_I_IMPL as appropriate. That allows us to remove the hacks in gcencode.cpp and refine reporting of pointers: the pointer is not reported when we can prove that it always points to a stack-allocated object or is null (typed as TYP_I_IMPL); the pointer is reported as an interior pointer when it may point to either a stack-allocated object or a heap-allocated object (typed as TYP_BYREF); the pointer is reported as a normal pointer when it points to a heap-allocated object (typed as TYP_REF). ObjectAllocator also adds flags to indirections: GTF_IND_TGTANYWHERE when the indirection may be the heap or the stack (that results in checked write barriers used for writes) or the new GTF_IND_TGT_NOT_HEAP when the indirection is null or stack memory (that results in no barrier used for writes). Commit migrated from https://github.com/dotnet/coreclr/commit/f2d3dfeda5140ea003f0dae166cc62f0fb1d31d8 --- src/coreclr/src/jit/compiler.cpp | 4 + src/coreclr/src/jit/gcencode.cpp | 28 -- src/coreclr/src/jit/gcinfo.cpp | 6 + src/coreclr/src/jit/gentree.cpp | 11 + src/coreclr/src/jit/gentree.h | 10 +- src/coreclr/src/jit/jitgcinfo.h | 3 - src/coreclr/src/jit/morph.cpp | 4 - src/coreclr/src/jit/objectalloc.cpp | 265 ++++++++++++++++-- src/coreclr/src/jit/objectalloc.h | 67 ++++- src/coreclr/tests/issues.targets | 7 - .../ObjectStackAllocationTests.cs | 92 +++++- 11 files changed, 400 insertions(+), 97 deletions(-) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index 40412903d4473..58197f9532540 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -9269,6 +9269,10 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree) { chars += printf("[IND_TGTANYWHERE]"); } + if (tree->gtFlags & GTF_IND_TGT_NOT_HEAP) + { + chars += printf("[IND_TGT_NOT_HEAP]"); + } if (tree->gtFlags & GTF_IND_TLS_REF) { chars += printf("[IND_TLS_REF]"); diff --git a/src/coreclr/src/jit/gcencode.cpp b/src/coreclr/src/jit/gcencode.cpp index af37304d48bd1..3d3cd88236bb3 100644 --- a/src/coreclr/src/jit/gcencode.cpp +++ b/src/coreclr/src/jit/gcencode.cpp @@ -4245,7 +4245,6 @@ void GCInfo::gcMakeRegPtrTable( // Or in byref_OFFSET_FLAG for 'byref' pointer tracking flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); } - gcUpdateFlagForStackAllocatedObjects(flags); if (varDsc->lvPinned) { @@ -4345,7 +4344,6 @@ void GCInfo::gcMakeRegPtrTable( { flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); } - gcUpdateFlagForStackAllocatedObjects(flags); GcStackSlotBase stackSlotBase = GC_SP_REL; if (compiler->isFramePointerUsed()) @@ -4730,7 +4728,6 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, { regFlags = (GcSlotFlags)(regFlags | GC_SLOT_INTERIOR); } - gcUpdateFlagForStackAllocatedObjects(regFlags); RegSlotIdKey rskey(regNum, regFlags); GcSlotId regSlotId; @@ -4821,7 +4818,6 @@ void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode { flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); } - gcUpdateFlagForStackAllocatedObjects(flags); if ((lowBits & pinned_OFFSET_FLAG) != 0) { @@ -4925,30 +4921,6 @@ void GCInfo::gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder, } } -//------------------------------------------------------------------------ -// gcUpdateFlagForStackAllocatedObjects: Update the flags to handle a possibly stack-allocated object. -// allocation. -// Arguments: -// flags - flags to update -// -// -// Notes: -// TODO-ObjectStackAllocation: This is a temporary conservative implementation. -// Currently variables pointing to heap and/or stack allocated objects have type TYP_REF so we -// conservatively report them as INTERIOR. -// Ideally we should have the following types for object pointers: -// 1. TYP_I_IMPL for variables always pointing to stack-allocated objects (not reporting to GC) -// 2. TYP_REF for variables always pointing to heap-allocated objects (reporting as normal objects to GC) -// 3. TYP_BYREF for variables that may point to the stack or to the heap (reporting as interior objects to GC) - -void GCInfo::gcUpdateFlagForStackAllocatedObjects(GcSlotFlags& flags) -{ - if ((compiler->optMethodFlags & OMF_HAS_OBJSTACKALLOC) != 0) - { - flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR); - } -} - #undef GCENCODER_WITH_LOGGING #endif // !JIT32_GCENCODER diff --git a/src/coreclr/src/jit/gcinfo.cpp b/src/coreclr/src/jit/gcinfo.cpp index ed4c468de9b0a..14ca8a873f5e0 100644 --- a/src/coreclr/src/jit/gcinfo.cpp +++ b/src/coreclr/src/jit/gcinfo.cpp @@ -267,6 +267,12 @@ GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTree* tgt, GenTree // This case occurs for Span. return WBF_NoBarrier; } + if (tgt->gtFlags & GTF_IND_TGT_NOT_HEAP) + { + // This indirection is not from to the heap. + // This case occurs for stack-allocated objects. + return WBF_NoBarrier; + } return gcWriteBarrierFormFromTargetAddress(tgt->gtOp.gtOp1); case GT_LEA: diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index d9831113f21e0..46105791a62e5 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -9337,6 +9337,12 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _ --msgLength; break; } + if (tree->gtFlags & GTF_IND_TGT_NOT_HEAP) + { + printf("s"); + --msgLength; + break; + } if (tree->gtFlags & GTF_IND_INVARIANT) { printf("#"); @@ -14439,6 +14445,11 @@ GenTree* Compiler::gtNewTempAssign( { ok = true; } + // 3) TYP_BYREF = TYP_REF when object stack allocation is enabled + else if (JitConfig.JitObjectStackAllocation() && (dstTyp == TYP_BYREF) && (valTyp == TYP_REF)) + { + ok = true; + } if (!ok) { diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 6a1fd4a9bb11c..e639d5400eda9 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -798,6 +798,7 @@ struct GenTree #define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX #define GTF_INX_STRING_LAYOUT 0x40000000 // GT_INDEX -- this uses the special string array layout +#define GTF_IND_TGT_NOT_HEAP 0x80000000 // GT_IND -- the target is not on the heap #define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile sematics (this is a nop on X86) #define GTF_IND_NONFAULTING 0x20000000 // Operations for which OperIsIndir() is true -- An indir that cannot fault. // Same as GTF_ARRLEN_NONFAULTING. @@ -818,7 +819,7 @@ struct GenTree #define GTF_IND_FLAGS \ (GTF_IND_VOLATILE | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \ - GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX) + GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX | GTF_IND_TGT_NOT_HEAP) #define GTF_CLS_VAR_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE #define GTF_CLS_VAR_INITCLASS 0x20000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_FLD_INITCLASS @@ -1807,8 +1808,11 @@ struct GenTree while (node->gtOper == GT_COMMA) { node = node->gtGetOp2(); - assert(node->gtType == oldType); - node->gtType = newType; + if (node->gtType != newType) + { + assert(node->gtType == oldType); + node->gtType = newType; + } } } diff --git a/src/coreclr/src/jit/jitgcinfo.h b/src/coreclr/src/jit/jitgcinfo.h index 1d643e9c8277d..193e77c172ca4 100644 --- a/src/coreclr/src/jit/jitgcinfo.h +++ b/src/coreclr/src/jit/jitgcinfo.h @@ -231,9 +231,6 @@ class GCInfo regPtrDsc* genStackPtrFirst, regPtrDsc* genStackPtrLast); - // Update the flags for a stack allocated object - void gcUpdateFlagForStackAllocatedObjects(GcSlotFlags& flags); - #endif #if MEASURE_PTRTAB_SIZE diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 1cf4f37747542..5f675dd76b47b 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -16857,14 +16857,10 @@ void Compiler::fgMorph() // local variable allocation on the stack. ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS -// TODO-ObjectStackAllocation: Enable the optimization for architectures using -// JIT32_GCENCODER (i.e., x86). -#ifndef JIT32_GCENCODER if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled()) { objectAllocator.EnableObjectStackAllocation(); } -#endif // JIT32_GCENCODER objectAllocator.Run(); diff --git a/src/coreclr/src/jit/objectalloc.cpp b/src/coreclr/src/jit/objectalloc.cpp index 7b9fac8aa9e8d..1a8f779bb8ceb 100644 --- a/src/coreclr/src/jit/objectalloc.cpp +++ b/src/coreclr/src/jit/objectalloc.cpp @@ -49,6 +49,7 @@ void ObjectAllocator::DoPhase() if (didStackAllocate) { + ComputeStackObjectPointers(&m_bitVecTraits); RewriteUses(); } } @@ -66,18 +67,29 @@ void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum) } //------------------------------------------------------------------------------ -// IsLclVarEscaping : Check if the local variable has been marked as escaping. +// MarkLclVarAsPossiblyStackPointing : Mark local variable as possibly pointing +// to a stack-allocated object. // // // Arguments: -// lclNum - Local variable number +// lclNum - Possibly stack-object-pointing local variable number + +void ObjectAllocator::MarkLclVarAsPossiblyStackPointing(unsigned int lclNum) +{ + BitVecOps::AddElemD(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum); +} + +//------------------------------------------------------------------------------ +// MarkLclVarAsDefinitelyStackPointing : Mark local variable as definitely pointing +// to a stack-allocated object. // -// Return Value: -// True if the local variable has been marked as escaping; false otherwise +// +// Arguments: +// lclNum - Definitely stack-object-pointing local variable number -bool ObjectAllocator::IsLclVarEscaping(unsigned int lclNum) +void ObjectAllocator::MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum) { - return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum); + BitVecOps::AddElemD(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum); } //------------------------------------------------------------------------------ @@ -163,7 +175,7 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph() if (m_allocator->CanLclVarEscapeViaParentStack(&m_ancestors, lclNum)) { - if (!m_allocator->IsLclVarEscaping(lclNum)) + if (!m_allocator->CanLclVarEscape(lclNum)) { JITDUMP("V%02u first escapes via [%06u]\n", lclNum, m_compiler->dspTreeID(tree)); } @@ -178,7 +190,7 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph() { var_types type = comp->lvaTable[lclNum].TypeGet(); - if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF) + if (type == TYP_REF || genActualType(type) == TYP_I_IMPL || type == TYP_BYREF) { m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::MakeEmpty(&m_bitVecTraits); @@ -230,18 +242,78 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e while (iterator.NextElem(&lclNum)) { - doOneMoreIteration = true; - - // newEscapingNodes = adjacentNodes[lclNum] - BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]); - // newEscapingNodes = newEscapingNodes \ escapingNodes - BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes); - // escapingNodesToProcess = escapingNodesToProcess U newEscapingNodes - BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes); - // escapingNodes = escapingNodes U newEscapingNodes - BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes); - // escapingNodesToProcess = escapingNodesToProcess \ { lclNum } - BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum); + if (m_ConnGraphAdjacencyMatrix[lclNum] != nullptr) + { + doOneMoreIteration = true; + + // newEscapingNodes = adjacentNodes[lclNum] + BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]); + // newEscapingNodes = newEscapingNodes \ escapingNodes + BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes); + // escapingNodesToProcess = escapingNodesToProcess U newEscapingNodes + BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes); + // escapingNodes = escapingNodes U newEscapingNodes + BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes); + // escapingNodesToProcess = escapingNodesToProcess \ { lclNum } + BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum); + } + } + } +} + +//------------------------------------------------------------------------------ +// ComputeStackObjectPointers : Given an initial set of possibly stack-pointing nodes, +// and an initial set of definitely stack-pointing nodes, +// update both sets by computing nodes reachable from the +// given set in the reverse connection graph. +// +// Arguments: +// bitVecTraits - Bit vector traits + +void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits) +{ + bool changed = true; + + while (changed) + { + changed = false; + for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum) + { + LclVarDsc* lclVarDsc = comp->lvaTable + lclNum; + var_types type = lclVarDsc->TypeGet(); + + if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF) + { + if (!MayLclVarPointToStack(lclNum) && + !BitVecOps::IsEmptyIntersection(bitVecTraits, m_PossiblyStackPointingPointers, + m_ConnGraphAdjacencyMatrix[lclNum])) + { + // We discovered a new pointer that may point to the stack. + MarkLclVarAsPossiblyStackPointing(lclNum); + + // Check if this pointer always points to the stack. + if (lclVarDsc->lvSingleDef == 1) + { + // Check if we know what is assigned to this pointer. + unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]); + assert(bitCount <= 1); + if (bitCount == 1) + { + BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]); + unsigned rhsLclNum = 0; + iter.NextElem(&rhsLclNum); + + if (DoesLclVarPointToStack(rhsLclNum)) + { + // The only assignment to lclNum local is definitely-stack-pointing + // rhsLclNum local so lclNum local is also definitely-stack-pointing. + MarkLclVarAsDefinitelyStackPointing(lclNum); + } + } + } + changed = true; + } + } } } } @@ -258,7 +330,9 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e bool ObjectAllocator::MorphAllocObjNodes() { - bool didStackAllocate = false; + bool didStackAllocate = false; + m_PossiblyStackPointingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); + m_DefinitelyStackPointingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits); BasicBlock* block; @@ -321,6 +395,10 @@ bool ObjectAllocator::MorphAllocObjNodes() const unsigned int stackLclNum = MorphAllocObjNodeIntoStackAlloc(asAllocObj, block, stmt); m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum); + // We keep the set of possibly-stack-pointing pointers as a superset of the set of + // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets. + MarkLclVarAsDefinitelyStackPointing(lclNum); + MarkLclVarAsPossiblyStackPointing(lclNum); stmt->gtStmtExpr->gtBashToNOP(); comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC; didStackAllocate = true; @@ -623,6 +701,119 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack* parent return canLclVarEscapeViaParentStack; } +//------------------------------------------------------------------------ +// UpdateAncestorTypes: Update types of some ancestor nodes of a possibly-stack-pointing +// tree from TYP_REF to TYP_BYREF or TYP_I_IMPL. +// +// Arguments: +// tree - Possibly-stack-pointing tree +// parentStack - Parent stack of the possibly-stack-pointing tree +// newType - New type of the possibly-stack-pointing tree +// +// Notes: +// If newType is TYP_I_IMPL, the tree is definitely pointing to the stack (or is null); +// if newType is TYP_BYREF, the tree may point to the stack. +// In addition to updating types this method may set GTF_IND_TGTANYWHERE +// or GTF_IND_TGT_NOT_HEAP on ancestor indirections to help codegen +// with write barrier selection. + +void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* parentStack, var_types newType) +{ + assert(newType == TYP_BYREF || newType == TYP_I_IMPL); + assert(parentStack != nullptr); + int parentIndex = 1; + + bool keepChecking = true; + + while (keepChecking && (parentStack->Height() > parentIndex)) + { + GenTree* parent = parentStack->Index(parentIndex); + keepChecking = false; + + switch (parent->OperGet()) + { + case GT_ASG: + { + GenTree* op2 = parent->AsOp()->gtGetOp2(); + + if ((op2 == tree) && (parent->TypeGet() == TYP_REF)) + { + assert(parent->AsOp()->gtGetOp1()->OperGet() == GT_LCL_VAR); + parent->ChangeType(newType); + } + + break; + } + + case GT_EQ: + case GT_NE: + break; + + case GT_COMMA: + if (parent->AsOp()->gtGetOp1() == parentStack->Index(parentIndex - 1)) + { + // Left child of GT_COMMA, it will be discarded + break; + } + __fallthrough; + case GT_COLON: + case GT_QMARK: + case GT_ADD: + if (parent->TypeGet() == TYP_REF) + { + parent->ChangeType(newType); + } + ++parentIndex; + keepChecking = true; + break; + + case GT_FIELD: + case GT_IND: + { + if (newType == TYP_BYREF) + { + // This ensures that a checked write barrier is used when writing + // to this field/indirection (it can be inside a stack-allocated object). + parent->gtFlags |= GTF_IND_TGTANYWHERE; + } + else + { + // This indicates that a write barrier is not needed when writing + // to this field/indirection since the address is not pointing to the heap. + // It's either null or points to inside a stack-allocated object. + parent->gtFlags |= GTF_IND_TGT_NOT_HEAP; + } + int grandParentIndex = parentIndex + 1; + + if (parentStack->Height() > grandParentIndex) + { + GenTree* grandParent = parentStack->Index(grandParentIndex); + if (grandParent->OperGet() == GT_ADDR) + { + if (grandParent->TypeGet() == TYP_REF) + { + grandParent->ChangeType(newType); + } + parentIndex += 2; + keepChecking = true; + } + } + break; + } + + default: + unreached(); + } + + if (keepChecking) + { + tree = parentStack->Index(parentIndex - 1); + } + } + + return; +} + #ifdef DEBUG //------------------------------------------------------------------------ // AssertWhenAllocObjFoundVisitor: Look for a GT_ALLOCOBJ node and assert @@ -662,6 +853,7 @@ void ObjectAllocator::RewriteUses() { DoPreOrder = true, DoLclVarsOnly = true, + ComputeStack = true, }; RewriteUsesVisitor(ObjectAllocator* allocator) @@ -677,12 +869,35 @@ void ObjectAllocator::RewriteUses() const unsigned int lclNum = tree->AsLclVarCommon()->gtLclNum; unsigned int newLclNum = BAD_VAR_NUM; + LclVarDsc* lclVarDsc = m_compiler->lvaTable + lclNum; - if (m_allocator->m_HeapLocalToStackLocalMap.TryGetValue(lclNum, &newLclNum)) + if ((lclNum < BitVecTraits::GetSize(&m_allocator->m_bitVecTraits)) && + m_allocator->MayLclVarPointToStack(lclNum)) { - GenTree* newTree = - m_compiler->gtNewOperNode(GT_ADDR, TYP_I_IMPL, m_compiler->gtNewLclvNode(newLclNum, TYP_STRUCT)); - *use = newTree; + var_types newType; + if (m_allocator->m_HeapLocalToStackLocalMap.TryGetValue(lclNum, &newLclNum)) + { + newType = TYP_I_IMPL; + tree = + m_compiler->gtNewOperNode(GT_ADDR, newType, m_compiler->gtNewLclvNode(newLclNum, TYP_STRUCT)); + *use = tree; + } + else + { + newType = m_allocator->DoesLclVarPointToStack(lclNum) ? TYP_I_IMPL : TYP_BYREF; + if (tree->TypeGet() == TYP_REF) + { + tree->ChangeType(newType); + } + } + + if (lclVarDsc->lvType != newType) + { + JITDUMP("changing the type of V%02u from %s to %s\n", lclNum, varTypeName(lclVarDsc->lvType), + varTypeName(newType)); + lclVarDsc->lvType = newType; + } + m_allocator->UpdateAncestorTypes(tree, &m_ancestors, newType); } return Compiler::fgWalkResult::WALK_CONTINUE; diff --git a/src/coreclr/src/jit/objectalloc.h b/src/coreclr/src/jit/objectalloc.h index 7be82ebb33af6..b114c2b7674b8 100644 --- a/src/coreclr/src/jit/objectalloc.h +++ b/src/coreclr/src/jit/objectalloc.h @@ -26,10 +26,14 @@ class ObjectAllocator final : public Phase //=============================================================================== // Data members - bool m_IsObjectStackAllocationEnabled; - bool m_AnalysisDone; - BitVecTraits m_bitVecTraits; - BitVec m_EscapingPointers; + bool m_IsObjectStackAllocationEnabled; + bool m_AnalysisDone; + BitVecTraits m_bitVecTraits; + BitVec m_EscapingPointers; + // We keep the set of possibly-stack-pointing pointers as a superset of the set of + // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets. + BitVec m_PossiblyStackPointingPointers; + BitVec m_DefinitelyStackPointingPointers; LocalToLocalMap m_HeapLocalToStackLocalMap; BitSetShortLongRep* m_ConnGraphAdjacencyMatrix; @@ -46,18 +50,23 @@ class ObjectAllocator final : public Phase private: bool CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd); bool CanLclVarEscape(unsigned int lclNum); + void MarkLclVarAsPossiblyStackPointing(unsigned int lclNum); + void MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum); + bool MayLclVarPointToStack(unsigned int lclNum); + bool DoesLclVarPointToStack(unsigned int lclNum); void DoAnalysis(); void MarkLclVarAsEscaping(unsigned int lclNum); - bool IsLclVarEscaping(unsigned int lclNum); void MarkEscapingVarsAndBuildConnGraph(); void AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum); void ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes); + void ComputeStackObjectPointers(BitVecTraits* bitVecTraits); bool MorphAllocObjNodes(); void RewriteUses(); GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj); unsigned int MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, GenTreeStmt* stmt); struct BuildConnGraphVisitorCallbackData; bool CanLclVarEscapeViaParentStack(ArrayStack* parentStack, unsigned int lclNum); + void UpdateAncestorTypes(GenTree* tree, ArrayStack* parentStack, var_types newType); #ifdef DEBUG static Compiler::fgWalkResult AssertWhenAllocObjFoundVisitor(GenTree** pTree, Compiler::fgWalkData* data); #endif // DEBUG @@ -75,9 +84,11 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp) { // Disable checks since this phase runs before fgComputePreds phase. // Checks are not expected to pass before fgComputePreds. - doChecks = false; - m_EscapingPointers = BitVecOps::UninitVal(); - m_ConnGraphAdjacencyMatrix = nullptr; + doChecks = false; + m_EscapingPointers = BitVecOps::UninitVal(); + m_PossiblyStackPointingPointers = BitVecOps::UninitVal(); + m_DefinitelyStackPointingPointers = BitVecOps::UninitVal(); + m_ConnGraphAdjacencyMatrix = nullptr; } //------------------------------------------------------------------------ @@ -119,12 +130,6 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, CORIN DWORD classAttribs = comp->info.compCompHnd->getClassAttribs(clsHnd); - if ((classAttribs & CORINFO_FLG_CONTAINS_GC_PTR) != 0) - { - // TODO-ObjectStackAllocation: enable stack allocation of objects with gc fields - return false; - } - if ((classAttribs & CORINFO_FLG_VALUECLASS) != 0) { // TODO-ObjectStackAllocation: enable stack allocation of boxed structs @@ -153,10 +158,42 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, CORIN inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum) { - assert(m_AnalysisDone); return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum); } +//------------------------------------------------------------------------ +// MayLclVarPointToStack: Returns true iff local variable may +// point to a stack-allocated object +// +// Arguments: +// lclNum - Local variable number +// +// Return Value: +// Returns true iff local variable may point to a stack-allocated object + +inline bool ObjectAllocator::MayLclVarPointToStack(unsigned int lclNum) +{ + assert(m_AnalysisDone); + return BitVecOps::IsMember(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum); +} + +//------------------------------------------------------------------------ +// DoesLclVarPointToStack: Returns true iff local variable definitely +// points to a stack-allocated object (or is null) +// +// Arguments: +// lclNum - Local variable number +// +// Return Value: +// Returns true iff local variable definitely points to a stack-allocated object +// (or is null) + +inline bool ObjectAllocator::DoesLclVarPointToStack(unsigned int lclNum) +{ + assert(m_AnalysisDone); + return BitVecOps::IsMember(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum); +} + //=============================================================================== #endif // OBJECTALLOC_H diff --git a/src/coreclr/tests/issues.targets b/src/coreclr/tests/issues.targets index fd1d96558f936..b89a010b1cfa7 100644 --- a/src/coreclr/tests/issues.targets +++ b/src/coreclr/tests/issues.targets @@ -309,13 +309,6 @@ - - - - Object stack allocation is not supported on x86 yet - - - diff --git a/src/coreclr/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs b/src/coreclr/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs index 9e665244678e5..7c524ba033efd 100644 --- a/src/coreclr/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs +++ b/src/coreclr/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs @@ -33,7 +33,7 @@ public SimpleClassB(long f1, long f2) } } - class SimpleClassWithGCField : SimpleClassA + sealed class SimpleClassWithGCField : SimpleClassA { public object o; @@ -80,6 +80,13 @@ class Tests { static volatile int f1 = 5; static volatile int f2 = 7; + static SimpleClassA classA; + static SimpleClassWithGCField classWithGCField; + static string str0; + static string str1; + static string str2; + static string str3; + static string str4; delegate int Test(); @@ -89,12 +96,24 @@ public static int Main() { AllocationKind expectedAllocationKind = AllocationKind.Stack; if (GCStressEnabled()) { + Console.WriteLine("GCStress is enabled"); expectedAllocationKind = AllocationKind.Undefined; } else if (!SPCOptimizationsEnabled()) { + Console.WriteLine("System.Private.CoreLib.dll optimizations are disabled"); expectedAllocationKind = AllocationKind.Heap; } + classA = new SimpleClassA(f1, f2); + + classWithGCField = new SimpleClassWithGCField(f1, f2, null); + + str0 = "str_zero"; + str1 = "str_one"; + str2 = "str_two"; + str3 = "str_three"; + str4 = "str_four"; + CallTestAndVerifyAllocation(AllocateSimpleClassAndAddFields, 12, expectedAllocationKind); CallTestAndVerifyAllocation(AllocateSimpleClassesAndEQCompareThem, 0, expectedAllocationKind); @@ -107,23 +126,25 @@ public static int Main() CallTestAndVerifyAllocation(AllocateClassWithNestedStructAndAddFields, 24, expectedAllocationKind); + CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckTypeNoHelper, 1, expectedAllocationKind); + + CallTestAndVerifyAllocation(AllocateSimpleClassWithGCFieldAndAddFields, 12, expectedAllocationKind); + + CallTestAndVerifyAllocation(AllocateSimpleClassAndAssignRefToAField, 12, expectedAllocationKind); + + CallTestAndVerifyAllocation(TestMixOfReportingAndWriteBarriers, 34, expectedAllocationKind); + // The remaining tests currently never allocate on the stack if (expectedAllocationKind == AllocationKind.Stack) { expectedAllocationKind = AllocationKind.Heap; } // This test calls CORINFO_HELP_ISINSTANCEOFCLASS - CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckType, 1, expectedAllocationKind); + CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckTypeHelper, 1, expectedAllocationKind); // This test calls CORINFO_HELP_CHKCASTCLASS_SPECIAL CallTestAndVerifyAllocation(AllocateSimpleClassAndCast, 7, expectedAllocationKind); - // Stack allocation of classes with GC fields is currently disabled - CallTestAndVerifyAllocation(AllocateSimpleClassWithGCFieldAndAddFields, 12, expectedAllocationKind); - - // Assigning class ref to a field of another object currently always disables stack allocation - CallTestAndVerifyAllocation(AllocateSimpleClassAndAssignRefToAField, 12, expectedAllocationKind); - // Stack allocation of boxed structs is currently disabled CallTestAndVerifyAllocation(BoxSimpleStructAndAddFields, 12, expectedAllocationKind); @@ -183,6 +204,7 @@ static int AllocateSimpleClassesAndEQCompareThem() { SimpleClassA a1 = new SimpleClassA(f1, f2); SimpleClassA a2 = (f1 == 0) ? a1 : new SimpleClassA(f2, f1); + GC.Collect(); return (a1 == a2) ? 1 : 0; } @@ -191,20 +213,31 @@ static int AllocateSimpleClassesAndNECompareThem() { SimpleClassA a1 = new SimpleClassA(f1, f2); SimpleClassA a2 = (f1 == 0) ? a1 : new SimpleClassA(f2, f1); + GC.Collect(); return (a1 != a2) ? 1 : 0; } [MethodImpl(MethodImplOptions.NoInlining)] - static int AllocateSimpleClassAndCheckType() + static int AllocateSimpleClassAndCheckTypeNoHelper() + { + object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f1, f2); + GC.Collect(); + return (o is SimpleClassB) ? 0 : 1; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int AllocateSimpleClassAndCheckTypeHelper() { object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f1, f2); - return (o is SimpleClassB) || !(o is SimpleClassA) ? 0 : 1; + GC.Collect(); + return !(o is SimpleClassA) ? 0 : 1; } [MethodImpl(MethodImplOptions.NoInlining)] static int AllocateSimpleClassAndCast() { object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f2, f1); + GC.Collect(); return ((SimpleClassA)o).f1; } @@ -212,6 +245,7 @@ static int AllocateSimpleClassAndCast() static int AllocateSimpleClassAndGetField() { SimpleClassA a = new SimpleClassA(f1, f2); + GC.Collect(); ref int f = ref a.f2; return f; } @@ -220,6 +254,7 @@ static int AllocateSimpleClassAndGetField() static int AllocateClassWithNestedStructAndGetField() { ClassWithNestedStruct c = new ClassWithNestedStruct(f1, f2); + GC.Collect(); ref int f = ref c.ns.s.f1; return f; } @@ -228,6 +263,7 @@ static int AllocateClassWithNestedStructAndGetField() static int AllocateClassWithNestedStructAndAddFields() { ClassWithNestedStruct c = new ClassWithNestedStruct(f1, f2); + GC.Collect(); return c.ns.f1 + c.ns.f2 + c.ns.s.f1 + c.ns.s.f2; } @@ -235,14 +271,15 @@ static int AllocateClassWithNestedStructAndAddFields() static int AllocateSimpleClassWithGCFieldAndAddFields() { SimpleClassWithGCField c = new SimpleClassWithGCField(f1, f2, null); + GC.Collect(); return c.f1 + c.f2; } static int AllocateSimpleClassAndAssignRefToAField() { SimpleClassWithGCField c = new SimpleClassWithGCField(f1, f2, null); - SimpleClassA a = new SimpleClassA(f1, f2); - c.o = a; + GC.Collect(); + c.o = classA; return c.f1 + c.f2; } @@ -253,7 +290,38 @@ static int BoxSimpleStructAndAddFields() str.f1 = f1; str.f2 = f2; object boxedSimpleStruct = (object)str; + GC.Collect(); return ((SimpleStruct)boxedSimpleStruct).f1 + ((SimpleStruct)boxedSimpleStruct).f2; } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int TestMixOfReportingAndWriteBarriers() + { + // c1 doesn't escape and is allocated on the stack + SimpleClassWithGCField c1 = new SimpleClassWithGCField(f1, f2, str0); + + // c2 always points to a heap-allocated object + SimpleClassWithGCField c2 = classWithGCField; + + // c2 and c3 may point to a heap-allocated object or to a stack-allocated object + SimpleClassWithGCField c3 = (f1 == 0) ? c1 : c2; + SimpleClassWithGCField c4 = (f2 == 0) ? c2 : c1; + + // c1 doesn't have to be reported to GC (but can be conservatively reported as an interior pointer) + // c1.o should be reported to GC as a normal pointer (but can be conservatively reported as an interior pointer) + // c2 should be reported to GC as a normal pointer (but can be conservatively reported as an interior pointer) + // c3 and c4 must be reported as interior pointers + GC.Collect(); + + // This assignment doesn't need a write barrier but may conservatively use a checked barrier + c1.o = str1; + // This assignment should optimally use a normal write barrier but may conservatively use a checked barrier + c2.o = str2; + // These assignments require a checked write barrier + c3.o = str3; + c4.o = str4; + + return c1.o.ToString().Length + c2.o.ToString().Length + c3.o.ToString().Length + c4.o.ToString().Length; + } } }