Allow folding of aligned loads when using the VEX encoding and optimizations are enabled (dotnet#376)
tannergooding authored and CarolEidt committed Jan 2, 2020
1 parent 6f445d6 commit bed5e44
1 changed file with 26 additions and 12 deletions: src/coreclr/src/jit/lowerxarch.cpp
@@ -2555,11 +2555,13 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
 
     // containingNode supports nodes that read from an aligned memory address
     //
-    // This will generally be an explicit LoadAligned instruction and is generally
-    // false for machines with VEX support. This is because there is currently no way
-    // to guarantee that the address read from will always be aligned and we could silently
-    // change the behavior of the program in the case where an Access Violation would have
-    // otherwise occurred.
+    // This will generally be an explicit LoadAligned instruction and is false for
+    // machines with VEX support when minOpts is enabled. This is because there is
+    // currently no way to guarantee that the address read from will always be
+    // aligned and we want to assert that the address is aligned when optimizations
+    // aren't enabled. However, when optimizations are enabled, we want to allow
+    // folding of memory operands as it produces better codegen and allows simpler
+    // coding patterns on the managed side.
     bool supportsAlignedSIMDLoads = false;
 
     // containingNode supports nodes that read from general memory
@@ -2609,10 +2611,23 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
     {
         // These intrinsics only expect 16 or 32-byte nodes for containment
         assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
-        supportsAlignedSIMDLoads =
-            !comp->canUseVexEncoding() && (containingIntrinsicId != NI_SSE2_ConvertToVector128Double);
-        supportsUnalignedSIMDLoads = !supportsAlignedSIMDLoads;
-        supportsGeneralLoads = supportsUnalignedSIMDLoads;
+
+        if (!comp->canUseVexEncoding())
+        {
+            // Most instructions under the non-VEX encoding require aligned operands.
+            // Those used for Sse2.ConvertToVector128Double (CVTDQ2PD and CVTPS2PD)
+            // are exceptions and don't fail for unaligned inputs.
+
+            supportsAlignedSIMDLoads = (containingIntrinsicId != NI_SSE2_ConvertToVector128Double);
+            supportsUnalignedSIMDLoads = !supportsAlignedSIMDLoads;
+        }
+        else
+        {
+            supportsAlignedSIMDLoads = !comp->opts.MinOpts();
+            supportsUnalignedSIMDLoads = true;
+        }
+
+        supportsGeneralLoads = supportsUnalignedSIMDLoads;
         break;
     }
 }
@@ -2660,8 +2675,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
     assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
     assert(supportsSIMDScalarLoads == false);
 
-    supportsAlignedSIMDLoads = !comp->canUseVexEncoding();
-    supportsUnalignedSIMDLoads = !supportsAlignedSIMDLoads;
+    supportsAlignedSIMDLoads = !comp->canUseVexEncoding() || !comp->opts.MinOpts();
+    supportsUnalignedSIMDLoads = comp->canUseVexEncoding();
     supportsGeneralLoads = supportsUnalignedSIMDLoads;
 
     break;
@@ -2893,7 +2908,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
         return supportsSIMDScalarLoads;
     }
 
-    // VEX encoding supports unaligned memory ops, so we can fold them
     case NI_SSE_LoadVector128:
     case NI_SSE2_LoadVector128:
     case NI_AVX_LoadVector256:
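For context, here is a minimal managed-side sketch of the kind of pattern this change benefits; the helper name, loop, and parameters are illustrative and not part of the commit. With the VEX encoding and optimizations enabled, the aligned load below may now be contained in its consumer and emitted as a folded memory operand (e.g. `vaddps xmm0, xmm0, [mem]`) rather than a separate load instruction.

```csharp
// Hypothetical example; names (SumAligned, p, count) are illustrative only.
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static unsafe class AlignedLoadFoldingExample
{
    // Sums 'count' floats starting at 'p'. For this sketch, 'p' must be
    // 16-byte aligned and 'count' a multiple of 4.
    public static Vector128<float> SumAligned(float* p, int count)
    {
        Vector128<float> acc = Vector128<float>.Zero;
        for (int i = 0; i < count; i += 4)
        {
            // Under VEX with optimizations enabled, this aligned load can be
            // folded into the Add's memory operand for better codegen.
            acc = Sse.Add(acc, Sse.LoadAlignedVector128(p + i));
        }
        return acc;
    }
}
```

Per the updated comment, when optimizations are disabled (minOpts) the load is kept separate so that a misaligned address still surfaces as a fault instead of being silently tolerated by a folded, unaligned-tolerant memory operand.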
