Skip to content

Commit

Permalink
[X86] Modify patterns from memory form of RCP/RSQRT/SQRT intrinsics t…
Browse files Browse the repository at this point in the history
…o only allow (scalar_to_vector (loadf32/load64)) instead of anything that sse_load_f32/f64 can match.

sse_load_f32/f64 can also match loads that are zero extended to vectors. We shouldn't match that because we wouldn't be able to get the instruction to zero the upper bits like the intrinsic semantics would require for such a case.

There is a test case that does depend on this behavior.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289193 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Dec 9, 2016
1 parent 3e9ee5f commit ff96f08
Showing 1 changed file with 11 additions and 14 deletions.
25 changes: 11 additions & 14 deletions lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -3386,8 +3386,8 @@ def SSE_RCPS : OpndItins<
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat, Intrinsic Intr,
X86MemOperand x86memop,
Intrinsic Intr,
SDNode OpNode, Domain d, OpndItins itins,
Predicate target, string Suffix> {
let hasSideEffects = 0 in {
Expand All @@ -3407,7 +3407,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Expand All @@ -3427,16 +3427,15 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// which has a clobber before the rcp, vs.
// rcpss mem, %xmm0
let Predicates = [target, OptForSize] in {
def : Pat<(Intr mem_cpat:$src),
def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
(!cast<Instruction>(NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
}

multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat,
X86MemOperand x86memop,
Intrinsic Intr, SDNode OpNode, Domain d,
OpndItins itins, string Suffix> {
let hasSideEffects = 0 in {
Expand All @@ -3454,7 +3453,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[]>, Sched<[itins.Sched.Folded]>;
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, vec_memop:$src2),
(ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
Expand All @@ -3479,9 +3478,9 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
VR128:$src)>;
}
let Predicates = [HasAVX, OptForSize] in {
def : Pat<(Intr mem_cpat:$src),
def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
def : Pat<(ScalarVT (OpNode (load addr:$src))),
Expand Down Expand Up @@ -3565,23 +3564,21 @@ let Predicates = [HasAVX] in {
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem, ssmem, sse_load_f32,
f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
}

multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem, sdmem, sse_load_f64,
f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, "SD">,
XD, VEX_4V, VEX_LIG;
Expand Down

0 comments on commit ff96f08

Please sign in to comment.