Skip to content

Commit

Permalink
[X86] Remove some more RCP and RSQRT patterns from InstrAVX512.td tha…
Browse files Browse the repository at this point in the history
…t I missed in r317413.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317441 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Nov 5, 2017
1 parent f99fcce commit 3594a87
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 37 deletions.
13 changes: 0 additions & 13 deletions lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -7641,19 +7641,6 @@ defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;

let Predicates = [HasAVX512] in {
def : Pat<(f32 (X86frsqrt FR32X:$src)),
(COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
(COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
def : Pat<(f32 (X86frcp FR32X:$src)),
(COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
def : Pat<(f32 (X86frcp (load addr:$src))),
(COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
}

multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {

Expand Down
24 changes: 12 additions & 12 deletions lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -3095,7 +3095,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
X86MemOperand x86memop,
Intrinsic Intr, SDNode OpNode, Domain d,
OpndItins itins, string Suffix> {
OpndItins itins, Predicate target, string Suffix> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
Expand Down Expand Up @@ -3126,7 +3126,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// vrcpss mem, %xmm0, %xmm0
// TODO: In theory, we could fold the load, and avoid the stall caused by
// the partial register store, either in ExecutionDepsFix or with smarter RA.
let Predicates = [UseAVX] in {
let Predicates = [target] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
}
Expand All @@ -3140,7 +3140,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), addr:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
let Predicates = [target, OptForSize] in {
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
Expand Down Expand Up @@ -3220,40 +3220,40 @@ let Predicates = [HasAVX, NoVLX] in {
}

multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
OpndItins itins, Predicate AVXTarget> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG,
NotMemoryFoldable;
SSEPackedSingle, itins, AVXTarget, "SS">, XS, VEX_4V,
VEX_LIG, VEX_WIG, NotMemoryFoldable;
}

multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
OpndItins itins, Predicate AVXTarget> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, "SD">,
OpNode, SSEPackedDouble, itins, AVXTarget, "SD">,
XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
}

// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS, UseAVX>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX, NoVLX]>,
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD, UseAVX>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS, HasAVX>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX]>;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS, HasAVX>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX]>;

// There is no f64 version of the reciprocal approximation instructions.
Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/X86/recip-fastmath.ll
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,14 @@ define float @f32_one_step(float %x) #1 {
;
; KNL-LABEL: f32_one_step:
; KNL: # BB#0:
; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: f32_one_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
Expand Down Expand Up @@ -257,7 +257,7 @@ define float @f32_two_step(float %x) #2 {
;
; KNL-LABEL: f32_two_step:
; KNL: # BB#0:
; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
Expand All @@ -268,7 +268,7 @@ define float @f32_two_step(float %x) #2 {
;
; SKX-LABEL: f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
Expand Down
16 changes: 8 additions & 8 deletions test/CodeGen/X86/recip-fastmath2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ define float @f32_no_step_2(float %x) #3 {
;
; KNL-LABEL: f32_no_step_2:
; KNL: # BB#0:
; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: f32_no_step_2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%div = fdiv fast float 1234.0, %x
Expand Down Expand Up @@ -144,15 +144,15 @@ define float @f32_one_step_2(float %x) #1 {
;
; KNL-LABEL: f32_one_step_2:
; KNL: # BB#0:
; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [2:1.00]
;
; SKX-LABEL: f32_one_step_2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
Expand Down Expand Up @@ -243,7 +243,7 @@ define float @f32_one_step_2_divs(float %x) #1 {
;
; KNL-LABEL: f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
Expand All @@ -252,7 +252,7 @@ define float @f32_one_step_2_divs(float %x) #1 {
;
; SKX-LABEL: f32_one_step_2_divs:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50]
; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
Expand Down Expand Up @@ -368,7 +368,7 @@ define float @f32_two_step_2(float %x) #2 {
;
; KNL-LABEL: f32_two_step_2:
; KNL: # BB#0:
; KNL-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
Expand All @@ -380,7 +380,7 @@ define float @f32_two_step_2(float %x) #2 {
;
; SKX-LABEL: f32_two_step_2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33]
Expand Down

0 comments on commit 3594a87

Please sign in to comment.