Skip to content

Commit

Permalink
[X86] Fix PR30926 - Add patterns for (v)cvtsi2s{s,d} and (v)cvtsd2s{s,d}
Browse files Browse the repository at this point in the history
The code emitted by Clang's intrinsics for (v)cvtsi2ss, (v)cvtsi2sd,
(v)cvtsd2ss and (v)cvtss2sd is lowered to a code sequence that includes
redundant (v)movss/(v)movsd instructions. This patch adds patterns for
optimizing these sequences.

Differential revision: https://reviews.llvm.org/D28455



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291660 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Elad Cohen committed Jan 11, 2017
1 parent 208cabd commit 3cf3d08
Show file tree
Hide file tree
Showing 6 changed files with 220 additions and 10 deletions.
39 changes: 39 additions & 0 deletions lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -5957,6 +5957,30 @@ let Predicates = [HasAVX512] in {
(VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
} // Predicates = [HasAVX512]

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
  // Each pattern matches an X86Movss/X86Movsd whose second operand is a
  // scalar_to_vector of a signed int -> fp conversion, and selects the
  // corresponding *_Int convert instruction with $dst as its vector operand
  // and the GPR as its integer operand.

  // 32-bit integer source.
  def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (sint_to_fp GR32:$src)))))),
            (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (sint_to_fp GR32:$src)))))),
            (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

  // 64-bit integer source.
  def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (sint_to_fp GR64:$src)))))),
            (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (sint_to_fp GR64:$src)))))),
            (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
Expand Down Expand Up @@ -6136,6 +6160,21 @@ def : Pat<(f32 (fpround FR64X:$src)),
(COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
(COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
Requires<[HasAVX512]>;

// Element 0 of $src is extended f32 -> f64 and moved into $dst with X86Movsd;
// select a single VCVTSS2SDZrr taking $dst and $src directly.
def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
                           (v2f64 (scalar_to_vector
                                    (f64 (fpextend
                                           (f32 (extractelt VR128X:$src,
                                                            (iPTR 0))))))))),
          (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;

// Element 0 of $src is rounded f64 -> f32 and moved into $dst with X86Movss;
// select a single VCVTSD2SSZrr taking $dst and $src directly.
def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
                           (v4f32 (scalar_to_vector
                                    (f32 (fpround
                                           (f64 (extractelt VR128X:$src,
                                                            (iPTR 0))))))))),
          (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
      Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
Expand Down
74 changes: 73 additions & 1 deletion lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
InstrItinClass ri = arg_ri;
}


// scalar
let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
Expand Down Expand Up @@ -1923,6 +1922,79 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
}
} // isCodeGenOnly = 1

// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
// (v)movs{s,d} instructions
let Predicates = [UseAVX] in {
  // Signed integer -> fp: the converted scalar is moved into $dst via
  // X86Movss/X86Movsd; select the Int_VCVTSI2S* form that takes $dst and the
  // GPR directly.
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (sint_to_fp GR32:$src)))))),
            (Int_VCVTSI2SSrr VR128:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (sint_to_fp GR32:$src)))))),
            (Int_VCVTSI2SDrr VR128:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (sint_to_fp GR64:$src)))))),
            (Int_VCVTSI2SS64rr VR128:$dst, GR64:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (sint_to_fp GR64:$src)))))),
            (Int_VCVTSI2SD64rr VR128:$dst, GR64:$src)>;

  // fp -> fp: element 0 of $src is rounded/extended and moved into $dst;
  // select the Int_VCVTS*2S* form that takes both vectors directly.
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (fpround
                                             (f64 (extractelt VR128:$src,
                                                              (iPTR 0))))))))),
            (Int_VCVTSD2SSrr VR128:$dst, VR128:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (fpextend
                                             (f32 (extractelt VR128:$src,
                                                              (iPTR 0))))))))),
            (Int_VCVTSS2SDrr VR128:$dst, VR128:$src)>;
} // Predicates = [UseAVX]

let Predicates = [UseSSE2] in {
  // Signed integer -> f64: the converted scalar is moved into $dst via
  // X86Movsd; select the Int_CVTSI2SD* form that takes $dst and the GPR
  // directly.
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (sint_to_fp GR32:$src)))))),
            (Int_CVTSI2SDrr VR128:$dst, GR32:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (sint_to_fp GR64:$src)))))),
            (Int_CVTSI2SD64rr VR128:$dst, GR64:$src)>;

  // fp -> fp: element 0 of $src is rounded/extended and moved into $dst;
  // select the Int_CVTS*2S* form that takes both vectors directly.
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (fpround
                                             (f64 (extractelt VR128:$src,
                                                              (iPTR 0))))))))),
            (Int_CVTSD2SSrr VR128:$dst, VR128:$src)>;

  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                             (v2f64 (scalar_to_vector
                                      (f64 (fpextend
                                             (f32 (extractelt VR128:$src,
                                                              (iPTR 0))))))))),
            (Int_CVTSS2SDrr VR128:$dst, VR128:$src)>;
} // Predicates = [UseSSE2]

let Predicates = [UseSSE1] in {
  // Signed integer -> f32: the converted scalar is moved into $dst via
  // X86Movss; select the Int_CVTSI2SS* form that takes $dst and the GPR
  // directly.
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (sint_to_fp GR32:$src)))))),
            (Int_CVTSI2SSrr VR128:$dst, GR32:$src)>;

  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                             (v4f32 (scalar_to_vector
                                      (f32 (sint_to_fp GR64:$src)))))),
            (Int_CVTSI2SS64rr VR128:$dst, GR64:$src)>;
} // Predicates = [UseSSE1]

// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
Expand Down
22 changes: 22 additions & 0 deletions test/CodeGen/X86/avx-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
ret <8 x float> %a
}

; Truncates element 0 of %a0 from double to float and inserts the result into
; lane 0 of %a1. The expected output is a single vcvtsd2ss followed by the
; return, with no separate move instruction.
define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fptrunc01:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%ext = extractelement <2 x double> %a0, i32 0
%cvt = fptrunc double %ext to float
%res = insertelement <4 x float> %a1, float %cvt, i32 0
ret <4 x float> %res
}

define <4 x double> @fpext00(<4 x float> %b) nounwind {
; CHECK-LABEL: fpext00:
; CHECK: # BB#0:
Expand All @@ -71,6 +82,17 @@ define <4 x double> @fpext00(<4 x float> %b) nounwind {
ret <4 x double> %a
}

; Extends element 0 of %a1 from float to double and inserts the result into
; lane 0 of %a0. The expected output is a single vcvtss2sd followed by the
; return, with no separate move instruction.
define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
; CHECK-LABEL: fpext01:
; CHECK: # BB#0:
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcA:
; CHECK: # BB#0:
Expand Down
22 changes: 22 additions & 0 deletions test/CodeGen/X86/avx512-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,17 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
ret <4 x float> %c
}

; Truncates element 0 of %a0 from double to float and inserts the result into
; lane 0 of %a1. The expected output is a single vcvtsd2ss followed by the
; return, with no separate move instruction.
define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: fptrunc03:
; ALL: ## BB#0:
; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
; ALL-NEXT: retq
%ext = extractelement <2 x double> %a0, i32 0
%cvt = fptrunc double %ext to float
%res = insertelement <4 x float> %a1, float %cvt, i32 0
ret <4 x float> %res
}

define <8 x double> @fpext00(<8 x float> %b) nounwind {
; ALL-LABEL: fpext00:
; ALL: ## BB#0:
Expand Down Expand Up @@ -476,6 +487,17 @@ define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
ret <4 x double> %c
}

; Extends element 0 of %a1 from float to double and inserts the result into
; lane 0 of %a0. The expected output is a single vcvtss2sd followed by the
; return, with no separate move instruction.
define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: fpext02:
; ALL: ## BB#0:
; ALL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

define double @funcA(i64* nocapture %e) {
; ALL-LABEL: funcA:
; ALL: ## BB#0: ## %entry
Expand Down
13 changes: 4 additions & 9 deletions test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1257,15 +1257,12 @@ define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X32-LABEL: test_mm_cvtsi32_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: cvtsi2sdl %eax, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsi32_sd:
; X64: # BB#0:
; X64-NEXT: cvtsi2sdl %edi, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: cvtsi2sdl %edi, %xmm0
; X64-NEXT: retq
%cvt = sitofp i32 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
Expand Down Expand Up @@ -1293,14 +1290,12 @@ define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_cvtss_sd:
; X32: # BB#0:
; X32-NEXT: cvtss2sd %xmm1, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: cvtss2sd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtss_sd:
; X64: # BB#0:
; X64-NEXT: cvtss2sd %xmm1, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: cvtss2sd %xmm1, %xmm0
; X64-NEXT: retq
%ext = extractelement <4 x float> %a1, i32 0
%cvt = fpext float %ext to double
Expand Down
60 changes: 60 additions & 0 deletions test/CodeGen/X86/vec_int_to_fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4818,3 +4818,63 @@ define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) {
store <8 x float> %4, <8 x float>* %3, align 32
ret void
}

; Converts the signed i32 %a1 to double and inserts it into lane 0 of %a0.
; Both check blocks below expect one (v)cvtsi2sdl reading %edi and writing
; %xmm0, followed directly by the return.
define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
; SSE-LABEL: sitofp_i32_to_2f64:
; SSE: # BB#0:
; SSE-NEXT: cvtsi2sdl %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i32_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp i32 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

; Converts the signed i32 %a1 to float and inserts it into lane 0 of %a0.
; Both check blocks below expect one (v)cvtsi2ssl reading %edi and writing
; %xmm0, followed directly by the return.
define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
; SSE-LABEL: sitofp_i32_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: cvtsi2ssl %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i32_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp i32 %a1 to float
%res = insertelement <4 x float> %a0, float %cvt, i32 0
ret <4 x float> %res
}

; Converts the signed i64 %a1 to double and inserts it into lane 0 of %a0.
; Both check blocks below expect one (v)cvtsi2sdq reading %rdi and writing
; %xmm0, followed directly by the return.
define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
; SSE-LABEL: sitofp_i64_to_2f64:
; SSE: # BB#0:
; SSE-NEXT: cvtsi2sdq %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i64_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp i64 %a1 to double
%res = insertelement <2 x double> %a0, double %cvt, i32 0
ret <2 x double> %res
}

; Converts the signed i64 %a1 to float and inserts it into lane 0 of %a0.
; Both check blocks below expect one (v)cvtsi2ssq reading %rdi and writing
; %xmm0, followed directly by the return.
define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
; SSE-LABEL: sitofp_i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: cvtsi2ssq %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i64_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = sitofp i64 %a1 to float
%res = insertelement <4 x float> %a0, float %cvt, i32 0
ret <4 x float> %res
}

0 comments on commit 3cf3d08

Please sign in to comment.