Skip to content

Commit

Permalink
AVX-512: Implemented missing encoding for FMA scalar instructions
Browse files Browse the repository at this point in the history
Added tests for encoding

Differential Revision: http://reviews.llvm.org/D10865

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241159 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Igor Breger committed Jul 1, 2015
1 parent 456ac28 commit 2ae3081
Show file tree
Hide file tree
Showing 3 changed files with 1,374 additions and 35 deletions.
129 changes: 95 additions & 34 deletions lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,16 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86select _.KRCWM:$mask, RHS, _.RC:$src1)>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
Expand Down Expand Up @@ -4205,44 +4215,95 @@ defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubR

// Scalar FMA
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, ValueType OpVT,
X86MemOperand x86memop, Operand memop,
PatFrag mem_frag> {
let isCommutable = 1 in
def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
dag RHS_r, dag RHS_m > {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base;

let mayLoad = 1 in
def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;

defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>,
AVX512FMA3Base, EVEX_B, EVEX_RC;

let isCodeGenOnly = 1 in {
def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1,
(mem_frag addr:$src3))))]>;
[RHS_r]>;
let mayLoad = 1 in
def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[RHS_m]>;
}// isCodeGenOnly = 1
}
}// Constraints = "$src1 = $dst"

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ ,
string SUFF> {

defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
(_.VT (OpNode _.RC:$src2, _.RC:$src1,
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))))),
(_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
_.FRC:$src3))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3))))>;

defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
(_.VT (OpNode _.RC:$src2,
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
_.RC:$src1)),
(_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
_.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
(_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;

defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)),
(_.VT (OpNode _.RC:$src1,
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
_.RC:$src2)),
(_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f32x_info, "SS">,
EVEX_CD8<32, CD8VT1>, VEX_LIG;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f64x_info, "SD">,
EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
}
} // Constraints = "$src1 = $dst"

defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
Expand Down
31 changes: 30 additions & 1 deletion test/CodeGen/X86/avx512-fma.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,41 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
ret <8 x double> %res
}

define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) {
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
; CHECK-LABEL: test_x86_fmsub_213:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
ret double %res
}

define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; CHECK-LABEL: test_x86_fmsub_213_m:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
ret double %res
}

define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
; CHECK-LABEL: test_x86_fmsub_231_m:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a2
%res = fsub double %x, %a1
ret double %res
}

define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
; CHECK-LABEL: test231_br:
; CHECK: ## BB#0:
Expand Down
Loading

0 comments on commit 2ae3081

Please sign in to comment.