@@ -4831,137 +4831,146 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
4831
4831
// SSE3 Instructions
4832
4832
//===---------------------------------------------------------------------===//
4833
4833
// SSE_HADDSUB - OpndItins record for the horizontal add/sub instructions:
// it carries IIC_SSE_HADDSUB_RR / IIC_SSE_HADDSUB_RM and has Sched set to
// WriteFHAdd. The multiclasses below read it as itins.rr, itins.rm and
// itins.Sched.
4834
+ let Sched = WriteFHAdd in
4835
+ def SSE_HADDSUB : OpndItins<
4836
+ IIC_SSE_HADDSUB_RR, IIC_SSE_HADDSUB_RM
4837
+ >;
4838
+
4834
4839
// Horizontal ops
// S3D_Int - Defines an `rr` (MRMSrcReg) and an `rm` (MRMSrcMem) instruction of
// class S3DI for opcode `o`. Both match `OpNode` on RC:$src1 and a second
// operand of value type `vt`. `itins` provides the itinerary classes
// (itins.rr / itins.rm) and the scheduling class (itins.Sched, and
// itins.Sched.Folded for the memory form). When Is2Addr is set the assembly
// string prints only $src2 and $dst; otherwise $src1 is printed as well.
4835
4840
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4836
- X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag ,
4837
- bit Is2Addr = 1> {
4841
+ X86MemOperand x86memop, SDNode OpNode, OpndItins itins ,
4842
+ PatFrag ld_frag, bit Is2Addr = 1> {
// Register form: both sources are registers of class RC.
4838
4843
def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4839
4844
!if(Is2Addr,
4840
4845
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4841
4846
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4842
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR >,
4843
- Sched<[WriteFHAdd ]>;
4847
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr >,
4848
+ Sched<[itins.Sched ]>;
4844
4849
// Memory form: $src2 is an x86memop read through ld_frag; the Sched list
// also carries ReadAfterLd.
4845
4850
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4846
4851
!if(Is2Addr,
4847
4852
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4848
4853
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4849
4854
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
4850
- IIC_SSE_HADDSUB_RM >, Sched<[WriteFHAddLd , ReadAfterLd]>;
4855
+ itins.rm >, Sched<[itins.Sched.Folded , ReadAfterLd]>;
4851
4856
}
// S3_Int - Same shape as S3D_Int, but the `rr` / `rm` instructions derive from
// class S3I instead of S3DI. `itins` provides itins.rr / itins.rm and the
// scheduling class (itins.Sched, itins.Sched.Folded for the memory form).
// Is2Addr selects between the two-operand and three-operand assembly strings.
4852
4857
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
4853
- X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag ,
4854
- bit Is2Addr = 1> {
4858
+ X86MemOperand x86memop, SDNode OpNode, OpndItins itins ,
4859
+ PatFrag ld_frag, bit Is2Addr = 1> {
// Register form: both sources are registers of class RC.
4855
4860
def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
4856
4861
!if(Is2Addr,
4857
4862
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4858
4863
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4859
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR >,
4860
- Sched<[WriteFHAdd ]>;
4864
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr >,
4865
+ Sched<[itins.Sched ]>;
4861
4866
// Memory form: $src2 is an x86memop read through ld_frag; the Sched list
// also carries ReadAfterLd.
4862
4867
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
4863
4868
!if(Is2Addr,
4864
4869
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
4865
4870
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4866
4871
[(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
4867
- IIC_SSE_HADDSUB_RM >, Sched<[WriteFHAddLd , ReadAfterLd]>;
4872
+ itins.rm >, Sched<[itins.Sched.Folded , ReadAfterLd]>;
4868
4873
}
4869
4874
// HasAVX instantiations of the horizontal add/sub multiclasses, tagged VEX_4V
// (plus VEX_L for the VR256 "Y" variants). Each passes SSE_HADDSUB as the
// itinerary bundle, a loadv* fragment as ld_frag, and 0 for Is2Addr, which
// selects the three-operand assembly string.
4870
4875
let Predicates = [HasAVX] in {
4871
4876
let ExeDomain = SSEPackedSingle in {
4872
4877
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
4873
- X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG;
4878
+ X86fhadd, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG;
4874
4879
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
4875
- X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG;
4880
+ X86fhsub, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG;
4876
4881
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
4877
- X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4882
+ X86fhadd, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4878
4883
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
4879
- X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4884
+ X86fhsub, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
4880
4885
}
// Packed-double variants use S3_Int rather than S3D_Int.
4881
4886
let ExeDomain = SSEPackedDouble in {
4882
4887
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
4883
- X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG;
4888
+ X86fhadd, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG;
4884
4889
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
4885
- X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG;
4890
+ X86fhsub, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG;
4886
4891
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
4887
- X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4892
+ X86fhadd, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4888
4893
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
4889
- X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4894
+ X86fhsub, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
4890
4895
}
4891
4896
}
4892
4897
// Horizontal add/sub instantiations without the VEX modifiers. $src1 is tied
// to $dst through the Constraints string, Is2Addr is left at its default of 1
// (two-operand assembly string), and the memory forms use memopv* fragments.
4893
4898
let Constraints = "$src1 = $dst" in {
4894
4899
let ExeDomain = SSEPackedSingle in {
4895
4900
defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd,
4896
- memopv4f32>;
4901
+ SSE_HADDSUB, memopv4f32>;
4897
4902
defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub,
4898
- memopv4f32>;
4903
+ SSE_HADDSUB, memopv4f32>;
4899
4904
}
4900
4905
let ExeDomain = SSEPackedDouble in {
4901
4906
defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd,
4902
- memopv2f64>;
4907
+ SSE_HADDSUB, memopv2f64>;
4903
4908
defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub,
4904
- memopv2f64>;
4909
+ SSE_HADDSUB, memopv2f64>;
4905
4910
}
4906
4911
}
4907
4912
4908
4913
//===---------------------------------------------------------------------===//
4909
4914
// SSSE3 - Packed Absolute Instructions
4910
4915
//===---------------------------------------------------------------------===//
4911
4916
// SSE_PABS - OpndItins record for the packed-absolute-value instructions:
// it carries IIC_SSE_PABS_RR / IIC_SSE_PABS_RM and has Sched set to
// WriteVecALU.
4917
+ let Sched = WriteVecALU in
4918
+ def SSE_PABS : OpndItins<
4919
+ IIC_SSE_PABS_RR, IIC_SSE_PABS_RM
4920
+ >;
4912
4921
4913
4922
/// SS3I_unop_rm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// Defines a VR128 register form (`rr`) and an i128mem memory form (`rm`) of
/// class SS38I. `itins` provides the itinerary classes (itins.rr / itins.rm)
/// and the scheduling class (itins.Sched, itins.Sched.Folded for `rm`).
4914
4923
multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
4915
- SDNode OpNode, PatFrag ld_frag> {
4924
+ SDNode OpNode, OpndItins itins, PatFrag ld_frag> {
4916
4925
def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
4917
4926
(ins VR128:$src),
4918
4927
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4919
4928
[(set VR128:$dst, (vt (OpNode VR128:$src)))],
4920
- IIC_SSE_PABS_RR >, Sched<[WriteVecALU ]>;
4929
+ itins.rr >, Sched<[itins.Sched ]>;
4921
4930
// Memory form: the value read through ld_frag is bitconverted before OpNode
// is applied and the result is typed as `vt`.
4922
4931
def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
4923
4932
(ins i128mem:$src),
4924
4933
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4925
4934
[(set VR128:$dst,
4926
4935
(vt (OpNode (bitconvert (ld_frag addr:$src)))))],
4927
- IIC_SSE_PABS_RM >, Sched<[WriteVecALULd ]>;
4936
+ itins.rm >, Sched<[itins.Sched.Folded ]>;
4928
4937
}
4929
4938
4930
4939
/// SS3I_unop_rm_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
/// VR256 counterpart of the 128-bit multiclass: defines `Yrr` (register) and
/// `Yrm` (i256mem) of class SS38I. Unlike the 128-bit version there is no
/// ld_frag parameter; the memory pattern always uses loadv4i64.
4931
4940
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
4932
- SDNode OpNode> {
4941
+ SDNode OpNode, OpndItins itins > {
4933
4942
def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
4934
4943
(ins VR256:$src),
4935
4944
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4936
- [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
4937
- Sched<[WriteVecALU ]>;
4945
+ [(set VR256:$dst, (vt (OpNode VR256:$src)))], itins.rr >,
4946
+ Sched<[itins.Sched ]>;
4938
4947
// Memory form: the loadv4i64 result is bitconverted before OpNode is applied.
4939
4948
def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
4940
4949
(ins i256mem:$src),
4941
4950
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4942
4951
[(set VR256:$dst,
4943
- (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>,
4944
- Sched<[WriteVecALULd ]>;
4952
+ (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))], itins.rm >,
4953
+ Sched<[itins.Sched.Folded ]>;
4945
4954
}
4946
4955
// Packed-absolute-value instantiations. The VEX-tagged VR128 forms use
// SS3I_unop_rm with loadv2i64, the VEX_L forms use SS3I_unop_rm_y, and the
// untagged forms at the end use SS3I_unop_rm with memopv2i64. All pass `abs`
// as the node and SSE_PABS as the itinerary bundle. The byte/word variants are
// predicated on NoVLX_Or_NoBWI, the dword variants on NoVLX.
4947
4956
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
4948
- defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG;
4949
- defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG;
4957
+ defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG;
4958
+ defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG;
4950
4959
}
4951
4960
let Predicates = [HasAVX, NoVLX] in {
4952
- defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG;
4961
+ defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG;
4953
4962
}
4954
4963
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
4955
- defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG;
4956
- defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG;
4964
+ defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SSE_PABS >, VEX, VEX_L, VEX_WIG;
4965
+ defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SSE_PABS >, VEX, VEX_L, VEX_WIG;
4957
4966
}
4958
4967
let Predicates = [HasAVX2, NoVLX] in {
4959
- defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG;
4968
+ defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SSE_PABS >, VEX, VEX_L, VEX_WIG;
4960
4969
}
4961
4970
4962
- defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>;
4963
- defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>;
4964
- defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
4971
+ defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SSE_PABS, memopv2i64>;
4972
+ defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SSE_PABS, memopv2i64>;
4973
+ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SSE_PABS, memopv2i64>;
4965
4974
4966
4975
//===---------------------------------------------------------------------===//
4967
4976
// SSSE3 - Packed Binary Operator Instructions
@@ -5181,9 +5190,14 @@ defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
5181
5190
// SSSE3 - Packed Align Instruction Patterns
5182
5191
//===---------------------------------------------------------------------===//
5183
5192
// SSE_PALIGN - OpndItins record for the packed-align instructions: it carries
// IIC_SSE_PALIGNRR / IIC_SSE_PALIGNRM and has Sched set to WriteShuffle.
5193
+ let Sched = WriteShuffle in
5194
+ def SSE_PALIGN : OpndItins<
5195
+ IIC_SSE_PALIGNRR, IIC_SSE_PALIGNRM
5196
+ >;
5197
+
5184
5198
multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
5185
5199
PatFrag memop_frag, X86MemOperand x86memop,
5186
- bit Is2Addr = 1> {
5200
+ OpndItins itins, bit Is2Addr = 1> {
5187
5201
let hasSideEffects = 0 in {
5188
5202
def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst),
5189
5203
(ins RC:$src1, RC:$src2, u8imm:$src3),
@@ -5192,7 +5206,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
5192
5206
!strconcat(asm,
5193
5207
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5194
5208
[(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))],
5195
- IIC_SSE_PALIGNRR >, Sched<[WriteShuffle ]>;
5209
+ itins.rr >, Sched<[itins.Sched ]>;
5196
5210
let mayLoad = 1 in
5197
5211
def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
5198
5212
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@@ -5203,19 +5217,19 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
5203
5217
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
5204
5218
(bitconvert (memop_frag addr:$src2)),
5205
5219
(i8 imm:$src3))))],
5206
- IIC_SSE_PALIGNRM >, Sched<[WriteShuffleLd , ReadAfterLd]>;
5220
+ itins.rm >, Sched<[itins.Sched.Folded , ReadAfterLd]>;
5207
5221
}
5208
5222
}
5209
5223
// ssse3_palignr instantiations, all passing SSE_PALIGN as the itinerary
// bundle. The two VEX_4V forms pass 0 for Is2Addr; PALIGNR leaves Is2Addr at
// its default and ties $src1 to $dst through the Constraints string.
5210
5224
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
5211
5225
defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64,
5212
- i128mem, 0>, VEX_4V, VEX_WIG;
5226
+ i128mem, SSE_PALIGN, 0>, VEX_4V, VEX_WIG;
5213
5227
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
5214
5228
defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64,
5215
- i256mem, 0>, VEX_4V, VEX_L, VEX_WIG;
5229
+ i256mem, SSE_PALIGN, 0>, VEX_4V, VEX_L, VEX_WIG;
5216
5230
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
5217
5231
defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64,
5218
- i128mem>;
5232
+ i128mem, SSE_PALIGN >;
5219
5233
5220
5234
//===---------------------------------------------------------------------===//
5221
5235
// SSSE3 - Thread synchronization
0 commit comments