Skip to content

Commit

Permalink
Corrected Atom latencies for SSE SQRT instructions.
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181346 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
pgurd committed May 7, 2013
1 parent f931f69 commit acccd2e
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 19 deletions.
30 changes: 19 additions & 11 deletions lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -3049,12 +3049,20 @@ let isCodeGenOnly = 1 in {
/// And, we have a special variant form for a full-vector intrinsic form.

let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
def SSE_SQRTPS : OpndItins<
IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM
>;

def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
def SSE_SQRTSS : OpndItins<
IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM
>;

def SSE_SQRTPD : OpndItins<
IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM
>;

def SSE_SQRTSD : OpndItins<
IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM
>;
}

Expand Down Expand Up @@ -3319,18 +3327,18 @@ let Predicates = [HasAVX] in {

// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
SSE_SQRTS>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
SSE_SQRTSS>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
SSE_SQRTS>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
SSE_SQRTSD>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>,
sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>;
defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
Expand Down
12 changes: 8 additions & 4 deletions lib/Target/X86/X86Schedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,14 @@ def IIC_SSE_PINSRW : InstrItinClass;
def IIC_SSE_PABS_RR : InstrItinClass;
def IIC_SSE_PABS_RM : InstrItinClass;

def IIC_SSE_SQRTP_RR : InstrItinClass;
def IIC_SSE_SQRTP_RM : InstrItinClass;
def IIC_SSE_SQRTS_RR : InstrItinClass;
def IIC_SSE_SQRTS_RM : InstrItinClass;
def IIC_SSE_SQRTPS_RR : InstrItinClass;
def IIC_SSE_SQRTPS_RM : InstrItinClass;
def IIC_SSE_SQRTSS_RR : InstrItinClass;
def IIC_SSE_SQRTSS_RM : InstrItinClass;
def IIC_SSE_SQRTPD_RR : InstrItinClass;
def IIC_SSE_SQRTPD_RM : InstrItinClass;
def IIC_SSE_SQRTSD_RR : InstrItinClass;
def IIC_SSE_SQRTSD_RM : InstrItinClass;

def IIC_SSE_RCPP_RR : InstrItinClass;
def IIC_SSE_RCPP_RM : InstrItinClass;
Expand Down
13 changes: 9 additions & 4 deletions lib/Target/X86/X86ScheduleAtom.td
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,15 @@ def AtomItineraries : ProcessorItineraries<

InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,

InstrItinData<IIC_SSE_SQRTP_RR, [InstrStage<13, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTP_RM, [InstrStage<14, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTS_RR, [InstrStage<11, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTS_RM, [InstrStage<12, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<70, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<34, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<34, [Port0, Port1]>] >,

InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<125, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<125, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >,

InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
Expand Down

0 comments on commit acccd2e

Please sign in to comment.