Skip to content

Commit

Permalink
[AArch64] Cortex-A57 FDIV/FSQRT scheduling fix (W-unit)
Browse files Browse the repository at this point in the history
According to the Cortex-A57 documentation, FDIV/FSQRT instructions should use the F0 unit
(the W-unit in AArch64SchedA57.td, the same unit as the cryptography instructions),
not the F1 unit (the X-unit in the .td file, used by e.g. ASIMD absolute difference accumulate, SABA/UABA).

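This patch changes the FDIV/FSQRT scheduling declarations to use A57UnitW
instead of A57UnitX. It also corrects the latencies of those instructions
where they were wrong: 18 -> 17 cycles for the scalar single-precision and
D-form F32 variants, and 36 -> 34 cycles for the Q-form F32 variants. The
32-cycle (scalar double-precision) and 64-cycle (Q-form F64) latencies are
unchanged.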

Patch by Andrew Zhogin.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290426 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
rengolin committed Dec 23, 2016
1 parent 6c22759 commit acb4edf
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
22 changes: 11 additions & 11 deletions lib/Target/AArch64/AArch64SchedA57.td
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
Expand Down Expand Up @@ -444,19 +444,19 @@ def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f
def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP divide, D-form, F32
def : InstRW<[A57Write_18cyc_1X], (instregex "FDIVv2f32")>;
def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>;
// ASIMD FP divide, Q-form, F32
def : InstRW<[A57Write_36cyc_2X], (instregex "FDIVv4f32")>;
def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>;
// ASIMD FP divide, Q-form, F64
def : InstRW<[A57Write_64cyc_2X], (instregex "FDIVv2f64")>;
def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>;

// Note: These were simply duplicated from ASIMD FDIV because of missing documentation
// ASIMD FP square root, D-form, F32
def : InstRW<[A57Write_18cyc_1X], (instregex "FSQRTv2f32")>;
def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>;
// ASIMD FP square root, Q-form, F32
def : InstRW<[A57Write_36cyc_2X], (instregex "FSQRTv4f32")>;
def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>;
// ASIMD FP square root, Q-form, F64
def : InstRW<[A57Write_64cyc_2X], (instregex "FSQRTv2f64")>;
def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>;

// ASIMD FP max/min, normal, D-form
def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>;
Expand Down Expand Up @@ -551,15 +551,15 @@ def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^
def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>;

def : InstRW<[A57Write_32cyc_1X], (instrs FDIVDrr)>;
def : InstRW<[A57Write_18cyc_1X], (instrs FDIVSrr)>;
def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>;
def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>;

def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>;

def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>;

def : InstRW<[A57Write_32cyc_1X], (instrs FSQRTDr)>;
def : InstRW<[A57Write_18cyc_1X], (instrs FSQRTSr)>;
def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>;
def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>;

def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>;
def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>;
Expand Down
14 changes: 7 additions & 7 deletions lib/Target/AArch64/AArch64SchedA57WriteRes.td
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
let ResourceCycles = [18]; }
def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
let ResourceCycles = [17]; }
def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
let ResourceCycles = [19]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
let ResourceCycles = [32]; }
def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
let ResourceCycles = [35]; }
Expand All @@ -54,7 +54,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
//===----------------------------------------------------------------------===//
// Define Generic 2 micro-op types

def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 64;
let NumMicroOps = 2;
let ResourceCycles = [32, 32];
Expand Down Expand Up @@ -139,10 +139,10 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
let Latency = 2;
let NumMicroOps = 2;
}
def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
let Latency = 36;
def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 34;
let NumMicroOps = 2;
let ResourceCycles = [18, 18];
let ResourceCycles = [17, 17];
}
def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
A57UnitM]> {
Expand Down

0 comments on commit acb4edf

Please sign in to comment.