From acb4edfa667bcc9617270e12dd3548e6314c57a8 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Fri, 23 Dec 2016 12:51:41 +0000 Subject: [PATCH] [AArch64] Cortex-A57 FDIV/FSQRT scheduling fix (W-unit) According to the Cortex-A57 doc, FDIV/FSQRT instructions should use F0 unit (W-unit in AArch64SchedA57.td, the same as cryptography instructions), not F1 unit (X-unit in td, like ASIMD absolute diff accum SABA/UABA). This patch changes FDIV/FSQRT scheduling declarations to use A57UnitW instead of A57UnitX. Also, latencies for those instructions are corrected. Patch by Andrew Zhogin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64SchedA57.td | 22 +++++++++---------- lib/Target/AArch64/AArch64SchedA57WriteRes.td | 14 ++++++------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index a266351f7ffc..99c48d0146e4 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -92,7 +92,7 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; -def : SchedAlias; +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; @@ -444,19 +444,19 @@ def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP divide, D-form, F32 -def : InstRW<[A57Write_18cyc_1X], (instregex "FDIVv2f32")>; +def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>; // ASIMD FP divide, Q-form, F32 -def : InstRW<[A57Write_36cyc_2X], (instregex "FDIVv4f32")>; +def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>; // ASIMD FP divide, Q-form, F64 -def : InstRW<[A57Write_64cyc_2X], (instregex "FDIVv2f64")>; +def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>; // Note: These were simply duplicated from ASIMD FDIV because of missing documentation // ASIMD FP square root, D-form, F32 -def : InstRW<[A57Write_18cyc_1X], (instregex "FSQRTv2f32")>; +def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>; // ASIMD FP square root, Q-form, F32 -def : InstRW<[A57Write_36cyc_2X], (instregex "FSQRTv4f32")>; +def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>; // ASIMD FP square root, Q-form, F64 -def : InstRW<[A57Write_64cyc_2X], (instregex "FSQRTv2f64")>; +def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>; // ASIMD FP max/min, normal, D-form def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>; @@ -551,15 +551,15 @@ def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^ def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>; -def : InstRW<[A57Write_32cyc_1X], (instrs FDIVDrr)>; -def : InstRW<[A57Write_18cyc_1X], (instrs FDIVSrr)>; +def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>; +def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>; def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>; def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>; -def : InstRW<[A57Write_32cyc_1X], (instrs FSQRTDr)>; -def : InstRW<[A57Write_18cyc_1X], (instrs FSQRTSr)>; +def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>; +def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>; def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>; def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>; diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td index 6f30108a23e3..55005e1d9ed1 100644 --- a/lib/Target/AArch64/AArch64SchedA57WriteRes.td +++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td @@ -28,15 +28,15 @@ def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } -def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; - let ResourceCycles = [18]; } +def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; + let ResourceCycles = [17]; } def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; let ResourceCycles = [19]; } def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } -def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; +def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; let ResourceCycles = [32]; } def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; let ResourceCycles = [35]; } @@ -54,7 +54,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } //===----------------------------------------------------------------------===// // Define Generic 2 micro-op types -def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { +def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { let Latency = 64; let NumMicroOps = 2; let ResourceCycles = [32, 32]; @@ -139,10 +139,10 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { let Latency = 2; let NumMicroOps = 2; } -def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { - let Latency = 36; +def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 34; let NumMicroOps = 2; - let ResourceCycles = [18, 18]; + let ResourceCycles = [17, 17]; } def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, A57UnitM]> {