diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fdf718db09d1..f26578a0a9ee 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -538,7 +538,8 @@ class T2FourReg opc22_20, bits<4> opc7_4, string opc, list pattern> : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, - opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> { + opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rn; @@ -556,7 +557,8 @@ class T2MlaLong opc22_20, bits<4> opc7_4, string opc> : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rn; @@ -977,7 +979,8 @@ multiclass T2I_ld opcod, string opc, PatFrag opnode> { def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]>, + Sched<[WriteLd]> { bits<4> Rt; bits<17> addr; let Inst{31-25} = 0b1111100; @@ -993,7 +996,8 @@ multiclass T2I_ld opcod, string opc, } def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>, + Sched<[WriteLd]> { bits<4> Rt; bits<13> addr; let Inst{31-27} = 0b11111; @@ -1015,7 +1019,8 @@ multiclass T2I_ld opcod, string opc, } def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]>, + Sched<[WriteLd]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -1039,7 +1044,8 @@ multiclass T2I_ld opcod, string opc, // from the PC. def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", - [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { + [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]>, + Sched<[WriteLd]> { let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1065,7 +1071,8 @@ multiclass T2I_st opcod, string opc, PatFrag opnode> { def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_imm12:$addr)]> { + [(opnode target:$Rt, t2addrmode_imm12:$addr)]>, + Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0001; let Inst{22-21} = opcod; @@ -1082,7 +1089,8 @@ multiclass T2I_st opcod, string opc, } def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> { + [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>, + Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -1102,7 +1110,8 @@ multiclass T2I_st opcod, string opc, } def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> { + [(opnode target:$Rt, t2addrmode_so_reg:$addr)]>, + Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -1140,7 +1149,8 @@ class T2I_ext_rrot opcod, string opc> (outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), opc, ".w\t$Rd, $Rm$rot", []>, - Requires<[IsThumb2]>; + Requires<[IsThumb2]>, + Sched<[WriteALU, ReadALU]>; // UXTB16, SXTB16 - Requires HasDSP, does not need the .w qualifier. class T2I_ext_rrot_xtb16 opcod, string opc> @@ -1148,7 +1158,8 @@ class T2I_ext_rrot_xtb16 opcod, string opc> (outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), opc, "\t$Rd, $Rm$rot", []>, - Requires<[HasDSP, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>, + Sched<[WriteALU, ReadALU]>; /// T2I_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. @@ -1156,7 +1167,8 @@ class T2I_exta_rrot opcod, string opc> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>, - Requires<[HasDSP, IsThumb2]> { + Requires<[HasDSP, IsThumb2]>, + Sched<[WriteALU, ReadALU]> { bits<2> rot; let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -1246,7 +1258,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>; + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>, + Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 @@ -1300,17 +1313,20 @@ let mayLoad = 1, hasSideEffects = 0 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, - "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), @@ -1320,41 +1336,45 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []>; + []>, Sched<[WriteLd]>; def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []>; + []>, Sched<[WriteLd]>; def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT type, string opc, InstrItinClass ii> : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc, - "\t$Rt, $addr", []> { + "\t$Rt, $addr", []>, Sched<[WriteLd]> { bits<4> Rt; bits<13> addr; let Inst{31-27} = 0b11111; @@ -1398,11 +1418,14 @@ class T2Ildacq bits23_20, bits<2> bit54, dag oops, dag iops, } def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt), - (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>; + (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>, + Sched<[WriteLd]>; def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt), - (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>; + (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>, + Sched<[WriteLd]>; def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt), - (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>; + (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>, + Sched<[WriteLd]>; // Store defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>; @@ -1415,7 +1438,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>, + Sched<[WriteST]>; // Indexed stores @@ -1424,19 +1448,22 @@ def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; } // mayStore = 1, hasSideEffects = 0 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1447,7 +1474,8 @@ def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_store GPRnopc:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, addr_offset_none:$Rn, @@ -1457,7 +1485,8 @@ def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, addr_offset_none:$Rn, @@ -1467,7 +1496,8 @@ def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; // Pseudo-instructions for pattern matching the pre-indexed stores. We can't // put the patterns on the instruction definitions directly as ISel wants @@ -1480,17 +1510,20 @@ def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; } // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly @@ -1498,7 +1531,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT type, string opc, InstrItinClass ii> : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, - "\t$Rt, $addr", []> { + "\t$Rt, $addr", []>, Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = 0; // not signed @@ -1524,7 +1557,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; let mayLoad = 1 in def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { + "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>, + Sched<[WriteLd]> { let DecoderMethod = "DecodeT2LDRDPreInstruction"; } @@ -1532,13 +1566,13 @@ let mayLoad = 1 in def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", - "$addr.base = $wb", []>; + "$addr.base = $wb", []>, Sched<[WriteLd]>; let mayStore = 1 in def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", - "$addr.base = $wb", []> { + "$addr.base = $wb", []>, Sched<[WriteST]> { let DecoderMethod = "DecodeT2STRDPreInstruction"; } @@ -1547,12 +1581,13 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", - "$addr.base = $wb", []>; + "$addr.base = $wb", []>, Sched<[WriteST]>; class T2Istrrel bit54, dag oops, dag iops, string opc, string asm, list pattern> : Thumb2I, Requires<[IsThumb, HasAcquireRelease]> { + asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]>, + Sched<[WriteST]> { bits<4> Rt; bits<4> addr; @@ -1893,10 +1928,11 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, def : InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; let Constraints = "$src = $Rd" in { def t2MOVTi16 : T2I<(outs rGPR:$Rd), @@ -2530,7 +2566,8 @@ def : T2Pat<(t2_so_imm_not:$src), let isCommutable = 1 in def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2541,7 +2578,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, class T2FourRegMLA op7_4, string opc, list pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, UseMulOps]> { + Requires<[IsThumb2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2575,7 +2613,8 @@ class T2SMMUL op7_4, string opc, list pattern> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, opc, "\t$Rd, $Rn, $Rm", pattern>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2590,7 +2629,8 @@ class T2FourRegSMMLA op22_20, bits<4> op7_4, string opc, list pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2607,7 +2647,8 @@ class T2ThreeRegSMUL op22_20, bits<2> op5_4, string opc, list pattern> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm", pattern>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2642,7 +2683,8 @@ class T2FourRegSMLA op22_20, bits<2> op5_4, string opc, list pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2680,7 +2722,8 @@ class T2SMLAL op22_20, bits<4> op7_4, string opc, list pattern> (outs rGPR:$Ra, rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; // Halfword multiple accumulate long: SMLAL def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>; @@ -2693,7 +2736,8 @@ class T2DualHalfMul op22_20, bits<4> op7_4, string opc> (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{15-12} = 0b1111; } @@ -2720,7 +2764,8 @@ class T2DualHalfMulAddLong op22_20, bits<4> op7_4, string opc> (outs rGPR:$Ra, rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">; def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">; @@ -2734,7 +2779,8 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]> { + Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -2745,7 +2791,8 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]> { + Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index c01e43976865..87eb4c2b9074 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -89,9 +89,10 @@ def ReadMAC : SchedRead; // Divisions. def WriteDIV : SchedWrite; -// Loads. +// Loads/Stores. def WriteLd : SchedWrite; def WritePreLd : SchedWrite; +def WriteST : SchedWrite; // Branches. def WriteBr : SchedWrite; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 51c7ae4eb692..8fb8a2a3b6d2 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -2003,6 +2003,7 @@ def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; } // Load Integer. def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; } +def : SchedAlias; // Load the upper 32-bits using the same micro-op. def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3; let NumMicroOps = 0; } @@ -2482,6 +2483,7 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; +def : SchedAlias; // ===---------------------------------------------------------------------===// // Floating-point. Map target defined SchedReadWrite to processor specific ones diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td index 36754eec68b9..59949344399c 100644 --- a/lib/Target/ARM/ARMScheduleR52.td +++ b/lib/Target/ARM/ARMScheduleR52.td @@ -77,10 +77,6 @@ def : WriteRes { let Latency = 8; let ResourceCycles = [8]; // not pipelined } -// Loads -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } - // Branches - LR written in Late EX2 def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 0; } @@ -167,6 +163,9 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { @@ -340,15 +339,6 @@ def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>; def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>; def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>; -//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>; -//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>; -//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>; - -//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>; -//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>; -//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>; - - // Integer Load, Multiple. foreach Lat = 3-25 in { def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index 1e9e4de91508..4b6f08ed6e50 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -133,6 +133,8 @@ let SchedModel = SwiftModel in { def : SchedAlias; def : ReadAdvance; def : SchedAlias; + def : SchedAlias; + def : SchedAlias; def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[ diff --git a/test/CodeGen/ARM/misched-int-basic-thumb2.mir b/test/CodeGen/ARM/misched-int-basic-thumb2.mir new file mode 100644 index 000000000000..86ef1e26f636 --- /dev/null +++ b/test/CodeGen/ARM/misched-int-basic-thumb2.mir @@ -0,0 +1,175 @@ +# Basic machine sched model test for Thumb2 int instructions +# RUN: llc -o /dev/null %s -mtriple=thumbv7-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +# RUN: llc -o /dev/null %s -mtriple=thumbv7--eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +# RUN: llc -o /dev/null %s -mtriple=thumbv8r-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \ +# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +# REQUIRES: asserts +--- | + ; ModuleID = 'foo.ll' + source_filename = "foo.ll" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7---eabi" + + @g1 = common global i32 0, align 4 + @g2 = common global i32 0, align 4 + + define i64 @foo(i16 signext %a, i16 signext %b) { + entry: + %0 = load i32, i32* @g1, align 4 + %1 = load i32, i32* @g2, align 4 + %2 = add nuw nsw i32 %0, %0 + %3 = sdiv i32 %2, %1 + store i32 %3, i32* @g1, align 4 + %d = mul nsw i16 %a, %a + %e = mul nsw i16 %b, %b + %f = add nuw nsw i16 %e, %d + %c = zext i16 %f to i32 + %mul8 = mul nsw i32 %c, %3 + %mul9 = mul nsw i32 %mul8, %mul8 + %add10 = add nuw nsw i32 %mul9, %mul8 + %conv1130 = zext i32 %add10 to i64 + %mul12 = mul nuw nsw i64 %conv1130, %conv1130 + %mul13 = mul nsw i64 %mul12, %mul12 + %add14 = add nuw nsw i64 %mul13, %mul12 + ret i64 %add14 + } +# +# CHECK: ********** MI Scheduling ********** +# CHECK: SU(2): %vreg2 = t2MOVi32imm ; rGPR:%vreg2 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 2 +# CHECK_R52: Latency : 2 +# +# CHECK: SU(3): %vreg3 = t2LDRi12 %vreg2, 0, pred:14, pred:%noreg; mem:LD4[@g1](dereferenceable) rGPR:%vreg3,%vreg2 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 3 +# CHECK_R52: Latency : 4 +# +# CHECK : SU(6): %vreg6 = t2ADDrr %vreg3, %vreg3, pred:14, pred:%noreg, opt:%noreg; rGPR:%vreg6,%vreg3,%vreg3 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 1 +# CHECK_R52: Latency : 3 + +# CHECK: SU(7): %vreg7 = t2SDIV %vreg6, %vreg5, pred:14, pred:%noreg; rGPR:%vreg7,%vreg6,%vreg5 +# CHECK_A9: Latency : 0 +# CHECK_SWIFT: Latency : 14 +# CHECK_R52: Latency : 8 + +# CHECK: SU(8): t2STRi12 %vreg7, %vreg2, 0, pred:14, pred:%noreg; mem:ST4[@g1] rGPR:%vreg7,%vreg2 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 0 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(9): %vreg8 = t2SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; rGPR:%vreg8,%vreg1,%vreg1 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(10): %vreg9 = t2SMLABB %vreg0, %vreg0, %vreg8, pred:14, pred:%noreg; rGPR:%vreg9,%vreg0,%vreg0,%vreg8 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(11): %vreg10 = t2UXTH %vreg9, 0, pred:14, pred:%noreg; rGPR:%vreg10,%vreg9 +# CHECK_A9: Latency : 1 +# CHECK_SWIFT: Latency : 1 +# CHECK_R52: Latency : 3 +# +# CHECK: SU(12): %vreg11 = t2MUL %vreg10, %vreg7, pred:14, pred:%noreg; rGPR:%vreg11,%vreg10,%vreg7 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(13): %vreg12 = t2MLA %vreg11, %vreg11, %vreg11, pred:14, pred:%noreg; rGPR:%vreg12,%vreg11,%vreg11,%vreg11 +# CHECK_A9: Latency : 2 +# CHECK_SWIFT: Latency : 4 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(14): %vreg13, %vreg14 = t2UMULL %vreg12, %vreg12, pred:14, pred:%noreg; rGPR:%vreg13,%vreg14,%vreg12,%vreg12 +# CHECK_A9: Latency : 3 +# CHECK_SWIFT: Latency : 5 +# CHECK_R52: Latency : 4 +# +# CHECK: SU(18): %vreg19, %vreg20 = t2UMLAL %vreg12, %vreg12, %vreg19, %vreg20, pred:14, pred:%noreg; rGPR:%vreg19,%vreg20,%vreg12,%vreg12,%vreg20 +# CHECK_A9: Latency : 3 +# CHECK_SWIFT: Latency : 7 +# CHECK_R52: Latency : 4 +# CHECK: ** ScheduleDAGMILive::schedule picking next node +... +--- +name: foo +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: rgpr } + - { id: 1, class: rgpr } + - { id: 2, class: rgpr } + - { id: 3, class: rgpr } + - { id: 4, class: rgpr } + - { id: 5, class: rgpr } + - { id: 6, class: rgpr } + - { id: 7, class: rgpr } + - { id: 8, class: rgpr } + - { id: 9, class: rgpr } + - { id: 10, class: rgpr } + - { id: 11, class: rgpr } + - { id: 12, class: rgpr } + - { id: 13, class: rgpr } + - { id: 14, class: rgpr } + - { id: 15, class: rgpr } + - { id: 16, class: rgpr } + - { id: 17, class: rgpr } + - { id: 18, class: rgpr } + - { id: 19, class: rgpr } + - { id: 20, class: rgpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } + - { reg: '%r1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r0, %r1 + + %1 = COPY %r1 + %0 = COPY %r0 + %2 = t2MOVi32imm @g1 + %3 = t2LDRi12 %2, 0, 14, _ :: (dereferenceable load 4 from @g1) + %4 = t2MOVi32imm @g2 + %5 = t2LDRi12 %4, 0, 14, _ :: (dereferenceable load 4 from @g2) + %6 = t2ADDrr %3, %3, 14, _, _ + %7 = t2SDIV %6, %5, 14, _ + t2STRi12 %7, %2, 0, 14, _ :: (store 4 into @g1) + %8 = t2SMULBB %1, %1, 14, _ + %9 = t2SMLABB %0, %0, %8, 14, _ + %10 = t2UXTH %9, 0, 14, _ + %11 = t2MUL %10, %7, 14, _ + %12 = t2MLA %11, %11, %11, 14, _ + %13, %14 = t2UMULL %12, %12, 14, _ + %19, %16 = t2UMULL %13, %13, 14, _ + %17 = t2MLA %13, %14, %16, 14, _ + %20 = t2MLA %13, %14, %17, 14, _ + %19, %20 = t2UMLAL %12, %12, %19, %20, 14, _ + %r0 = COPY %19 + %r1 = COPY %20 + tBX_RET 14, _, implicit %r0, implicit %r1 + +...