Skip to content

Commit

Permalink
[ARM] Do not select SMULW[BT] or SMLAW[BT]
Browse files Browse the repository at this point in the history
The current instruction selection patterns for SMULW[BT] and SMLAW[BT]
are incorrect. These instructions multiply a 32-bit and a 16-bit value
(both signed) and return the top 32 bits of the 48-bit result. This
preserves the 16 bits of overflow, whereas the patterns they currently
match (a 32-bit multiply followed by an arithmetic shift right by 16)
truncate the product to 32 bits before shifting, so the result is only
a sign-extended 16-bit value.

To select these instructions, we would need to match an ISD::SMUL_LOHI,
a sign extend, two shifts and an or. There is no way to match SMUL_LOHI
in an instruction pattern as it defines multiple values, so this would
have to be done in C++. I have raised
http://llvm.org/bugs/show_bug.cgi?id=21297 to cover allowing correct
selection of these instructions.

This fixes http://llvm.org/bugs/show_bug.cgi?id=19396



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220196 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
ostannard committed Oct 20, 2014
1 parent 508c393 commit 19d010b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 30 deletions.
26 changes: 4 additions & 22 deletions lib/Target/ARM/ARMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3942,14 +3942,12 @@ multiclass AI_smul<string opc, PatFrag opnode> {

def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (sra (opnode GPR:$Rn,
(sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
[]>,
Requires<[IsARM, HasV5TE]>;

def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (sra (opnode GPR:$Rn,
(sra GPR:$Rm, (i32 16))), (i32 16)))]>,
[]>,
Requires<[IsARM, HasV5TE]>;
}

Expand Down Expand Up @@ -3991,17 +3989,13 @@ multiclass AI_smla<string opc, PatFrag opnode> {
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>,
[]>,
Requires<[IsARM, HasV5TE, UseMulOps]>;

def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd,
(add GPR:$Ra, (sra (opnode GPRnopc:$Rn,
(sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>,
[]>,
Requires<[IsARM, HasV5TE, UseMulOps]>;
}
}
Expand Down Expand Up @@ -5326,11 +5320,6 @@ def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
(SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
(SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
(i32 16)),
(SMULWB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
(SMULWB GPR:$a, GPR:$b)>;

def : ARMV5MOPat<(add GPR:$acc,
(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
Expand All @@ -5353,13 +5342,6 @@ def : ARMV5MOPat<(add GPR:$acc,
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
(i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
(sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
(SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;


// Pre-v7 uses MCR for synchronization barriers.
Expand Down
12 changes: 4 additions & 8 deletions lib/Target/ARM/ARMInstrThumb2.td
Original file line number Diff line number Diff line change
Expand Up @@ -2726,8 +2726,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {

def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
(sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
[]>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
Expand All @@ -2739,8 +2738,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {

def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
(sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
[]>,
Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
Expand Down Expand Up @@ -2809,8 +2807,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WB : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
[]>,
Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
Expand All @@ -2822,8 +2819,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WT : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
(sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
[]>,
Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
Expand Down
26 changes: 26 additions & 0 deletions test/CodeGen/ARM/smulw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; RUN: llc -mtriple=arm--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s

; We cannot codegen the smulw[bt] or smlaw[bt] instructions for these functions,
; as the top 16 bits of the result would differ

; f1: multiply-only case. Sign-extends the 16-bit %b to 32 bits, multiplies it
; by %a using a 32-bit multiply, and arithmetic-shifts the product right by 16.
; Because the product is an i32, any bits above bit 31 are discarded before the
; shift, so this is not equivalent to smulwb (which keeps the top 32 bits of
; the full 48-bit product). The directives below expect a plain multiply
; followed by an arithmetic shift right.
define i32 @f1(i32 %a, i16 %b) {
; CHECK-LABEL: f1:
; CHECK: mul
; CHECK: asr
; Widen the 16-bit operand with sign extension.
%tmp1 = sext i16 %b to i32
; 32-bit multiply: the product is truncated to 32 bits here.
%tmp2 = mul i32 %a, %tmp1
; Arithmetic shift right by 16 of the already-truncated product.
%tmp3 = ashr i32 %tmp2, 16
ret i32 %tmp3
}

; f2: multiply-accumulate case. Same computation as a 32-bit multiply of %a by
; the sign-extended %b followed by an arithmetic shift right by 16, with the
; shifted value then added to %c. As the product is truncated to 32 bits before
; the shift, this is not equivalent to smlawb. The directives below expect a
; plain multiply followed by an add whose operand carries an "asr #16" shift.
define i32 @f2(i32 %a, i16 %b, i32 %c) {
; CHECK-LABEL: f2:
; CHECK: mul
; CHECK: add{{.*}}, asr #16
; Widen the 16-bit operand with sign extension.
%tmp1 = sext i16 %b to i32
; 32-bit multiply: the product is truncated to 32 bits here.
%tmp2 = mul i32 %a, %tmp1
; Arithmetic shift right by 16 of the already-truncated product.
%tmp3 = ashr i32 %tmp2, 16
; Accumulate the shifted product into %c.
%tmp4 = add i32 %tmp3, %c
ret i32 %tmp4
}

0 comments on commit 19d010b

Please sign in to comment.