Skip to content

Commit

Permalink
[mips][msa] Prevent output operand from commuting for dpadd_[su].df ins
Browse files Browse the repository at this point in the history
Implementation of TargetInstrInfo::findCommutedOpIndices for MIPS target,
restricting commutativity to second and third operand only for
dpadd_[su].df instructions therein.

Prior to this change, there were cases where the vector that is to be added
to the dot product of the other two could take a position other than the
first one in the instruction, generating false output in the destination
vector.

Such behavior has been noticed in the two functions generating v2i64 output
values so far. Other ones may exhibit such behavior as well, just not for
the vector operands which are present in the test at the moment.

Tests altered so that the function's first operand is a constant splat so
that it can be loaded with a ldi instruction, since that is the case in
which the erroneous instruction operand placement has occurred. We check
that the register which is present in the ldi instruction is placed as the
first operand in the corresponding dpadd instruction.

Patch by Stefan Maksimovic.

Differential Revision: https://reviews.llvm.org/D30827



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299223 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
petar-jovanovic committed Mar 31, 2017
1 parent 1cbe5a4 commit 7fb9f75
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 48 deletions.
28 changes: 28 additions & 0 deletions lib/Target/Mips/MipsInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,3 +501,31 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
return MIB;
}

/// Report which two operands of \p MI may be swapped when commuting it.
///
/// Non-commutable instructions are rejected outright. For the MSA
/// DPADD_[SU]_{H,W,D} opcodes the candidate pair is pinned to operand
/// indices 2 and 3; every other opcode is delegated to the generic
/// TargetInstrInfo implementation.
///
/// \param MI        Instruction being queried; must not be a bundle.
/// \param SrcOpIdx1 In/out index of the first operand to commute.
/// \param SrcOpIdx2 In/out index of the second operand to commute.
/// \returns true if a commutable operand pair was identified.
bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                                          unsigned &SrcOpIdx2) const {
  assert(!MI.isBundle() &&
         "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");

  if (!MI.getDesc().isCommutable())
    return false;

  const unsigned Opcode = MI.getOpcode();
  const bool IsDotProductAdd =
      Opcode == Mips::DPADD_U_H || Opcode == Mips::DPADD_U_W ||
      Opcode == Mips::DPADD_U_D || Opcode == Mips::DPADD_S_H ||
      Opcode == Mips::DPADD_S_W || Opcode == Mips::DPADD_S_D;

  // Anything that is not a dpadd keeps the default commuting rules.
  if (!IsDotProductAdd)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The first operand doubles as the destination, so it must stay in place;
  // only operands 2 and 3 are offered as the commutable pair.
  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3))
    return false;

  // Both selected operands have to be registers for the swap to be accepted.
  return MI.getOperand(SrcOpIdx1).isReg() && MI.getOperand(SrcOpIdx2).isReg();
}
3 changes: 3 additions & 0 deletions lib/Target/Mips/MipsInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ class MipsInstrInfo : public MipsGenInstrInfo {
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
MachineBasicBlock::iterator I) const;

/// Override reporting which operand indices of \p MI may be commuted.
/// For the MSA DPADD_[SU]_{H,W,D} opcodes only operands 2 and 3 are
/// offered, because the first operand is both input and output.
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;

protected:
bool isZeroImm(const MachineOperand &op) const;

Expand Down
84 changes: 36 additions & 48 deletions test/CodeGen/Mips/msa/3r_4r_widen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

@llvm_mips_dpadd_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_dpadd_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
@llvm_mips_dpadd_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
@llvm_mips_dpadd_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16

define void @llvm_mips_dpadd_s_h_test() nounwind {
entry:
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
%2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_s_h_RES
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
%2 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>, <16 x i8> %0, <16 x i8> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_dpadd_s_h_RES
ret void
}

Expand All @@ -25,23 +23,21 @@ declare <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
; CHECK: llvm_mips_dpadd_s_h_test:
; CHECK: ld.b
; CHECK: ld.b
; CHECK: ld.h
; CHECK: dpadd_s.h
; CHECK: ldi.h [[R1:\$w[0-9]+]],
; CHECK: dpadd_s.h [[R1]],
; CHECK: st.h
; CHECK: .size llvm_mips_dpadd_s_h_test
;
@llvm_mips_dpadd_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_dpadd_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
@llvm_mips_dpadd_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
@llvm_mips_dpadd_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16

define void @llvm_mips_dpadd_s_w_test() nounwind {
entry:
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
%2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_s_w_RES
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
%2 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> <i32 4, i32 4, i32 4, i32 4>, <8 x i16> %0, <8 x i16> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_dpadd_s_w_RES
ret void
}

Expand All @@ -50,48 +46,44 @@ declare <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
; CHECK: llvm_mips_dpadd_s_w_test:
; CHECK: ld.h
; CHECK: ld.h
; CHECK: ld.w
; CHECK: dpadd_s.w
; CHECK: ldi.w [[R1:\$w[0-9]+]],
; CHECK: dpadd_s.w [[R1]],
; CHECK: st.w
; CHECK: .size llvm_mips_dpadd_s_w_test
;
@llvm_mips_dpadd_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_dpadd_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
@llvm_mips_dpadd_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
@llvm_mips_dpadd_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16

define void @llvm_mips_dpadd_s_d_test() nounwind {
entry:
%0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
%2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_s_d_RES
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
%2 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> <i64 4, i64 4>, <4 x i32> %0, <4 x i32> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_dpadd_s_d_RES
ret void
}

declare <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind

; CHECK: llvm_mips_dpadd_s_d_test:
; CHECK: ldi.d [[R1:\$w[0-9]+]],
; CHECK: ld.w
; CHECK: ld.w
; CHECK: ld.d
; CHECK: dpadd_s.d
; CHECK: dpadd_s.d [[R1]],
; CHECK: st.d
; CHECK: .size llvm_mips_dpadd_s_d_test
;
@llvm_mips_dpadd_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_dpadd_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
@llvm_mips_dpadd_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
@llvm_mips_dpadd_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16

define void @llvm_mips_dpadd_u_h_test() nounwind {
entry:
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
%2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_u_h_RES
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
%2 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>, <16 x i8> %0, <16 x i8> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_dpadd_u_h_RES
ret void
}

Expand All @@ -100,23 +92,21 @@ declare <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
; CHECK: llvm_mips_dpadd_u_h_test:
; CHECK: ld.b
; CHECK: ld.b
; CHECK: ld.h
; CHECK: dpadd_u.h
; CHECK: ldi.h [[R1:\$w[0-9]+]],
; CHECK: dpadd_u.h [[R1]],
; CHECK: st.h
; CHECK: .size llvm_mips_dpadd_u_h_test
;
@llvm_mips_dpadd_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_dpadd_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
@llvm_mips_dpadd_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
@llvm_mips_dpadd_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16

define void @llvm_mips_dpadd_u_w_test() nounwind {
entry:
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
%2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_u_w_RES
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
%2 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> <i32 4, i32 4, i32 4, i32 4>, <8 x i16> %0, <8 x i16> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_dpadd_u_w_RES
ret void
}

Expand All @@ -125,33 +115,31 @@ declare <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
; CHECK: llvm_mips_dpadd_u_w_test:
; CHECK: ld.h
; CHECK: ld.h
; CHECK: ld.w
; CHECK: dpadd_u.w
; CHECK: ldi.w [[R1:\$w[0-9]+]],
; CHECK: dpadd_u.w [[R1]],
; CHECK: st.w
; CHECK: .size llvm_mips_dpadd_u_w_test
;
@llvm_mips_dpadd_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_dpadd_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
@llvm_mips_dpadd_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
@llvm_mips_dpadd_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16

define void @llvm_mips_dpadd_u_d_test() nounwind {
entry:
%0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
%2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_u_d_RES
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
%2 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> <i64 4, i64 4>, <4 x i32> %0, <4 x i32> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_dpadd_u_d_RES
ret void
}

declare <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind

; CHECK: llvm_mips_dpadd_u_d_test:
; CHECK: ldi.d [[R1:\$w[0-9]+]],
; CHECK: ld.w
; CHECK: ld.w
; CHECK: ld.d
; CHECK: dpadd_u.d
; CHECK: dpadd_u.d [[R1]],
; CHECK: st.d
; CHECK: .size llvm_mips_dpadd_u_d_test
;
Expand Down

0 comments on commit 7fb9f75

Please sign in to comment.