Skip to content

Commit

Permalink
DAG: Avoid OOB when legalizing vector indexing
Browse files Browse the repository at this point in the history
If a vector index is out of bounds, the result is supposed to be
undefined but is not undefined behavior. Change the legalization
for indexing the vector on the stack so that an out of bounds
index does not create an out of bounds memory access.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291604 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Jan 10, 2017
1 parent aca5f16 commit fa5aafa
Show file tree
Hide file tree
Showing 22 changed files with 1,063 additions and 706 deletions.
7 changes: 7 additions & 0 deletions include/llvm/Target/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -3118,6 +3118,13 @@ class TargetLowering : public TargetLoweringBase {
EVT DataVT, SelectionDAG &DAG,
bool IsCompressedMemory) const;

/// Get a pointer to vector element \p Idx located in memory for a vector of
/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
/// bounds the returned pointer is unspecified, but will be within the vector
/// bounds.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
SDValue Idx) const;

//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
Expand Down
40 changes: 10 additions & 30 deletions lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,6 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
// supported by the target.
EVT VT = Tmp1.getValueType();
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = Tmp3.getValueType();
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.CreateStackTemporary(VT);

int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
Expand All @@ -341,13 +339,8 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
DAG.getEntryNode(), dl, Tmp1, StackPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));

// Truncate or zero extend offset to target pointer type.
Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
// Add the offset to the index.
unsigned EltSize = EltVT.getSizeInBits()/8;
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
DAG.getConstant(EltSize, dl, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);

// Store the scalar value.
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
// Load the updated vector.
Expand Down Expand Up @@ -1209,20 +1202,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
}
}

EVT VecVT = Vec.getValueType();

if (!Ch.getNode()) {
// Store the value to a temporary stack slot, then LOAD the returned part.
StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
StackPtr = DAG.CreateStackTemporary(VecVT);
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
MachinePointerInfo());
}

// Add the offset to the index.
unsigned EltSize = Vec.getScalarValueSizeInBits() / 8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));

Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);

SDValue NewLoad;

Expand All @@ -1232,7 +1221,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
else
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType());
VecVT.getVectorElementType());

// Replace the chain going out of the store, by the one out of the load.
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
Expand All @@ -1256,8 +1245,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDLoc dl(Op);

// Store the value to a temporary stack slot, then LOAD the returned part.

SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
EVT VecVT = Vec.getValueType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
Expand All @@ -1266,16 +1255,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);

// Then store the inserted part.

// Add the offset to the index.
unsigned EltSize = Vec.getScalarValueSizeInBits() / 8;

Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));

SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
StackPtr);
SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);

// Store the subvector.
Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
Expand Down
16 changes: 0 additions & 16 deletions lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1021,22 +1021,6 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair,
DAG.getIntPtrConstant(1, dl));
}

SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
SDValue Index) {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout()));

// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getSizeInBits() &&
"Converting bits to bytes lost precision");

Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
DAG.getConstant(EltSize, dl, Index.getValueType()));
return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
}

/// Build an integer with low bits Lo and high bits Hi.
SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
// Arbitrarily use dlHi for result SDLoc
Expand Down
1 change: 0 additions & 1 deletion lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
/// input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);

SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);

Expand Down
7 changes: 3 additions & 4 deletions lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,6 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
GetSplitVector(Vec, Lo, Hi);

EVT VecVT = Vec.getValueType();
EVT VecElemVT = VecVT.getVectorElementType();
unsigned VecElems = VecVT.getVectorNumElements();
unsigned SubElems = SubVec.getValueType().getVectorNumElements();

Expand All @@ -872,7 +871,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());

// Store the new subvector into the specified index.
SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx);
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
Expand Down Expand Up @@ -1003,7 +1002,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,

// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store =
Expand Down Expand Up @@ -1650,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());

// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo(), EltVT);
}
Expand Down
45 changes: 44 additions & 1 deletion lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3706,7 +3706,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
return Result;
}

SDValue
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
const SDLoc &DL, EVT DataVT,
SelectionDAG &DAG,
Expand Down Expand Up @@ -3738,6 +3738,49 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
SDValue Idx,
EVT VecVT,
const SDLoc &dl) {
if (isa<ConstantSDNode>(Idx))
return Idx;

EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorNumElements();
if (isPowerOf2_32(NElts)) {
APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
Log2_32(NElts));
return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
DAG.getConstant(Imm, dl, IdxVT));
}

return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
DAG.getConstant(NElts - 1, dl, IdxVT));
}

SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,
SDValue Index) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout()));

EVT EltVT = VecVT.getVectorElementType();

// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getSizeInBits() &&
"Converting bits to bytes lost precision");

Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);

EVT IdxVT = Index.getValueType();

Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr);
}

//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//
Expand Down
9 changes: 6 additions & 3 deletions test/CodeGen/AArch64/arm64-neon-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -904,8 +904,9 @@ define <8 x i8> @getl(<16 x i8> %x) #0 {

; CHECK-LABEL: test_extracts_inserts_varidx_extract:
; CHECK: str q0
; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], {{\[}}[[PTR]]{{\]}}
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
Expand All @@ -922,7 +923,9 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
}

; CHECK-LABEL: test_extracts_inserts_varidx_insert:
; CHECK: str h0, [{{.*}}, w0, sxtw #1]
; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
; CHECK: bfi x9, [[MASKED_IDX]], #1, #2
; CHECK: st1 { v0.h }[0], [x9]
; CHECK-DAG: ldr d[[R:[0-9]+]]
; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
Expand Down
18 changes: 11 additions & 7 deletions test/CodeGen/AArch64/arm64-nvcast.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s

; CHECK-LABEL: _test:
; CHECK: fmov.2d v0, #2.00000000
; CHECK: str q0, [sp, #-16]!
; CHECK: mov x8, sp
; CHECK: ldr s0, [x8, w1, sxtw #2]
; CHECK-DAG: fmov.2d v0, #2.00000000
; CHECK-DAG: and [[MASK_IDX:x[0-9]+]], x1, #0x3
; CHECK-DAG: mov x9, sp
; CHECK-DAG: str q0, [sp], #16
; CHECK-DAG: bfi [[PTR:x[0-9]+]], [[MASK_IDX]], #2, #2
; CHECK: ldr s0, {{\[}}[[PTR]]{{\]}}
; CHECK: str s0, [x0]

define void @test(float * %p1, i32 %v1) {
Expand All @@ -16,9 +18,11 @@ entry:

; CHECK-LABEL: _test2
; CHECK: movi.16b v0, #63
; CHECK: str q0, [sp, #-16]!
; CHECK: mov x8, sp
; CHECK: ldr s0, [x8, w1, sxtw #2]
; CHECK-DAG: and [[MASK_IDX:x[0-9]+]], x1, #0x3
; CHECK-DAG: str q0, [sp], #16
; CHECK-DAG: mov x9, sp
; CHECK-DAG: bfi [[PTR:x[0-9]+]], [[MASK_IDX]], #2, #2
; CHECK: ldr s0, {{\[}}[[PTR]]{{\]}}
; CHECK: str s0, [x0]

define void @test2(float * %p1, i32 %v1) {
Expand Down
8 changes: 6 additions & 2 deletions test/CodeGen/AMDGPU/insert_vector_elt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,15 @@ define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16>
; GCN: buffer_load_ushort v{{[0-9]+}}, off
; GCN: buffer_load_ushort v{{[0-9]+}}, off

; GCN-DAG: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
; GCN-DAG: s_and_b32 [[MASK_IDX:s[0-9]+]], s{{[0-9]+}}, 3{{$}}
; GCN-DAG: v_or_b32_e32 [[IDX:v[0-9]+]], [[MASK_IDX]], [[BASE_FI]]{{$}}

; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}{{$}}
; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: buffer_store_short v{{[0-9]+}}, [[IDX]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: s_waitcnt

Expand Down
7 changes: 5 additions & 2 deletions test/CodeGen/AMDGPU/local-stack-slot-bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
;
; CHECK-LABEL: {{^}}main:

; CHECK-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200
; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
; CHECK-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]

; TODO: add 0?
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
; CHECK-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]]
; CHECK-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]]

; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
Expand Down
4 changes: 2 additions & 2 deletions test/CodeGen/ARM/fp16-promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
; CHECK-ALL: strh
; CHECK-ALL: mov
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: add
; CHECK-ALL-DAG: orr
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
Expand Down Expand Up @@ -855,7 +855,7 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
; CHECK-VFP: orr
; CHECK-VFP: str
; CHECK-VFP: mov
; CHECK-VFP: add
; CHECK-VFP: orr
; CHECK-VFP: ldrh
; CHECK-VFP: strh
; CHECK-VFP: add sp, sp, #8
Expand Down
3 changes: 2 additions & 1 deletion test/CodeGen/ARM/vdup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,8 @@ define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) {
; CHECK: mov r[[FP:[0-9]+]], sp
; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4]
; CHECK: mov r[[SPCOPY:[0-9]+]], sp
; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]]
; CHECK: and r[[MASKED_IDX:[0-9]+]], r[[IDX]], #15
; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[MASKED_IDX]]
; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]]
%x = extractelement <16 x i8> %v, i32 %idx
%1 = insertelement <8 x i8> undef, i8 %x, i32 0
Expand Down
3 changes: 2 additions & 1 deletion test/CodeGen/Mips/llvm-ir/extractelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ define i1 @via_stack_bug(i8 signext %idx) {
; ALL-DAG: addiu [[ONE:\$[0-9]+]], $zero, 1
; ALL-DAG: sb [[ONE]], 7($sp)
; ALL-DAG: sb $zero, 6($sp)
; ALL-DAG: andi [[MASKED_IDX:\$[0-9]+]], $4, 1
; ALL-DAG: addiu [[VPTR:\$[0-9]+]], $sp, 6
; ALL-DAG: addu [[EPTR:\$[0-9]+]], $4, [[VPTR]]
; ALL-DAG: or [[EPTR:\$[0-9]+]], [[MASKED_IDX]], [[VPTR]]
; ALL: lbu $2, 0([[EPTR]])
6 changes: 3 additions & 3 deletions test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ entry:
; CHECK: mfvsrd [[TOGPR:[0-9]+]],
; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
; CHECK: extsw 3, [[RSHREG]]
; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
; CHECK-P7-DAG: stxvw4x 34,
; CHECK-P7: lwax 3, [[ELEMOFFREG]],
; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2
Expand Down Expand Up @@ -52,7 +52,7 @@ entry:
; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
; CHECK: mfvsrd 3,
; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 3
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 3, 28, 28
; CHECK-P7-DAG: stxvd2x 34,
; CHECK-P7: ldx 3, [[ELEMOFFREG]],
; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
Expand All @@ -75,7 +75,7 @@ entry:
; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]]
; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
; CHECK: xscvspdpn 1,
; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
; CHECK-P7-DAG: rlwinm [[ELEMOFFREG:[0-9]+]], 5, 2, 28, 29
; CHECK-P7-DAG: stxvw4x 34,
; CHECK-P7: lfsx 1, [[ELEMOFFREG]],
; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2
Expand Down
Loading

0 comments on commit fa5aafa

Please sign in to comment.