Skip to content

Commit

Permalink
[NVPTX] Add support for [SHL,SRA,SRL]_PARTS
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211936 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
jholewinski committed Jun 27, 2014
1 parent 10da165 commit 863b0d4
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 0 deletions.
139 changes: 139 additions & 0 deletions lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);

if (nvptxSubtarget.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
Expand Down Expand Up @@ -345,6 +352,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::FUN_SHFL_CLAMP";
case NVPTXISD::FUN_SHFR_CLAMP:
return "NVPTXISD::FUN_SHFR_CLAMP";
case NVPTXISD::IMAD:
return "NVPTXISD::IMAD";
case NVPTXISD::MUL_WIDE_SIGNED:
return "NVPTXISD::MUL_WIDE_SIGNED";
case NVPTXISD::MUL_WIDE_UNSIGNED:
return "NVPTXISD::MUL_WIDE_UNSIGNED";
case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
case NVPTXISD::Tex1DFloatFloatLevel:
Expand Down Expand Up @@ -1279,6 +1292,127 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
}

/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
/// amount.
SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {

// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} >> Amt
// dHi = aHi >> Amt
// dLo = shf.r.clamp aLo, aHi, Amt

SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
ShAmt);

SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
else {

// {dHi, dLo} = {aHi, aLo} >> Amt
// - if (Amt>=size) then
// dLo = aHi >> (Amt-size)
// dHi = aHi >> Amt (this is either all 0 or all 1)
// else
// dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
// dHi = aHi >> Amt

SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
}

/// LowerShiftLeftParts - Lower SHL_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
/// amount.
SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
assert(Op.getOpcode() == ISD::SHL_PARTS);

EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);

if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {

// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} << Amt
// dHi = shf.l.clamp aLo, aHi, Amt
// dLo = aLo << Amt

SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
ShAmt);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);

SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
else {

// {dHi, dLo} = {aHi, aLo} << Amt
// - if (Amt>=size) then
// dLo = aLo << Amt (all 0)
// dLo = aLo << (Amt-size)
// else
// dLo = aLo << Amt
// dHi = (aHi << Amt) | (aLo >> (size-Amt))

SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);

SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
}

SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
Expand All @@ -1299,6 +1433,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG);
case ISD::LOAD:
return LowerLOAD(Op, DAG);
case ISD::SHL_PARTS:
return LowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
Expand Down
5 changes: 5 additions & 0 deletions lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ enum NodeType {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
FUN_SHFL_CLAMP,
FUN_SHFR_CLAMP,
MUL_WIDE_SIGNED,
MUL_WIDE_UNSIGNED,
IMAD,
Expand Down Expand Up @@ -259,6 +261,9 @@ class NVPTXTargetLowering : public TargetLowering {
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;

void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
Expand Down
26 changes: 26 additions & 0 deletions lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,32 @@ def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;

//
// Funnnel shift in clamp mode
//
// - SDNodes are created so they can be used in the DAG code,
// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
//
def SDTIntShiftDOp: SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisInt<0>, SDTCisInt<3>]>;
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;

def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
"shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
[(set Int32Regs:$dst,
(FUN_SHFL_CLAMP Int32Regs:$lo,
Int32Regs:$hi, Int32Regs:$amt))]>;

def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
"shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
[(set Int32Regs:$dst,
(FUN_SHFR_CLAMP Int32Regs:$lo,
Int32Regs:$hi, Int32Regs:$amt))]>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
Expand Down
38 changes: 38 additions & 0 deletions test/CodeGen/NVPTX/shift-parts.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

; CHECK: shift_parts_left_128
define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
; CHECK: shl.b64
; CHECK: mov.u32
; CHECK: sub.s32
; CHECK: shr.u64
; CHECK: or.b64
; CHECK: add.s32
; CHECK: shl.b64
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shl.b64
%amt = load i128* %amtptr
%a = load i128* %val
%val0 = shl i128 %a, %amt
store i128 %val0, i128* %val
ret void
}

; CHECK: shift_parts_right_128
define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
; CHECK: shr.u64
; CHECK: sub.s32
; CHECK: shl.b64
; CHECK: or.b64
; CHECK: add.s32
; CHECK: shr.s64
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shr.s64
%amt = load i128* %amtptr
%a = load i128* %val
%val0 = ashr i128 %a, %amt
store i128 %val0, i128* %val
ret void
}

0 comments on commit 863b0d4

Please sign in to comment.