Skip to content

Commit

Permalink
R600: Compute masked bits for min and max
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205242 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Mar 31, 2014
1 parent c298307 commit 193c3e9
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 0 deletions.
44 changes: 44 additions & 0 deletions lib/Target/R600/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,11 +1219,55 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}

/// Compute the known-zero / known-one bits of a two-operand min or max.
///
/// A min or max yields one of its two operands, so a bit is reported as known
/// only when it is known, with the same value, in both \p Op0 and \p Op1.
///
/// \param Op0       First operand of the min/max.
/// \param Op1       Second operand of the min/max.
/// \param KnownZero [out] Bits that are zero in both operands.
/// \param KnownOne  [out] Bits that are one in both operands.
/// \param DAG       DAG used to query the known bits of each operand.
/// \param Depth     Recursion depth of the min/max node itself; the operands
///                  are analysed one level deeper.
static void computeMaskedBitsForMinMax(const SDValue Op0,
                                       const SDValue Op1,
                                       APInt &KnownZero,
                                       APInt &KnownOne,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) {
  APInt Op0Zero, Op0One;
  APInt Op1Zero, Op1One;

  // The operands sit one level below the node being analysed. Recursing with
  // Depth + 1 (rather than Depth) lets the DAG's recursion-depth cutoff make
  // progress across chains of min/max nodes.
  DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth + 1);
  DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth + 1);

  // Only bits on which both candidates agree survive.
  KnownZero = Op0Zero & Op1Zero;
  KnownOne = Op0One & Op1One;
}

void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {

KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
unsigned Opc = Op.getOpcode();
switch (Opc) {
case ISD::INTRINSIC_WO_CHAIN: {
// FIXME: The intrinsic should just use the node.
switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
case AMDGPUIntrinsic::AMDGPU_imax:
case AMDGPUIntrinsic::AMDGPU_umax:
case AMDGPUIntrinsic::AMDGPU_imin:
case AMDGPUIntrinsic::AMDGPU_umin:
computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
KnownZero, KnownOne, DAG, Depth);
break;
default:
break;
}

break;
}
case AMDGPUISD::SMAX:
case AMDGPUISD::UMAX:
case AMDGPUISD::SMIN:
case AMDGPUISD::UMIN:
computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
KnownZero, KnownOne, DAG, Depth);
break;
default:
break;
}
}
15 changes: 15 additions & 0 deletions test/CodeGen/R600/llvm.AMDGPU.umax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@ entry:
ret void
}

; SI-LABEL: @trunc_zext_umax
; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
; SI-NOT: AND
; SI: BUFFER_STORE_SHORT [[RESULT]],
; Loads a byte, zero-extends it to i32, takes the unsigned max with 0, then
; truncates the result to i8 and zero-extends it to i16 before storing.
; The checks above expect the max to feed the store with no AND in between;
; presumably the trunc+zext mask is dropped because both umax operands are
; known to have their upper bits clear.
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
; Both operands (a zero-extended byte and the constant 0) fit in 8 bits.
%tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
; trunc followed by zext is the pattern that would otherwise need a mask.
%tmp4 = trunc i32 %tmp3 to i8
%tmp6 = zext i8 %tmp4 to i16
store i16 %tmp6, i16 addrspace(1)* %out, align 2
ret void
}

; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umax(i32, i32) #1

Expand Down
15 changes: 15 additions & 0 deletions test/CodeGen/R600/llvm.AMDGPU.umin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@ entry:
ret void
}

; SI-LABEL: @trunc_zext_umin
; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
; SI-NOT: AND
; SI: BUFFER_STORE_SHORT [[RESULT]],
; Loads a byte, zero-extends it to i32, takes the unsigned min with 0, then
; truncates the result to i8 and zero-extends it to i16 before storing.
; The checks above expect the min to feed the store with no AND in between;
; presumably the trunc+zext mask is dropped because both umin operands are
; known to have their upper bits clear.
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
%tmp5 = load i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
; Both operands (a zero-extended byte and the constant 0) fit in 8 bits.
%tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
; trunc followed by zext is the pattern that would otherwise need a mask.
%tmp4 = trunc i32 %tmp3 to i8
%tmp6 = zext i8 %tmp4 to i16
store i16 %tmp6, i16 addrspace(1)* %out, align 2
ret void
}

; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umin(i32, i32) #1

Expand Down

0 comments on commit 193c3e9

Please sign in to comment.