Skip to content

Commit

Permalink
AMDGPU: Add Assert[SZ]Ext during argument load creation
Browse files Browse the repository at this point in the history
For i16 zeroext arguments, when i16 was a legal type, the
known-bits information from the truncate was lost. Insert
an AssertZext (or an AssertSext for signext arguments) on the
32-bit load so the known-bits optimizations keep working.

Fixes code quality regressions vs. SI in min.ll test.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291461 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Jan 9, 2017
1 parent 155581a commit f7c0d40
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 88 deletions.
27 changes: 15 additions & 12 deletions lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,8 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,

SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
unsigned Offset, bool Signed) const {
unsigned Offset, bool Signed,
const ISD::InputArg *Arg) const {
const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
Expand All @@ -725,20 +726,21 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);

SDValue Val;
SDValue Val = Load;
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
}

if (MemVT.isFloatingPoint())
Val = getFPExtOrFPTrunc(DAG, Load, SL, VT);
Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
else if (Signed)
Val = DAG.getSExtOrTrunc(Load, SL, VT);
Val = DAG.getSExtOrTrunc(Val, SL, VT);
else
Val = DAG.getZExtOrTrunc(Load, SL, VT);

SDValue Ops[] = {
Val,
Load.getValue(1)
};
Val = DAG.getZExtOrTrunc(Val, SL, VT);

return DAG.getMergeValues(Ops, SL);
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}

SDValue SITargetLowering::LowerFormalArguments(
Expand Down Expand Up @@ -911,7 +913,8 @@ SDValue SITargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
Offset, Ins[i].Flags.isSExt());
Offset, Ins[i].Flags.isSExt(),
&Ins[i]);
Chains.push_back(Arg.getValue(1));

auto *ParamTy =
Expand Down
3 changes: 2 additions & 1 deletion lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
unsigned Offset) const;
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
SDValue Chain, unsigned Offset, bool Signed) const;
SDValue Chain, unsigned Offset, bool Signed,
const ISD::InputArg *Arg = nullptr) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
Expand Down
Loading

0 comments on commit f7c0d40

Please sign in to comment.