Skip to content

Commit

Permalink
GlobalISel: Implement moreElementsVector for select
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354354 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Feb 19, 2019
1 parent 6583403 commit 379689c
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 43 deletions.
12 changes: 12 additions & 0 deletions lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2405,6 +2405,18 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorSrc(MI, MoreTy, 1);
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT:
if (TypeIdx != 0)
return UnableToLegalize;
if (MRI.getType(MI.getOperand(1).getReg()).isVector())
return UnableToLegalize;

Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 2);
moreElementsVectorSrc(MI, MoreTy, 3);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
default:
return UnableToLegalize;
}
Expand Down
27 changes: 9 additions & 18 deletions lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
};
}

/// Build a legality predicate that is true when the type at index \p TypeIdx
/// of the query is a vector whose element count is odd. Non-vector types
/// never match.
static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    // Only vector types carry an element count to inspect.
    if (!Ty.isVector())
      return false;
    return (Ty.getNumElements() % 2) != 0;
  };
}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM) {
Expand Down Expand Up @@ -453,28 +459,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
.clampScalar(0, S32, S64)
.fewerElementsIf(
[=](const LegalityQuery &Query) {
if (Query.Types[1].isVector())
return true;

LLT Ty = Query.Types[0];

// FIXME: Hack until odd splits handled
return Ty.isVector() &&
(Ty.getScalarSizeInBits() > 32 || Ty.getNumElements() % 2 != 0);
},
scalarize(0))
// FIXME: Handle 16-bit vectors better
.fewerElementsIf(
[=](const LegalityQuery &Query) {
return Query.Types[0].isVector() &&
Query.Types[0].getElementType().getSizeInBits() < 32;},
scalarize(0))
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.fewerElementsIf(numElementsNotEven(0), scalarize(0))
.scalarize(1)
.clampMaxNumElements(0, S32, 2)
.clampMaxNumElements(0, LocalPtr, 2)
.clampMaxNumElements(0, PrivatePtr, 2)
.scalarize(0)
.legalIf(all(isPointer(0), typeIs(1, S1)));

// TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
Expand Down
50 changes: 25 additions & 25 deletions test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
Original file line number Diff line number Diff line change
Expand Up @@ -248,23 +248,32 @@ body: |
; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>)
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<3 x s8>)
; CHECK: [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<3 x s8>)
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0
; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[TRUNC1]](<3 x s8>), 0
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>)
; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32)
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32)
; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8)
; CHECK: [[ANYEXT6:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<3 x s8>)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT6]](<3 x s32>)
; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]]
; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8)
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0
; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CONSTANT i32 0
%2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3
Expand Down Expand Up @@ -365,24 +374,15 @@ body: |
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0
; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY2]](<4 x s16>), 0
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]]
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT]](s32)
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]]
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32)
; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16)
; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]]
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT2]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s16>), 0
; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
; CHECK: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]]
; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[SELECT]](<4 x s16>), 0
; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0
; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>)
%0:_(s32) = COPY $vgpr0
%1:_(<4 x s16>) = COPY $vgpr1_vgpr2
%2:_(<4 x s16>) = COPY $vgpr3_vgpr4
Expand Down

0 comments on commit 379689c

Please sign in to comment.