From 1b59f4c380659e330ff3c03ddcfd0d6547dfd43a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 20 Feb 2019 16:42:52 +0000 Subject: [PATCH] GlobalISel: Fix fewerElementsVector for ctlz with different result type Also complete the set of related operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 6 +- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 4 +- .../GlobalISel/legalize-ctlz-zero-undef.mir | 64 +++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-ctlz.mir | 64 +++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-ctpop.mir | 60 +++++++++++++++++ .../GlobalISel/legalize-cttz-zero-undef.mir | 60 +++++++++++++++++ .../AMDGPU/GlobalISel/legalize-cttz.mir | 64 +++++++++++++++++++ 7 files changed, 319 insertions(+), 3 deletions(-) diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 82ba0b8d09c9..7abce5a3fa89 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2129,11 +2129,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSIN: case G_FSQRT: case G_BSWAP: - case G_CTLZ: return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: + case G_CTLZ: + case G_CTLZ_ZERO_UNDEF: + case G_CTTZ: + case G_CTTZ_ZERO_UNDEF: + case G_CTPOP: return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 300f5c18870c..0d0496eaa035 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -306,8 +306,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, G_CTPOP}) .legalFor({{S32, S32}, {S32, S64}}) .clampScalar(0, S32, S32) - .clampScalar(1, S32, S64); - // TODO: Scalarize + .clampScalar(1, S32, S64) + .scalarize(0); // TODO: Expand for > s32 getActionDefinitionsBuilder(G_BSWAP) diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir index aa01879f7bf6..ff93788bc86b 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir @@ -91,3 +91,67 @@ body: | %3:_(s32) = G_ZEXT %2 $vgpr0 = COPY %3 ... + +--- +name: ctlz_zero_undef_v2s32_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_zero_undef_v2s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](s32), [[CTLZ_ZERO_UNDEF1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_CTLZ_ZERO_UNDEF %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctlz_zero_undef_v2s32_v2s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: ctlz_zero_undef_v2s32_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64) + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ_ZERO_UNDEF]](s32), [[CTLZ_ZERO_UNDEF1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_CTLZ_ZERO_UNDEF %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctlz_zero_undef_v2s16_v2s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_zero_undef_v2s16_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[ZEXT]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[ZEXT1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_CTLZ_ZERO_UNDEF %0 + $vgpr0 = COPY %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir index acdfc3abd575..5cbdea9d83df 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -91,3 +91,67 @@ body: | %3:_(s32) = G_ZEXT %2 $vgpr0 = COPY %3 ... + +--- +name: ctlz_v2s32_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_v2s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[UV]](s32) + ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ]](s32), [[CTLZ1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_CTLZ %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctlz_v2s32_v2s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: ctlz_v2s32_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[UV]](s64) + ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ]](s32), [[CTLZ1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_CTLZ %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctlz_v2s16_v2s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctlz_v2s16_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[ZEXT]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[ZEXT1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_CTLZ %0 + $vgpr0 = COPY %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir index d0a272ea2339..ebb59c1bd0d9 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir @@ -89,3 +89,63 @@ body: | %3:_(s32) = G_ZEXT %2 $vgpr0 = COPY %3 ... + +--- +name: ctpop_v2s32_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctpop_v2s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[UV]](s32) + ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTPOP]](s32), [[CTPOP1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_CTPOP %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctpop_v2s32_v2s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: ctpop_v2s32_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[UV]](s64) + ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTPOP]](s32), [[CTPOP1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_CTPOP %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: ctpop_v2s16_v2s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ctpop_v2s16_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[CTPOP1:%[0-9]+]]:_(s32) = G_CTPOP [[ZEXT1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTPOP1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_CTPOP %0 + $vgpr0 = COPY %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir index cb474c59ee10..ba3516be3841 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz-zero-undef.mir @@ -89,3 +89,63 @@ body: | %3:_(s32) = G_ZEXT %2 $vgpr0 = COPY %3 ... + +--- +name: cttz_zero_undef_v2s32_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_zero_undef_v2s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](s32), [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_CTTZ_ZERO_UNDEF %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: cttz_zero_undef_v2s32_v2s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: cttz_zero_undef_v2s32_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s64) + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ_ZERO_UNDEF]](s32), [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_CTTZ_ZERO_UNDEF %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: cttz_zero_undef_v2s16_v2s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_zero_undef_v2s16_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[ZEXT1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_CTTZ_ZERO_UNDEF %0 + $vgpr0 = COPY %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index 66e5951198f8..a73f5afcd3da 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -91,3 +91,67 @@ body: | %3:_(s32) = G_ZEXT %2 $vgpr0 = COPY %3 ... + +--- +name: cttz_v2s32_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_v2s32_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV]](s32) + ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ]](s32), [[CTTZ1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_CTTZ %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: cttz_v2s32_v2s64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: cttz_v2s32_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV]](s64) + ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s64) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ]](s32), [[CTTZ1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s32>) = G_CTTZ %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: cttz_v2s16_v2s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: cttz_v2s16_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[C]] + ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[C1]] + ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[OR1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_CTTZ %0 + $vgpr0 = COPY %1 +...