Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Consider only legally typed splats to be legal shuffles #123415

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

preames
Copy link
Collaborator

@preames preames commented Jan 17, 2025

Given the comment, I'd expected test coverage. There was none so let's do the simple thing which benefits the one thing we have tests for.

Given the comment, I'd expected test coverage.  There was none so
let's do the simple thing which benefits the one thing we have
tests for.
@llvmbot
Copy link
Member

llvmbot commented Jan 17, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

Given the comment, I'd expected test coverage. There was none so let's do the simple thing which benefits the one thing we have tests for.


Full diff: https://github.com/llvm/llvm-project/pull/123415.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4-4)
  • (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll (+33-79)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index de100c683a94ff..bf3eda8ce8f8f1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5729,14 +5729,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
 }
 
 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
-  // Support splats for any type. These should type legalize well.
-  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
-    return true;
-
   // Only support legal VTs for other shuffles for now.
   if (!isTypeLegal(VT))
     return false;
 
+  // Support splats for any type. These should type legalize well.
+  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
+    return true;
+
   MVT SVT = VT.getSimpleVT();
 
   // Not for i1 vectors.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index df1c803ca8850a..8b26c58d5bee19 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -1287,37 +1287,17 @@ define void @shuffle_i64_splat(ptr %p) nounwind {
 }
 
 define void @shuffle_i128_splat(ptr %p) nounwind {
-; RV32-LABEL: shuffle_i128_splat:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lw a1, 0(a0)
-; RV32-NEXT:    lw a2, 4(a0)
-; RV32-NEXT:    lw a3, 8(a0)
-; RV32-NEXT:    lw a4, 12(a0)
-; RV32-NEXT:    sw a1, 48(a0)
-; RV32-NEXT:    sw a2, 52(a0)
-; RV32-NEXT:    sw a3, 56(a0)
-; RV32-NEXT:    sw a4, 60(a0)
-; RV32-NEXT:    sw a1, 16(a0)
-; RV32-NEXT:    sw a2, 20(a0)
-; RV32-NEXT:    sw a3, 24(a0)
-; RV32-NEXT:    sw a4, 28(a0)
-; RV32-NEXT:    sw a1, 32(a0)
-; RV32-NEXT:    sw a2, 36(a0)
-; RV32-NEXT:    sw a3, 40(a0)
-; RV32-NEXT:    sw a4, 44(a0)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: shuffle_i128_splat:
-; RV64:       # %bb.0:
-; RV64-NEXT:    ld a1, 0(a0)
-; RV64-NEXT:    ld a2, 8(a0)
-; RV64-NEXT:    sd a1, 48(a0)
-; RV64-NEXT:    sd a2, 56(a0)
-; RV64-NEXT:    sd a1, 16(a0)
-; RV64-NEXT:    sd a2, 24(a0)
-; RV64-NEXT:    sd a1, 32(a0)
-; RV64-NEXT:    sd a2, 40(a0)
-; RV64-NEXT:    ret
+; CHECK-LABEL: shuffle_i128_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vrgatherei16.vv v16, v8, v12
+; CHECK-NEXT:    vse64.v v16, (a0)
+; CHECK-NEXT:    ret
   %a = load <4 x i128>, ptr %p
   %res = shufflevector <4 x i128> %a, <4 x i128> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   store <4 x i128> %res, ptr %p
@@ -1327,58 +1307,32 @@ define void @shuffle_i128_splat(ptr %p) nounwind {
 define void @shuffle_i256_splat(ptr %p) nounwind {
 ; RV32-LABEL: shuffle_i256_splat:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lw a1, 0(a0)
-; RV32-NEXT:    lw a2, 4(a0)
-; RV32-NEXT:    lw a3, 8(a0)
-; RV32-NEXT:    lw a4, 12(a0)
-; RV32-NEXT:    lw a5, 16(a0)
-; RV32-NEXT:    lw a6, 20(a0)
-; RV32-NEXT:    lw a7, 24(a0)
-; RV32-NEXT:    lw t0, 28(a0)
-; RV32-NEXT:    sw a5, 112(a0)
-; RV32-NEXT:    sw a6, 116(a0)
-; RV32-NEXT:    sw a7, 120(a0)
-; RV32-NEXT:    sw t0, 124(a0)
-; RV32-NEXT:    sw a1, 96(a0)
-; RV32-NEXT:    sw a2, 100(a0)
-; RV32-NEXT:    sw a3, 104(a0)
-; RV32-NEXT:    sw a4, 108(a0)
-; RV32-NEXT:    sw a5, 80(a0)
-; RV32-NEXT:    sw a6, 84(a0)
-; RV32-NEXT:    sw a7, 88(a0)
-; RV32-NEXT:    sw t0, 92(a0)
-; RV32-NEXT:    sw a1, 64(a0)
-; RV32-NEXT:    sw a2, 68(a0)
-; RV32-NEXT:    sw a3, 72(a0)
-; RV32-NEXT:    sw a4, 76(a0)
-; RV32-NEXT:    sw a5, 48(a0)
-; RV32-NEXT:    sw a6, 52(a0)
-; RV32-NEXT:    sw a7, 56(a0)
-; RV32-NEXT:    sw t0, 60(a0)
-; RV32-NEXT:    sw a1, 32(a0)
-; RV32-NEXT:    sw a2, 36(a0)
-; RV32-NEXT:    sw a3, 40(a0)
-; RV32-NEXT:    sw a4, 44(a0)
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    lui a1, 12320
+; RV32-NEXT:    addi a1, a1, 256
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a1
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT:    vsext.vf2 v18, v16
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vrgatherei16.vv v24, v8, v18
+; RV32-NEXT:    vse64.v v24, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: shuffle_i256_splat:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    ld a1, 0(a0)
-; RV64-NEXT:    ld a2, 8(a0)
-; RV64-NEXT:    ld a3, 16(a0)
-; RV64-NEXT:    ld a4, 24(a0)
-; RV64-NEXT:    sd a1, 96(a0)
-; RV64-NEXT:    sd a2, 104(a0)
-; RV64-NEXT:    sd a3, 112(a0)
-; RV64-NEXT:    sd a4, 120(a0)
-; RV64-NEXT:    sd a1, 32(a0)
-; RV64-NEXT:    sd a2, 40(a0)
-; RV64-NEXT:    sd a3, 48(a0)
-; RV64-NEXT:    sd a4, 56(a0)
-; RV64-NEXT:    sd a1, 64(a0)
-; RV64-NEXT:    sd a2, 72(a0)
-; RV64-NEXT:    sd a3, 80(a0)
-; RV64-NEXT:    sd a4, 88(a0)
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    lui a1, 98305
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    addi a1, a1, 1
+; RV64-NEXT:    slli a1, a1, 16
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vrgatherei16.vv v24, v8, v16
+; RV64-NEXT:    vse64.v v24, (a0)
 ; RV64-NEXT:    ret
   %a = load <4 x i256>, ptr %p
   %res = shufflevector <4 x i256> %a, <4 x i256> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants