Skip to content

Commit

Permalink
[X86] Avoid returning the same shuffle operation for broadcast (llvm#…
Browse files Browse the repository at this point in the history
…70592)

This fixes a crash that has occurred since aab8b2e, which generates a new pattern:
```
      t35: v8i32 = xor t11, t14
    t36: v8i32 = vector_shuffle<0,1,0,1,0,1,0,1> t35, undef:v8i32
```

The pattern exposed a bug introduced in f885c08, which breaks
element widening but doesn't handle the broadcast case.

This patch only fixes the crash. I observed a performance regression
caused by the above patches in the test, which may need further
investigation.
  • Loading branch information
phoebewang authored Oct 29, 2023
1 parent f8fe400 commit b5281af
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15293,6 +15293,12 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j)
BroadcastMask[i + j] = j;

// Avoid returning the same shuffle operation. For example,
// v8i32 = vector_shuffle<0,1,0,1,0,1,0,1> t5, undef:v8i32
if (BroadcastMask == Mask)
return SDValue();

return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
BroadcastMask);
}
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,avx512vl | FileCheck %s

; Regression test built around a v8i32 shuffle with the repeated two-element
; mask <0,1,0,1,0,1,0,1>. Loads an i64 from %q, builds two <8 x i32> vectors
; from it via that mask, and returns true when no lane of the two differs.
define i1 @test(ptr %q) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: vptest %ymm0, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
; The chain of `add ..., 0` instructions does not change the loaded value;
; presumably it is left over from test-case reduction and is kept so the
; same DAG shape is produced.
%0 = load i64, ptr %q, align 8
%add = add nsw i64 %0, 0
%add2 = add nsw i64 %add, 0
%add5 = add nsw i64 %add2, 0
%vecinit1.i.i68 = insertelement <2 x i64> poison, i64 %add5, i64 0
%add8 = add nsw i64 %add5, 0
%vecinit.i.i55 = insertelement <4 x i64> undef, i64 %add8, i64 0
; First operand: reinterpret the <2 x i64> as <4 x i32> and widen it to
; <8 x i32> by repeating elements 0 and 1 four times.
%1 = bitcast <2 x i64> %vecinit1.i.i68 to <4 x i32>
%2 = shufflevector <4 x i32> %1, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Second operand: reinterpret the <4 x i64> as <8 x i32> and apply the same
; <0,1,0,1,0,1,0,1> mask within the 8-element vector.
%3 = bitcast <4 x i64> %vecinit.i.i55 to <8 x i32>
%4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Lane-wise "not equal" compare, packed into an i8 bitmask; the result is
; true only when that mask is zero (no lane differs).
%5 = icmp ne <8 x i32> %2, %4
%6 = bitcast <8 x i1> %5 to i8
%7 = icmp eq i8 %6, 0
ret i1 %7
}

0 comments on commit b5281af

Please sign in to comment.