Skip to content

Commit

Permalink
[SDAG] Turn umin into smin if the saturation pattern is broken (llvm#…
Browse files Browse the repository at this point in the history
…88505)

As we canonicalizes smin with non-negative operands into umin in the
middle-end, the saturation pattern will be broken.
This patch reverts the transform in DAGCombine to fix the regression on
ARM.

Fixes llvm#85706.
  • Loading branch information
dtcxzyw authored Apr 15, 2024
1 parent 64dc558 commit 4d28d3f
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 51 deletions.
11 changes: 7 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5577,9 +5577,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return RMINMAX;

// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
// Only do this if:
// 1. The current op isn't legal and the flipped is.
// 2. The saturation pattern is broken by canonicalization in InstCombine.
bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
unsigned AltOpcode;
switch (Opcode) {
Expand All @@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
case ISD::UMAX: AltOpcode = ISD::SMAX; break;
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}

Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/umed3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %ou
}

; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32:
; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
Expand Down
80 changes: 35 additions & 45 deletions llvm/test/CodeGen/ARM/usat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI20_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlo r0, r1
; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
Expand All @@ -765,23 +765,12 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_upper_lower_1:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI20_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlo r0, r1
; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI20_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlo r0, r1
; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
Expand All @@ -795,7 +784,7 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI21_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlo r0, r1
; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
Expand All @@ -804,23 +793,12 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_upper_lower_2:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI21_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlo r0, r1
; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI21_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlo r0, r1
; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
Expand All @@ -834,7 +812,7 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI22_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlo r0, r1
; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
Expand All @@ -843,23 +821,12 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
;
; V6-LABEL: mm_unsigned_sat_upper_lower_3:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI22_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlo r0, r1
; V6-NEXT: usat r0, #23, r0
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI22_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlo r0, r1
; V6T2-NEXT: usat r0, #23, r0
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
Expand Down Expand Up @@ -913,7 +880,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V4T-NEXT: mov r0, #1
; V4T-NEXT: orr r0, r0, #8388608
; V4T-NEXT: cmp r1, #8388608
; V4T-NEXT: movls r0, r1
; V4T-NEXT: movle r0, r1
; V4T-NEXT: bx lr
;
; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2:
Expand All @@ -922,7 +889,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V6-NEXT: mov r0, #1
; V6-NEXT: orr r0, r0, #8388608
; V6-NEXT: cmp r1, #8388608
; V6-NEXT: movls r0, r1
; V6-NEXT: movle r0, r1
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2:
Expand All @@ -931,7 +898,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V6T2-NEXT: movw r0, #1
; V6T2-NEXT: movt r0, #128
; V6T2-NEXT: cmp r1, #8388608
; V6T2-NEXT: movls r0, r1
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
Expand Down Expand Up @@ -981,6 +948,29 @@ entry:
ret i32 %1
}

define i32 @test_umin_smax_usat(i32 %x) {
; V4T-LABEL: test_umin_smax_usat:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: bic r0, r0, r0, asr #31
; V4T-NEXT: cmp r0, #255
; V4T-NEXT: movge r0, #255
; V4T-NEXT: bx lr
;
; V6-LABEL: test_umin_smax_usat:
; V6: @ %bb.0: @ %entry
; V6-NEXT: usat r0, #8, r0
; V6-NEXT: bx lr
;
; V6T2-LABEL: test_umin_smax_usat:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: usat r0, #8, r0
; V6T2-NEXT: bx lr
entry:
%v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0)
%v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255)
ret i32 %v2
}

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i16 @llvm.smin.i16(i16, i16)
Expand Down

0 comments on commit 4d28d3f

Please sign in to comment.