diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dbbe8d9193c25c..cbba3a294b3d68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5577,9 +5577,12 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return RMINMAX; // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. - // Only do this if the current op isn't legal and the flipped is. - if (!TLI.isOperationLegal(Opcode, VT) && - (N0.isUndef() || DAG.SignBitIsZero(N0)) && + // Only do this if: + // 1. The current op isn't legal and the flipped is. + // 2. The saturation pattern is broken by canonicalization in InstCombine. + bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT); + bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX; + if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { unsigned AltOpcode; switch (Opcode) { @@ -5589,7 +5592,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { case ISD::UMAX: AltOpcode = ISD::SMAX; break; default: llvm_unreachable("Unknown MINMAX opcode"); } - if (TLI.isOperationLegal(AltOpcode, VT)) + if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT)) return DAG.getNode(AltOpcode, DL, VT, N0, N1); } diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll index a2d99f1f8c2c26..557d023c45f9d5 100644 --- a/llvm/test/CodeGen/AMDGPU/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/umed3.ll @@ -43,8 +43,7 @@ define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %ou } ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32: -; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} -; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} +; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17 define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll index 024a98dd29346b..d01aa1520b326b 100644 --- a/llvm/test/CodeGen/ARM/usat.ll +++ b/llvm/test/CodeGen/ARM/usat.ll @@ -756,7 +756,7 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) { ; V4T-NEXT: bic r1, r0, r0, asr #31 ; V4T-NEXT: ldr r0, .LCPI20_0 ; V4T-NEXT: cmp r1, r0 -; V4T-NEXT: movlo r0, r1 +; V4T-NEXT: movlt r0, r1 ; V4T-NEXT: bx lr ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: @@ -765,23 +765,12 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) { ; ; V6-LABEL: mm_unsigned_sat_upper_lower_1: ; V6: @ %bb.0: @ %entry -; V6-NEXT: bic r1, r0, r0, asr #31 -; V6-NEXT: ldr r0, .LCPI20_0 -; V6-NEXT: cmp r1, r0 -; V6-NEXT: movlo r0, r1 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI20_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_upper_lower_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: bic r1, r0, r0, asr #31 -; V6T2-NEXT: movw r0, #65535 -; V6T2-NEXT: movt r0, #127 -; V6T2-NEXT: cmp r1, r0 -; V6T2-NEXT: movlo r0, r1 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -795,7 +784,7 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) { ; V4T-NEXT: bic r1, r0, r0, asr #31 ; V4T-NEXT: ldr r0, .LCPI21_0 ; V4T-NEXT: cmp r1, r0 -; V4T-NEXT: movlo r0, r1 +; V4T-NEXT: movlt r0, r1 ; V4T-NEXT: bx lr ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: @@ -804,23 +793,12 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) { ; ; V6-LABEL: mm_unsigned_sat_upper_lower_2: ; V6: @ %bb.0: @ %entry -; V6-NEXT: bic r1, r0, r0, asr #31 -; V6-NEXT: ldr r0, .LCPI21_0 -; V6-NEXT: cmp r1, r0 -; V6-NEXT: movlo r0, r1 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI21_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_upper_lower_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: bic r1, r0, r0, asr #31 -; V6T2-NEXT: movw r0, #65535 -; V6T2-NEXT: movt r0, #127 -; V6T2-NEXT: cmp r1, r0 -; V6T2-NEXT: movlo r0, r1 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -834,7 +812,7 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) { ; V4T-NEXT: bic r1, r0, r0, asr #31 ; V4T-NEXT: ldr r0, .LCPI22_0 ; V4T-NEXT: cmp r1, r0 -; V4T-NEXT: movlo r0, r1 +; V4T-NEXT: movlt r0, r1 ; V4T-NEXT: bx lr ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: @@ -843,23 +821,12 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) { ; ; V6-LABEL: mm_unsigned_sat_upper_lower_3: ; V6: @ %bb.0: @ %entry -; V6-NEXT: bic r1, r0, r0, asr #31 -; V6-NEXT: ldr r0, .LCPI22_0 -; V6-NEXT: cmp r1, r0 -; V6-NEXT: movlo r0, r1 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI22_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_upper_lower_3: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: bic r1, r0, r0, asr #31 -; V6T2-NEXT: movw r0, #65535 -; V6T2-NEXT: movt r0, #127 -; V6T2-NEXT: cmp r1, r0 -; V6T2-NEXT: movlo r0, r1 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -913,7 +880,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) { ; V4T-NEXT: mov r0, #1 ; V4T-NEXT: orr r0, r0, #8388608 ; V4T-NEXT: cmp r1, #8388608 -; V4T-NEXT: movls r0, r1 +; V4T-NEXT: movle r0, r1 ; V4T-NEXT: bx lr ; ; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2: @@ -922,7 +889,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) { ; V6-NEXT: mov r0, #1 ; V6-NEXT: orr r0, r0, #8388608 ; V6-NEXT: cmp r1, #8388608 -; V6-NEXT: movls r0, r1 +; V6-NEXT: movle r0, r1 ; V6-NEXT: bx lr ; ; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2: @@ -931,7 +898,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) { ; V6T2-NEXT: movw r0, #1 ; V6T2-NEXT: movt r0, #128 ; V6T2-NEXT: cmp r1, #8388608 -; V6T2-NEXT: movls r0, r1 +; V6T2-NEXT: movle r0, r1 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -981,6 +948,29 @@ entry: ret i32 %1 } +define i32 @test_umin_smax_usat(i32 %x) { +; V4T-LABEL: test_umin_smax_usat: +; V4T: @ %bb.0: @ %entry +; V4T-NEXT: bic r0, r0, r0, asr #31 +; V4T-NEXT: cmp r0, #255 +; V4T-NEXT: movge r0, #255 +; V4T-NEXT: bx lr +; +; V6-LABEL: test_umin_smax_usat: +; V6: @ %bb.0: @ %entry +; V6-NEXT: usat r0, #8, r0 +; V6-NEXT: bx lr +; +; V6T2-LABEL: test_umin_smax_usat: +; V6T2: @ %bb.0: @ %entry +; V6T2-NEXT: usat r0, #8, r0 +; V6T2-NEXT: bx lr +entry: + %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0) + %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255) + ret i32 %v2 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i16 @llvm.smin.i16(i16, i16)