[InstCombine] fold cmp+select using select operand equivalence

As discussed in PR42696: https://bugs.llvm.org/show_bug.cgi?id=42696 ...but won't help that case yet. We have an odd situation where a select operand equivalence fold was implemented in InstSimplify when it could have been done more generally in InstCombine if we allow dropping of {nsw,nuw,exact} from a binop operand. Here's an example: https://rise4fun.com/Alive/Xplr %cmp = icmp eq i32 %x, 2147483647 %add = add nsw i32 %x, 1 %sel = select i1 %cmp, i32 -2147483648, i32 %add => %sel = add i32 %x, 1 I've left the InstSimplify code in place for now, but my guess is that we'd prefer to remove that as a follow-up to save on code duplication and compile-time. Differential Revision: https://reviews.llvm.org/D65576 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367695 91177308-0d34-0410-b5e6-96231b3b80d8
earl · Aug 2, 2019 · ef8f60b · ef8f60b
1 parent 7596206
commit ef8f60b
Show file tree

Hide file tree

Showing 5 changed files with 253 additions and 336 deletions.
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
@@ -3529,6 +3529,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
     //   %sel = select i1 %cmp, i32 -2147483648, i32 %add
     //
     // We can't replace %sel with %add unless we strip away the flags.
+    // TODO: This is an unusual limitation because better analysis results in
+    //       worse simplification. InstCombine can do this fold more generally
+    //       by dropping the flags. Remove this fold to save compile-time?
     if (isa<OverflowingBinaryOperator>(B))
       if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
         return nullptr;

diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1060,11 +1060,69 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
   return &Sel;
 }
 
+static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp,
+                                     const SimplifyQuery &Q) {
+  // If this is a binary operator, try to simplify it with the replaced op
+  // because we know Op and ReplaceOp are equivalant.
+  // For example: V = X + 1, Op = X, ReplaceOp = 42
+  // Simplifies as: add(42, 1) --> 43
+  if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+    if (BO->getOperand(0) == Op)
+      return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q);
+    if (BO->getOperand(1) == Op)
+      return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q);
+  }
+
+  return nullptr;
+}
+
+/// If we have a select with an equality comparison, then we know the value in
+/// one of the arms of the select. See if substituting this value into an arm
+/// and simplifying the result yields the same value as the other arm.
+///
+/// To make this transform safe, we must drop poison-generating flags
+/// (nsw, etc) if we simplified to a binop because the select may be guarding
+/// that poison from propagating. If the existing binop already had no
+/// poison-generating flags, then this transform can be done by instsimplify.
+///
+/// Consider:
+///   %cmp = icmp eq i32 %x, 2147483647
+///   %add = add nsw i32 %x, 1
+///   %sel = select i1 %cmp, i32 -2147483648, i32 %add
+///
+/// We can't replace %sel with %add unless we strip away the flags.
+static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
+                                         const SimplifyQuery &Q) {
+  if (!Cmp.isEquality())
+    return nullptr;
+
+  // Canonicalize the pattern to ICMP_EQ by swapping the select operands.
+  Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
+  if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
+    std::swap(TrueVal, FalseVal);
+
+  // Try each equivalence substitution possibility.
+  // We have an 'EQ' comparison, so the select's false value will propagate.
+  // Example:
+  // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
+  // (X == 42) ? (X + 1) : 43 --> (X == 42) ? (42 + 1) : 43 --> 43
+  Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
+  if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal ||
+      simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal ||
+      simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal ||
+      simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) {
+    if (auto *FalseInst = dyn_cast<Instruction>(FalseVal))
+      FalseInst->dropPoisonGeneratingFlags();
+    return FalseVal;
+  }
+  return nullptr;
+}
+
 /// Visit a SelectInst that has an ICmpInst as its first operand.
 Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
                                                   ICmpInst *ICI) {
-  Value *TrueVal = SI.getTrueValue();
-  Value *FalseVal = SI.getFalseValue();
+  if (Value *V = foldSelectValueEquivalence(SI, *ICI, SQ))
+    return replaceInstUsesWith(SI, V);
 
   if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
     return NewSel;
@@ -1078,6 +1136,8 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
     return replaceInstUsesWith(SI, V);
 
   // NOTE: if we wanted to, this is where to detect integer MIN/MAX
+  Value *TrueVal = SI.getTrueValue();
+  Value *FalseVal = SI.getFalseValue();
   ICmpInst::Predicate Pred = ICI->getPredicate();
   Value *CmpLHS = ICI->getOperand(0);
   Value *CmpRHS = ICI->getOperand(1);

diff --git a/test/Transforms/InstCombine/select-bitext-bitwise-ops.ll b/test/Transforms/InstCombine/select-bitext-bitwise-ops.ll
@@ -3,13 +3,11 @@
 
 define i64 @sel_false_val_is_a_masked_shl_of_true_val1(i32 %x, i64 %y) {
 ; CHECK-LABEL: @sel_false_val_is_a_masked_shl_of_true_val1(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 15
-; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 60
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
   %1 = and i32 %x, 15
   %2 = shl nuw nsw i32 %1, 2
@@ -39,13 +37,11 @@ define i64 @sel_false_val_is_a_masked_shl_of_true_val2(i32 %x, i64 %y) {
 
 define i64 @sel_false_val_is_a_masked_lshr_of_true_val1(i32 %x, i64 %y) {
 ; CHECK-LABEL: @sel_false_val_is_a_masked_lshr_of_true_val1(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 60
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
   %1 = and i32 %x, 60
   %2 = lshr i32 %1, 2
@@ -75,13 +71,11 @@ define i64 @sel_false_val_is_a_masked_lshr_of_true_val2(i32 %x, i64 %y) {
 
 define i64 @sel_false_val_is_a_masked_ashr_of_true_val1(i32 %x, i64 %y) {
 ; CHECK-LABEL: @sel_false_val_is_a_masked_ashr_of_true_val1(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483588
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
-; CHECK-NEXT:    [[NARROW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[NARROW]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr i64 [[Y:%.*]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], -536870897
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = ashr i64 [[Y:%.*]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
   %1 = and i32 %x, -2147483588
   %2 = ashr i32 %1, 2