Skip to content

Commit

Permalink
[X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar …
Browse files Browse the repository at this point in the history
…min/max/cmp intrinsics more correctly.

Now we only pass bit 0 of the DemandedElts to optimize operand 1 as we recurse since the upper bits are unused.

Also calculate UndefElts correctly.

Simplify InstCombineCalls for these intrinsics to just call SimplifyDemandedVectorElts for the call instruction to reuse this support.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289628 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
topperc committed Dec 14, 2016
1 parent 5e5a55a commit 52ed606
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 20 deletions.
23 changes: 6 additions & 17 deletions lib/Transforms/InstCombine/InstCombineCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1760,23 +1760,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}

case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd: {
// These intrinsics only demand the lowest element of the second input
// vector.
Value *Arg1 = II->getArgOperand(1);
unsigned VWidth = Arg1->getType()->getVectorNumElements();
if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
II->setArgOperand(1, V);
return II;
}
break;
}

case Intrinsic::x86_fma_vfmadd_ss:
case Intrinsic::x86_fma_vfmsub_ss:
case Intrinsic::x86_fma_vfnmadd_ss:
Expand Down Expand Up @@ -1837,6 +1820,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}

case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd: {
unsigned VWidth = II->getType()->getVectorNumElements();
Expand Down
31 changes: 28 additions & 3 deletions lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1290,14 +1290,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// checks).
break;

// Binary scalar-as-vector operations that work column-wise. A dest element
// is a function of the corresponding input elements from the two inputs.
// Binary scalar-as-vector operations that work column-wise. The high
// elements come from operand 0. The low element is a function of both
// operands.
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_cmp_sd: {
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }

// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0])
return II->getArgOperand(0);

// Only lower element is used for operand 1.
DemandedElts = 1;
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }

// Lower element is undefined if both lower elements are undefined.
// Consider things like undef&0. The result is known zero, not undef.
if (!UndefElts2[0])
UndefElts.clearBit(0);

break;
}

// Binary scalar-as-vector operations that work column-wise. A dest element
// is a function of the corresponding input elements from the two inputs.
case Intrinsic::x86_sse41_round_ss:
case Intrinsic::x86_sse41_round_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
Expand Down

0 comments on commit 52ed606

Please sign in to comment.