From 20732d55c2fda872a32b2335febafa7327f8b066 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka <juergen@apple.com>
Date: Mon, 23 Jun 2014 21:55:44 +0000
Subject: [PATCH] [FastISel][X86] Lower unsupported selects to control-flow.

This extends the select lowering coverage by emitting pseudo cmov
instructions. These instructions will later be lowered to control-flow to
simulate the select.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211545 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp                |  71 +++++++++
 .../X86/fast-isel-select-pseudo-cmov.ll       | 138 ++++++++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index af9eaf32a332..6625a706d240 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -115,6 +115,8 @@ class X86FastISel final : public FastISel {
 
   bool X86FastEmitSSESelect(const Instruction *I);
 
+  bool X86FastEmitPseudoSelect(const Instruction *I);
+
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
@@ -1852,6 +1854,70 @@ bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) {
   return true;
 }
 
+/// Lower a select to a pseudo CMOV instruction, which is expanded into
+/// control-flow later. Returns false if the select cannot be handled here.
+bool X86FastISel::X86FastEmitPseudoSelect(const Instruction *I) {
+  MVT RetVT;
+  if (!isTypeLegal(I->getType(), RetVT))
+    return false;
+
+  // Pick the pseudo CMOV for the result type; other types are not handled.
+  unsigned Opc;
+  switch (RetVT.SimpleTy) {
+  default: return false;
+  case MVT::i8:  Opc = X86::CMOV_GR8;  break;
+  case MVT::i16: Opc = X86::CMOV_GR16; break;
+  case MVT::i32: Opc = X86::CMOV_GR32; break;
+  case MVT::f32: Opc = X86::CMOV_FR32; break;
+  case MVT::f64: Opc = X86::CMOV_FR64; break;
+  }
+
+  const Value *Cond = I->getOperand(0);
+  X86::CondCode CC = X86::COND_NE;
+  // Fold the compare into the select only when both are in the same basic
+  // block; operands of a compare from another block may lack a register def.
+  const auto *CI = dyn_cast<CmpInst>(Cond);
+  if (CI && CI->getParent() == I->getParent()) {
+    bool NeedSwap;
+    std::tie(CC, NeedSwap) = getX86ConditonCode(CI->getPredicate());
+    if (CC > X86::LAST_VALID_COND)
+      return false;
+
+    const Value *CmpLHS = CI->getOperand(0);
+    const Value *CmpRHS = CI->getOperand(1);
+    if (NeedSwap)
+      std::swap(CmpLHS, CmpRHS);
+
+    EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+      return false;
+  } else {
+    // Otherwise test bit 0 of the condition register; CC stays COND_NE.
+    unsigned CondReg = getRegForValue(Cond);
+    if (CondReg == 0)
+      return false;
+    bool CondIsKill = hasTrivialKill(Cond);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+      .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+  }
+
+  const Value *LHS = I->getOperand(1);
+  const Value *RHS = I->getOperand(2);
+  unsigned LHSReg = getRegForValue(LHS);
+  bool LHSIsKill = hasTrivialKill(LHS);
+  unsigned RHSReg = getRegForValue(RHS);
+  bool RHSIsKill = hasTrivialKill(RHS);
+  if (!LHSReg || !RHSReg)
+    return false;
+
+  // The false value (RHS) is passed first, then the true value (LHS).
+  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+  unsigned ResultReg =
+    FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
 bool X86FastISel::X86SelectSelect(const Instruction *I) {
   MVT RetVT;
   if (!isTypeLegal(I->getType(), RetVT))
@@ -1890,6 +1956,11 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
   if (X86FastEmitSSESelect(I))
     return true;
 
+  // Fall-back to pseudo conditional move instructions, which will be later
+  // converted to control-flow.
+  if (X86FastEmitPseudoSelect(I))
+    return true;
+
   return false;
 }
 
diff --git a/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll b/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
new file mode 100644
index 000000000000..1ec4d64fe209
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                                              | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort                  | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10                             -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s
+
+
+define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) { ; f32 select on 'fcmp one': expects ucomiss, a jne branch, then movaps
+; CHECK-LABEL: select_fcmp_one_f32
+; CHECK:       ucomiss %xmm1, %xmm0
+; CHECK-NEXT:  jne [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  movaps %xmm2, %xmm0
+  %1 = fcmp one float %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define double @select_fcmp_one_f64(double %a, double %b, double %c, double %d) { ; f64 select on 'fcmp one': expects ucomisd, a jne branch, then movaps
+; CHECK-LABEL: select_fcmp_one_f64
+; CHECK:       ucomisd %xmm1, %xmm0
+; CHECK-NEXT:  jne [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  movaps  %xmm2, %xmm0
+  %1 = fcmp one double %a, %b
+  %2 = select i1 %1, double %c, double %d
+  ret double %2
+}
+
+define float @select_icmp_eq_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp eq i64': expects cmpq followed by je
+; CHECK-LABEL: select_icmp_eq_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  je [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp eq i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ne_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp ne i64': expects cmpq followed by jne
+; CHECK-LABEL: select_icmp_ne_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jne [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ne i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ugt_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp ugt i64': expects cmpq followed by ja
+; CHECK-LABEL: select_icmp_ugt_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  ja [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ugt i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_uge_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp uge i64': expects cmpq followed by jae
+; CHECK-LABEL: select_icmp_uge_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jae [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp uge i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ult_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp ult i64': expects cmpq followed by jb
+; CHECK-LABEL: select_icmp_ult_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jb [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ult i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_ule_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp ule i64': expects cmpq followed by jbe
+; CHECK-LABEL: select_icmp_ule_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jbe [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp ule i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_sgt_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp sgt i64': expects cmpq followed by jg
+; CHECK-LABEL: select_icmp_sgt_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jg [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp sgt i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_sge_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp sge i64': expects cmpq followed by jge
+; CHECK-LABEL: select_icmp_sge_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jge [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp sge i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_slt_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp slt i64': expects cmpq followed by jl
+; CHECK-LABEL: select_icmp_slt_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jl [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp slt i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+
+define float @select_icmp_sle_f32(i64 %a, i64 %b, float %c, float %d) { ; f32 select on 'icmp sle i64': expects cmpq followed by jle
+; CHECK-LABEL: select_icmp_sle_f32
+; CHECK:       cmpq %rsi, %rdi
+; CHECK-NEXT:  jle [[BB:LBB[0-9]+_2]]
+; CHECK:       [[BB]]
+; CHECK-NEXT:  retq
+  %1 = icmp sle i64 %a, %b
+  %2 = select i1 %1, float %c, float %d
+  ret float %2
+}
+