[SystemZ] Add some generic (floating point support) load instructions.

Add generic instructions for load complement, load negative and load positive for fp32 and fp64, and let isel prefer them. They do not clobber CC, and so give scheduler more freedom. SystemZElimCompare pass will convert them when it can to the CC-setting variants. Regression tests updated to expect the new opcodes in places where the old ones where used. New test case SystemZ/fp-cmp-05.ll checks that SystemZCompareElim.cpp can handle the new opcodes. README.txt updated (bullet removed). Note that fp128 is not yet handled, because it is relatively rare, and is a bit trickier, because of the fact that l.dfr would operate on the sign bit of one of the subregisters of a fp128, but we would not want to copy the other sub-reg in case src and dst regs are not the same. Reviewed by Ulrich Weigand. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249046 91177308-0d34-0410-b5e6-96231b3b80d8
zoren · Oct 1, 2015 · 89b44df · 89b44df
1 parent 3fffb69
commit 89b44df
Show file tree

Hide file tree

Showing 12 changed files with 132 additions and 31 deletions.
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
@@ -52,12 +52,6 @@ We don't use the TEST DATA CLASS instructions.
 
 --
 
-We could use the generic floating-point forms of LOAD COMPLEMENT,
-LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the
-condition codes.  For example, we could use LCDFR instead of LCDBR.
-
---
-
 We only use MVC, XC and CLC for constant-length block operations.
 We could extend them to variable-length operations too,
 using EXECUTE RELATIVE LONG.

diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -238,26 +238,46 @@ let Predicates = [FeatureFPExtension] in {
 // Unary arithmetic
 //===----------------------------------------------------------------------===//
 
+// We prefer generic instructions during isel, because they do not
+// clobber CC and therefore give the scheduler more freedom. In cases
+// the CC is actually useful, the SystemZElimCompare pass will try to
+// convert generic instructions into opcodes that also set CC. Note
+// that lcdf / lpdf / lndf only affect the sign bit, and can therefore
+// be used with fp32 as well. This could be done for fp128, in which
+// case the operands would have to be tied.
+
 // Negation (Load Complement).
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32,  FP32>;
-  def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64,  FP64>;
+  def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32,  FP32>;
+  def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64,  FP64>;
   def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>;
 }
+// Generic form, which does not set CC.
+def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64,  FP64>;
+let isCodeGenOnly = 1 in
+  def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32,  FP32>;
 
 // Absolute value (Load Positive).
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32,  FP32>;
-  def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64,  FP64>;
+  def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32,  FP32>;
+  def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64,  FP64>;
   def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
 }
+// Generic form, which does not set CC.
+def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64,  FP64>;
+let isCodeGenOnly = 1 in
+  def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32,  FP32>;
 
 // Negative absolute value (Load Negative).
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32,  FP32>;
-  def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64,  FP64>;
+  def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32,  FP32>;
+  def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64,  FP64>;
   def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
 }
+// Generic form, which does not set CC.
+def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64,  FP64>;
+let isCodeGenOnly = 1 in
+  def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32,  FP32>;
 
 // Square root.
 def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32,  FP32>;
@@ -414,6 +434,6 @@ let Defs = [CC], CCValues = 0xF in {
 // Peepholes
 //===----------------------------------------------------------------------===//
 
-def : Pat<(f32  fpimmneg0), (LCEBR (LZER))>;
-def : Pat<(f64  fpimmneg0), (LCDBR (LZDR))>;
+def : Pat<(f32  fpimmneg0), (LCDFR_32 (LZER))>;
+def : Pat<(f64  fpimmneg0), (LCDFR (LZDR))>;
 def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1186,6 +1186,12 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
   case SystemZ::LER:    return SystemZ::LTEBR;
   case SystemZ::LDR:    return SystemZ::LTDBR;
   case SystemZ::LXR:    return SystemZ::LTXBR;
+  case SystemZ::LCDFR:  return SystemZ::LCDBR;
+  case SystemZ::LPDFR:  return SystemZ::LPDBR;
+  case SystemZ::LNDFR:  return SystemZ::LNDBR;
+  case SystemZ::LCDFR_32:  return SystemZ::LCEBR;
+  case SystemZ::LPDFR_32:  return SystemZ::LPEBR;
+  case SystemZ::LNDFR_32:  return SystemZ::LNEBR;
   // On zEC12 we prefer to use RISBGN.  But if there is a chance to
   // actually use the condition code, we may turn it back into RISGB.
   // Note that RISBG is not really a "load-and-test" instruction,

diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -215,15 +215,15 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
       break;
 
     case SystemZ::WFLCDB:
-      Changed |= shortenOn01(MI, SystemZ::LCDBR);
+      Changed |= shortenOn01(MI, SystemZ::LCDFR);
       break;
 
     case SystemZ::WFLNDB:
-      Changed |= shortenOn01(MI, SystemZ::LNDBR);
+      Changed |= shortenOn01(MI, SystemZ::LNDFR);
       break;
 
     case SystemZ::WFLPDB:
-      Changed |= shortenOn01(MI, SystemZ::LPDBR);
+      Changed |= shortenOn01(MI, SystemZ::LPDFR);
       break;
 
     case SystemZ::WFSQDB:

diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll
@@ -30,12 +30,12 @@ define void @foo() {
 ;
 ; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
 ; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-1-LABEL: foo:

diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll
@@ -31,12 +31,12 @@ define void @foo() {
 ;
 ; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
 ; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-1-LABEL: foo:

diff --git a/test/CodeGen/SystemZ/args-03.ll b/test/CodeGen/SystemZ/args-03.ll
@@ -31,12 +31,12 @@ define void @foo() {
 ;
 ; CHECK-FLOAT-LABEL: foo:
 ; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
 ; CHECK-FLOAT: brasl %r14, bar@PLT
 ;
 ; CHECK-DOUBLE-LABEL: foo:
 ; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
 ; CHECK-DOUBLE: brasl %r14, bar@PLT
 ;
 ; CHECK-FP128-1-LABEL: foo:

diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -7,7 +7,7 @@
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
-; CHECK: lpebr %f0, %f0
+; CHECK: lpdfr %f0, %f0
 ; CHECK: br %r14
   %res = call float @llvm.fabs.f32(float %f)
   ret float %res
@@ -17,7 +17,7 @@ define float @f1(float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: lpdbr %f0, %f0
+; CHECK: lpdfr %f0, %f0
 ; CHECK: br %r14
   %res = call double @llvm.fabs.f64(double %f)
   ret double %res

diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -7,7 +7,7 @@
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
-; CHECK: lnebr %f0, %f0
+; CHECK: lndfr %f0, %f0
 ; CHECK: br %r14
   %abs = call float @llvm.fabs.f32(float %f)
   %res = fsub float -0.0, %abs
@@ -18,7 +18,7 @@ define float @f1(float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: lndbr %f0, %f0
+; CHECK: lndfr %f0, %f0
 ; CHECK: br %r14
   %abs = call double @llvm.fabs.f64(double %f)
   %res = fsub double -0.0, %abs

diff --git a/test/CodeGen/SystemZ/fp-cmp-05.ll b/test/CodeGen/SystemZ/fp-cmp-05.ll
@@ -0,0 +1,81 @@
+; Test that floating-point instructions that set cc are used to
+; eliminate compares for load complement, load negative and load
+; positive. Right now, the WFL.DB (vector) instructions are not
+; handled by SystemZElimcompare, so for Z13 this is currently
+; unimplemented.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -check-prefix=CHECK-Z10
+
+; Load complement (sign-bit flipped).
+; Test f32
+define float @f1(float %a, float %b, float %f) {
+; CHECK-LABEL: f1:
+; CHECK-Z10: lcebr
+; CHECK-Z10-NEXT: je
+  %neg = fsub float -0.0, %f
+  %cond = fcmp oeq float %neg, 0.0
+  %res = select i1 %cond, float %a, float %b
+  ret float %res
+}
+
+; Test f64
+define double @f2(double %a, double %b, double %f) {
+; CHECK-LABEL: f2:
+; CHECK-Z10: lcdbr
+; CHECK-Z10-NEXT: je
+  %neg = fsub double -0.0, %f
+  %cond = fcmp oeq double %neg, 0.0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Negation of floating-point absolute.
+; Test f32
+declare float @llvm.fabs.f32(float %f)
+define float @f3(float %a, float %b, float %f) {
+; CHECK-LABEL: f3:
+; CHECK-Z10: lnebr
+; CHECK-Z10-NEXT: je
+  %abs = call float @llvm.fabs.f32(float %f)
+  %neg = fsub float -0.0, %abs
+  %cond = fcmp oeq float %neg, 0.0
+  %res = select i1 %cond, float %a, float %b
+  ret float %res
+}
+
+; Test f64
+declare double @llvm.fabs.f64(double %f)
+define double @f4(double %a, double %b, double %f) {
+; CHECK-LABEL: f4:
+; CHECK-Z10: lndbr
+; CHECK-Z10-NEXT: je
+  %abs = call double @llvm.fabs.f64(double %f)
+  %neg = fsub double -0.0, %abs
+  %cond = fcmp oeq double %neg, 0.0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Absolute floating-point value.
+; Test f32
+define float @f5(float %a, float %b, float %f) {
+; CHECK-LABEL: f5:
+; CHECK-Z10: lpebr
+; CHECK-Z10-NEXT: je
+  %abs = call float @llvm.fabs.f32(float %f)
+  %cond = fcmp oeq float %abs, 0.0
+  %res = select i1 %cond, float %a, float %b
+  ret float %res
+}
+
+; Test f64
+define double @f6(double %a, double %b, double %f) {
+; CHECK-LABEL: f6:
+; CHECK-Z10: lpdbr
+; CHECK-Z10-NEXT: je
+  %abs = call double @llvm.fabs.f64(double %f)
+  %cond = fcmp oeq double %abs, 0.0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll
@@ -6,7 +6,7 @@
 define float @f1() {
 ; CHECK-LABEL: f1:
 ; CHECK: lzer [[REGISTER:%f[0-5]+]]
-; CHECK: lcebr %f0, [[REGISTER]]
+; CHECK: lcdfr %f0, [[REGISTER]]
 ; CHECK: br %r14
   ret float -0.0
 }
@@ -15,7 +15,7 @@ define float @f1() {
 define double @f2() {
 ; CHECK-LABEL: f2:
 ; CHECK: lzdr [[REGISTER:%f[0-5]+]]
-; CHECK: lcdbr %f0, [[REGISTER]]
+; CHECK: lcdfr %f0, [[REGISTER]]
 ; CHECK: br %r14
   ret double -0.0
 }

diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -6,7 +6,7 @@
 ; Test f32.
 define float @f1(float %f) {
 ; CHECK-LABEL: f1:
-; CHECK: lcebr %f0, %f0
+; CHECK: lcdfr %f0, %f0
 ; CHECK: br %r14
   %res = fsub float -0.0, %f
   ret float %res
@@ -15,7 +15,7 @@ define float @f1(float %f) {
 ; Test f64.
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: lcdbr %f0, %f0
+; CHECK: lcdfr %f0, %f0
 ; CHECK: br %r14
   %res = fsub double -0.0, %f
   ret double %res