[AArch64] Switch regression tests to test features not CPUs

Summary:
We have switched to using features for all heuristics, but the tests for these are still using -mcpu, which means we are not directly testing the features. This converts at least some of the existing regression tests to use the new features.

This still leaves the following features untested:
  merge-narrow-ld
  predictable-select-expensive
  alternate-sextload-cvt-f32-pattern
  disable-latency-sched-heuristic

Reviewers: mcrosier, t.p.northover, rengolin

Subscribers: MatzeB, aemerson, llvm-commits, rengolin

Differential Revision: http://reviews.llvm.org/D21288

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273271 91177308-0d34-0410-b5e6-96231b3b80d8
qzmfranklin · Jun 21, 2016 · 7e4cf0a · 7e4cf0a
1 parent b4f51c3
commit 7e4cf0a
Show file tree

Hide file tree

Showing 8 changed files with 98 additions and 81 deletions.
diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN
-; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
 
+; The following tests use the balance-fp-ops feature, and should be independent of
+; the target cpu.
+
+; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
+; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops  -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP
+
 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
 ; our test strategy is to:
 ;   * Force the pass to always perform register swapping even if the dest register is of the
@@ -75,7 +81,7 @@ entry:
 ; CHECK: fmsub [[x]]
 ; CHECK: fmadd [[y]]
 ; CHECK: fmadd [[x]]
-; CHECK-A57: stp [[x]], [[y]]
+; CHECK-BALFP: stp [[x]], [[y]]
 ; CHECK-A53-DAG: str [[x]]
 ; CHECK-A53-DAG: str [[y]]
 
@@ -170,7 +176,7 @@ declare void @g(...) #1
 ; CHECK: fmsub [[x]]
 ; CHECK: fmadd [[y]]
 ; CHECK: fmadd [[x]]
-; CHECK-A57: stp [[x]], [[y]]
+; CHECK-BALFP: stp [[x]], [[y]]
 ; CHECK-A53-DAG: str [[x]]
 ; CHECK-A53-DAG: str [[y]]
 

diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -O3 -aarch64-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -O3 -aarch64-gep-opt=true -mattr=-use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -aarch64-gep-opt=true -mattr=+use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
 ; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
 ; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-linux-gnueabi"
 

diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53
+
+; PR26827 - Merging stores must not create a wrong dependency: the A53 checks require the store of the %fd value (copied from w1) to come after two q-register stores.
+%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* }
+@gv0 = internal unnamed_addr global i32 0, align 4
+@gv1 = internal unnamed_addr global %struct1** null, align 8
+
+define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg)  {
+; A53-LABEL: test:
+entry:
+; A53: mov [[DATA:w[0-9]+]], w1
+; A53: str q{{[0-9]+}}, {{.*}}
+; A53: str q{{[0-9]+}}, {{.*}}
+; A53: str [[DATA]], {{.*}}
+
+  %0 = bitcast %struct1* %fde to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false)
+  %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
+  store i16 256, i16* %state, align 8
+  %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
+  store i32 %fd, i32* %fd1, align 8
+  %force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3
+  store i32 0, i32* %force_eof, align 4
+  %func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6
+  store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8
+  %arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7
+  store i8* %arg, i8** %arg3, align 8
+  %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, i8* %0) #6
+  %1 = load i32, i32* %fd1, align 8
+  %cmp.i = icmp slt i32 %1, 0
+  br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
+if.then.i:
+  unreachable
+
+while.body.i.preheader:
+  %2 = load i32, i32* @gv0, align 4
+  %3 = icmp eq i32* %fd1, @gv0
+  br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader
+
+while.body.i.split.ver.us.preheader:
+  br label %while.body.i.split.ver.us
+
+while.body.i.split.ver.us:
+  %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ]
+  %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
+  %4 = icmp sgt i32 %mul.i.ver.us, %1
+  br i1 %4, label %while.end.i, label %while.body.i.split.ver.us
+
+while.body.i.split:
+  br label %while.body.i.split
+
+while.end.i:
+  %call.i = tail call i8* @foo()
+  store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8
+  br label %exit
+
+exit:
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare i32 @fcntl(i32, i32, ...)
+declare noalias i8* @foo()
diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll
@@ -1,6 +1,5 @@
-; RUN: llc -march aarch64 %s -o - | FileCheck %s
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown  -mcpu=cyclone  | FileCheck %s --check-prefix=CYCLONE
-; RUN: llc -mcpu cortex-a53 -march aarch64 %s -o - | FileCheck %s --check-prefix=A53
+; RUN: llc -march aarch64 %s -mcpu=cyclone -o - | FileCheck %s --check-prefix=CYCLONE --check-prefix=CHECK
+; RUN: llc -march aarch64 %s -mattr=-slow-misaligned-128store -o - | FileCheck %s --check-prefix=MISALIGNED --check-prefix=CHECK
 
 @g0 = external global <3 x float>, align 16
 @g1 = external global <3 x float>, align 4
@@ -39,76 +38,16 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
   store <2 x float> %shuffle1, <2 x float>* %idx1, align 8
   ret void
 
-; CHECK-LABEL:    merge_vec_extract_stores
-; CHECK:          stur   q0, [x0, #24]
-; CHECK-NEXT:     ret
+; MISALIGNED-LABEL:    merge_vec_extract_stores
+; MISALIGNED:          stur   q0, [x0, #24]
+; MISALIGNED-NEXT:     ret
+
+; FIXME: Ideally we would like to use a generic target for this test, but this relies
+; on suppressing store pairs.
 
 ; CYCLONE-LABEL:    merge_vec_extract_stores
 ; CYCLONE:          ext   v1.16b, v0.16b, v0.16b, #8
 ; CYCLONE-NEXT:     str   d0, [x0, #24]
 ; CYCLONE-NEXT:     str   d1, [x0, #32]
 ; CYCLONE-NEXT:     ret
 }
-
-
-; PR26827 - Merge stores causes wrong dependency.
-%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* }
-@gv0 = internal unnamed_addr global i32 0, align 4
-@gv1 = internal unnamed_addr global %struct1** null, align 8
-
-define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg)  {
-;CHECK-LABEL: test
-entry:
-;A53: mov [[DATA:w[0-9]+]], w1
-;A53: str q{{[0-9]+}}, {{.*}}
-;A53: str q{{[0-9]+}}, {{.*}}
-;A53: str [[DATA]], {{.*}}
-
-  %0 = bitcast %struct1* %fde to i8*
-  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false)
-  %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
-  store i16 256, i16* %state, align 8
-  %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
-  store i32 %fd, i32* %fd1, align 8
-  %force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3
-  store i32 0, i32* %force_eof, align 4
-  %func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6
-  store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8
-  %arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7
-  store i8* %arg, i8** %arg3, align 8
-  %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, i8* %0) #6
-  %1 = load i32, i32* %fd1, align 8
-  %cmp.i = icmp slt i32 %1, 0
-  br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
-if.then.i:
-  unreachable
-
-while.body.i.preheader:
-  %2 = load i32, i32* @gv0, align 4
-  %3 = icmp eq i32* %fd1, @gv0
-  br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader
-
-while.body.i.split.ver.us.preheader:
-  br label %while.body.i.split.ver.us
-
-while.body.i.split.ver.us:
-  %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ]
-  %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
-  %4 = icmp sgt i32 %mul.i.ver.us, %1
-  br i1 %4, label %while.end.i, label %while.body.i.split.ver.us
-
-while.body.i.split:
-  br label %while.body.i.split
-
-while.end.i:
-  %call.i = tail call i8* @foo()
-  store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8
-  br label %exit
-
-exit:
-  ret void
-}
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
-declare i32 @fcntl(i32, i32, ...)
-declare noalias i8* @foo()
diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll
@@ -1,4 +1,6 @@
+; RUN: llc -o - %s -mattr=+macroop-fusion,+use-postra-scheduler | FileCheck %s
 ; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
+
 target triple = "arm64-apple-ios"
 
 declare void @foobar(i32 %v0, i32 %v1)
@@ -8,12 +10,12 @@ declare void @foobar(i32 %v0, i32 %v1)
 ; CHECK: add w[[ADDRES:[0-9]+]], w1, #7
 ; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13
 ; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]]
-; CHECK: mov x0, x[[ADDRES]]
-; CHECK: mov x1, x[[SUBRES]]
+; CHECK: mov [[REGTY:[xw]]]0, [[REGTY]][[ADDRES]]
+; CHECK: mov [[REGTY]]1, [[REGTY]][[SUBRES]]
 ; CHECK: bl _foobar
 ; CHECK: [[SKIPBLOCK]]:
-; CHECK: mov x0, x[[SUBRES]]
-; CHECK: mov x1, x[[ADDRES]]
+; CHECK: mov [[REGTY]]0, [[REGTY]][[SUBRES]]
+; CHECK: mov [[REGTY]]1, [[REGTY]][[ADDRES]]
 ; CHECK: bl _foobar
 define void @test_sub_cbz(i32 %a0, i32 %a1) {
 entry:

diff --git a/test/CodeGen/AArch64/exynos-quad-ldp-stp.ll → test/CodeGen/AArch64/no-quad-ldp-stp.ll b/test/CodeGen/AArch64/exynos-quad-ldp-stp.ll → test/CodeGen/AArch64/no-quad-ldp-stp.ll
@@ -1,10 +1,11 @@
+; RUN: llc < %s -march=aarch64 -mattr=+no-quad-ldst-pairs -verify-machineinstrs -asm-verbose=false | FileCheck %s
 ; RUN: llc < %s -march=aarch64 -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s
 
-; CHECK-LABEL: test_exynos_nopair_st
+; CHECK-LABEL: test_nopair_st
 ; CHECK: str
 ; CHECK: stur
 ; CHECK-NOT: stp
-define void @test_exynos_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) {
+define void @test_nopair_st(double* %ptr, <2 x double> %v1, <2 x double> %v2) {
   %tmp1 = bitcast double* %ptr to <2 x double>*
   store <2 x double> %v2, <2 x double>* %tmp1, align 16
   %add.ptr = getelementptr inbounds double, double* %ptr, i64 -2
@@ -13,11 +14,11 @@ define void @test_exynos_nopair_st(double* %ptr, <2 x double> %v1, <2 x double>
   ret void
 }
 
-; CHECK-LABEL: test_exynos_nopair_ld
+; CHECK-LABEL: test_nopair_ld
 ; CHECK: ldr
 ; CHECK: ldr
 ; CHECK-NOT: ldp
-define <2 x i64> @test_exynos_nopair_ld(i64* %p) {
+define <2 x i64> @test_nopair_ld(i64* %p) {
   %a1 = bitcast i64* %p to <2 x i64>*
   %tmp1 = load <2 x i64>, < 2 x i64>* %a1, align 8
   %add.ptr2 = getelementptr inbounds i64, i64* %p, i64 2

diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
@@ -6,6 +6,7 @@
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=exynos-m1 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=vulcan -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
 
 %X = type { i64, i64, i64 }
 declare void @f(%X*)

diff --git a/test/CodeGen/AArch64/sqrt-fastmath.ll b/test/CodeGen/AArch64/sqrt-fastmath.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=!sqrt,!vec-sqrt | FileCheck %s --check-prefix=FAULT
 ; RUN: llc < %s -mtriple=aarch64 -mattr=neon -recip=sqrt,vec-sqrt   | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon,-use-reverse-square-root  | FileCheck %s --check-prefix=FAULT
+; RUN: llc < %s -mtriple=aarch64 -mattr=neon,+use-reverse-square-root | FileCheck %s
 
 declare float @llvm.sqrt.f32(float) #1
 declare double @llvm.sqrt.f64(double) #1