forked from pingcap/tiflash
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcpu_features.cmake
225 lines (205 loc) · 10.3 KB
/
cpu_features.cmake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# https://software.intel.com/sites/landingpage/IntrinsicsGuide/
include (CheckCXXSourceCompiles)
include (CMakePushCheckState)
cmake_push_check_state ()
# The variables HAVE_* determine if compiler has support for the flag to use the corresponding instruction set.
# The options ENABLE_* determine if we will tell compiler to actually use the corresponding instruction set if compiler can do it.
# All of them are unrelated to the instruction set at the host machine
# (you can compile for newer instruction set on old machines and vice versa).
option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." OFF)
if (ARCH_AARCH64)
# ARM publishes almost every year a new revision of it's ISA [1]. Each version comes with new mandatory and optional features from
# which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles", one for maximum
# compatibility intended to run on all 64-bit ARM hardware released after 2013 (e.g. Raspberry Pi 4), and one for modern ARM server
# CPUs, (e.g. Graviton).
#
# [1] https://en.wikipedia.org/wiki/AArch64
option (TIFLASH_ENABLE_ASIMD_SUPPORT "Enable Advanced SIMD support." ON)
option (TIFLASH_ENABLE_SVE_SUPPORT "Enable Scalable Vector Extension support." OFF)
# TODO: default ON, to be changed after CI is updated
option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." ON)
if (NO_ARMV81_OR_HIGHER)
# crc32 is optional in v8.0 and mandatory in v8.1. Enable it as __crc32()* is used in lot's of places and even very old ARM CPUs
# support it.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc")
if (TIFLASH_ENABLE_ASIMD_SUPPORT)
set (COMPILER_FLAGS "${COMPILER_FLAGS}+simd")
add_definitions(-DTIFLASH_ENABLE_ASIMD_SUPPORT=1)
endif ()
else ()
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it
# includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag
# "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on
# old OSs.
#
# simd: NEON, introduced as optional in v8.0, A few extensions were added with v8.1 but it's still not mandatory. Enables the
# compiler to auto-vectorize.
# sve: Scalable Vector Extensions, introduced as optional in v8.2. Available in Graviton 3 but not in Graviton 2, and most likely
# also not in CI machines. Compiler support for autovectorization is rudimentary at the time of writing, see [5]. Can be
# enabled one-fine-day (TM) but not now.
# ssbs: "Speculative Store Bypass Safe". Optional in v8.0, mandatory in v8.5. Meltdown/spectre countermeasure.
# crypto: SHA1, SHA256, AES. Optional in v8.0. In v8.4, further algorithms were added but it's still optional, see [6].
# dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits
# but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was
# introduced as optional, either in v8.2 [7] or in v8.4 [8].
# ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code.
# Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15.
#
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
set (TEST_FLAG "-march=armv8.2-a+crypto+ssbs+dotprod")
if (TIFLASH_ENABLE_ASIMD_SUPPORT)
set (TEST_FLAG "${TEST_FLAG}+simd")
add_definitions(-DTIFLASH_ENABLE_ASIMD_SUPPORT=1)
endif ()
if (TIFLASH_ENABLE_SVE_SUPPORT)
set (TEST_FLAG "${TEST_FLAG}+sve")
add_definitions(-DTIFLASH_ENABLE_SVE_SUPPORT=1)
endif ()
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
elseif (ARCH_AMD64)
# enable sse4.2 or lower default, check avx, avx2, avx512
# set the flags for specific files rather than globally
# in order to avoid SSE-AVX Transition Penalty
# and note that the compile will succeed even if the host machine does not support the instruction set
# so we do not set the flags to avoid core dump in old machines
option (TIFLASH_ENABLE_AVX_SUPPORT "Use AVX/AVX2 instructions on x86_64" ON)
option (TIFLASH_ENABLE_AVX512_SUPPORT "Use AVX512 instructions on x86_64" ON)
# `haswell` was released since 2013 with cpu feature avx2, bmi2. It's a practical arch for optimizer
option (TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT "Use instructions based on architecture `haswell` on x86_64" ON)
option (NO_AVX_OR_HIGHER "Disable AVX or higher on x86_64 for maximum compatibility with older/embedded hardware." OFF)
if (NO_AVX_OR_HIGHER)
SET(TIFLASH_ENABLE_AVX_SUPPORT OFF)
SET(TIFLASH_ENABLE_AVX512_SUPPORT OFF)
SET (TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT OFF)
endif()
set (TEST_FLAG "-mssse3")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <tmmintrin.h>
int main() {
__m64 a = _mm_abs_pi8(__m64());
(void)a;
return 0;
}
" HAVE_SSSE3)
if (HAVE_SSSE3)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TEST_FLAG "-msse4.1")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <smmintrin.h>
int main() {
auto a = _mm_insert_epi8(__m128i(), 0, 0);
(void)a;
return 0;
}
" HAVE_SSE41)
if (HAVE_SSE41)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TEST_FLAG "-msse4.2")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <nmmintrin.h>
int main() {
auto a = _mm_crc32_u64(0, 0);
(void)a;
return 0;
}
" HAVE_SSE42)
if (HAVE_SSE42)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TEST_FLAG "-mpclmul")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <wmmintrin.h>
int main() {
auto a = _mm_clmulepi64_si128(__m128i(), __m128i(), 0);
(void)a;
return 0;
}
" HAVE_PCLMULQDQ)
if (HAVE_PCLMULQDQ)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TEST_FLAG "-mpopcnt")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
int main() {
auto a = __builtin_popcountll(0);
(void)a;
return 0;
}
" HAVE_POPCNT)
if (HAVE_POPCNT)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
set (TIFLASH_COMPILER_AVX2_FLAG "-mavx2")
set (TEST_FLAG "${TIFLASH_COMPILER_AVX2_FLAG}")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <immintrin.h>
int main() {
auto a = _mm256_insert_epi8(__m256i(), 0, 0);
(void)a;
auto b = _mm256_add_epi16(__m256i(), __m256i());
(void)b;
return 0;
}
" HAVE_AVX2)
if (HAVE_AVX2 AND TIFLASH_ENABLE_AVX_SUPPORT)
# set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
add_definitions(-DTIFLASH_ENABLE_AVX_SUPPORT=1)
endif ()
set (TEST_FLAG "-mavx512f -mavx512bw -mavx512vl")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
#include <immintrin.h>
int main() {
auto a = _mm512_setzero_epi32();
(void)a;
auto b = _mm512_add_epi16(__m512i(), __m512i());
(void)b;
auto c = _mm_cmp_epi8_mask(__m128i(), __m128i(), 0);
(void)c;
return 0;
}
" HAVE_AVX512)
message(STATUS "HAVE_AVX512: ${HAVE_AVX512}")
if (HAVE_AVX512 AND TIFLASH_ENABLE_AVX512_SUPPORT)
# set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
add_definitions(-DTIFLASH_ENABLE_AVX512_SUPPORT=1)
endif ()
set (TIFLASH_COMPILER_ARCH_HASWELL_FLAG "-march=haswell")
check_cxx_compiler_flag("${TIFLASH_COMPILER_ARCH_HASWELL_FLAG}" COMPILER_SUPPORT_ARCH_HASWELL)
if (NOT COMPILER_SUPPORT_ARCH_HASWELL)
set (TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT OFF)
endif ()
else ()
# ignore all other platforms
endif ()
cmake_pop_check_state ()