From db342f42a4b00f858cb43328c9fdaff5fe2b5788 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 23 Feb 2022 18:03:40 -0800 Subject: [PATCH] CMake: update the x86 intrinsic checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge all the existing checks into a single one, which is a simple pass or fail, since all our supported compilers support all the intrinsics up to Cannon Lake. The two I've recently added (AVX512VBMI2 and VAES) aren't yet supported everywhere, so they stay. For some reason, all intrinsics seem to be disabled on Android. It looks like some support was missing during the CMake port and this was never again looked at. I'm leaving it be. As for WASM, discussion with maintainers is that the WASM emulation of x86 intrinsics is too hit-and-miss. No one is testing the performance, particularly the person writing such code (me). They also have some non-obvious selection of what is supported natively and what is merely emulated. Using the actual WASM intrinsics is preferred, but someone else's job. 
Change-Id: Ib42b3adc93bf4d43bd55fffd16c10d66208e8384 Reviewed-by: Tor Arne Vestbø Reviewed-by: Morten Johan Sørvig Reviewed-by: Lorn Potter Reviewed-by: Kai Koehne --- cmake/QtBaseGlobalTargets.cmake | 8 +- cmake/QtCompilerOptimization.cmake | 1 - config.tests/x86_simd/main.cpp | 216 -------------------------- config.tests/x86intrin/CMakeLists.txt | 9 ++ config.tests/x86intrin/main.cpp | 46 ++++++ config_help.txt | 4 - configure.cmake | 191 ++++++++--------------- src/corelib/global/qsimd_p.h | 64 ++++---- 8 files changed, 158 insertions(+), 381 deletions(-) create mode 100644 config.tests/x86intrin/CMakeLists.txt create mode 100644 config.tests/x86intrin/main.cpp diff --git a/cmake/QtBaseGlobalTargets.cmake b/cmake/QtBaseGlobalTargets.cmake index cec440dd917..43170f163fa 100644 --- a/cmake/QtBaseGlobalTargets.cmake +++ b/cmake/QtBaseGlobalTargets.cmake @@ -91,11 +91,11 @@ if(MACOS AND QT_IS_MACOS_UNIVERSAL AND __qt_osx_first_arch IN_LIST __qt_apple_silicon_arches) # The test in configure.cmake will not be run, but we know that # the compiler supports these intrinsics - set(QT_FORCE_FEATURE_sse2 ON CACHE INTERNAL "Force enable sse2 due to platform requirements.") + set(QT_FORCE_FEATURE_x86intrin ON CACHE INTERNAL "Force-enable x86 intrinsics due to platform requirements.") set(__QtFeature_custom_enabled_cache_variables - TEST_subarch_sse2 - FEATURE_sse2 - QT_FEATURE_sse2) + TEST_x86intrin + FEATURE_x86intrin + QT_FEATURE_x86intrin) endif() if(MACOS AND QT_IS_MACOS_UNIVERSAL AND __qt_osx_first_arch STREQUAL "x86_64") diff --git a/cmake/QtCompilerOptimization.cmake b/cmake/QtCompilerOptimization.cmake index ea073309451..4233aece762 100644 --- a/cmake/QtCompilerOptimization.cmake +++ b/cmake/QtCompilerOptimization.cmake @@ -13,7 +13,6 @@ if (MSVC) set(QT_CFLAGS_AESNI "${QT_CFLAGS_SSE2}") set(QT_CFLAGS_SHANI "${QT_CFLAGS_SSE2}") - # FIXME to be Visual Studio version specific, like in mkspecs/common/msvc-version.conf set(QT_CFLAGS_AVX "-arch:AVX") set(QT_CFLAGS_AVX2 
"-arch:AVX2") set(QT_CFLAGS_F16C "-arch:AVX") diff --git a/config.tests/x86_simd/main.cpp b/config.tests/x86_simd/main.cpp index c7032317dde..5af3496769d 100644 --- a/config.tests/x86_simd/main.cpp +++ b/config.tests/x86_simd/main.cpp @@ -12,222 +12,6 @@ # define attribute_target(x) #endif -#if T(SSE2) -attribute_target("sse2") void test_sse2() -{ - __m128i a = _mm_setzero_si128(); - _mm_maskmoveu_si128(a, _mm_setzero_si128(), 0); -} -#endif - -#if T(SSE3) -attribute_target("sse3") void test_sse3() -{ - __m128d a = _mm_set1_pd(6.28); - __m128d b = _mm_set1_pd(3.14); - __m128d result = _mm_addsub_pd(a, b); - (void) _mm_movedup_pd(result); -} -#endif - -#if T(SSSE3) -attribute_target("ssse3") void test_ssse3() -{ - __m128i a = _mm_set1_epi32(42); - _mm_abs_epi8(a); - (void) _mm_sign_epi16(a, _mm_set1_epi32(64)); -} -#endif - -#if T(SSE4_1) -attribute_target("sse4.1") void test_sse4_1() -{ - __m128 a = _mm_setzero_ps(); - _mm_ceil_ps(a); - __m128i result = _mm_mullo_epi32(_mm_set1_epi32(42), _mm_set1_epi32(64)); - (void)result; -} -#endif - -#if T(SSE4_2) -attribute_target("sse4.2") void test_sse4_2() -{ - __m128i a = _mm_setzero_si128(); - __m128i b = _mm_set1_epi32(42); - (void) _mm_cmpestrm(a, 16, b, 16, 0); -} -#endif - -#if T(AESNI) -attribute_target("aes,sse4.2") void test_aesni() -{ - __m128i a = _mm_setzero_si128(); - __m128i b = _mm_aesenc_si128(a, a); - __m128i c = _mm_aesdec_si128(a, b); - (void)c; -} -#endif - -#if T(F16C) -attribute_target("f16c") void test_f16c() -{ - __m128i a = _mm_setzero_si128(); - __m128 b = _mm_cvtph_ps(a); - __m256 b256 = _mm256_cvtph_ps(a); - (void) _mm_cvtps_ph(b, 0); - (void) _mm256_cvtps_ph(b256, 0); -} -#endif - -#if T(RDRND) -attribute_target("rdrnd") int test_rdrnd() -{ - unsigned short us; - unsigned int ui; - if (_rdrand16_step(&us)) - return 1; - if (_rdrand32_step(&ui)) - return 1; -# if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) - unsigned long long ull; - if (_rdrand64_step(&ull)) 
- return 1; -# endif -} -#endif - -#if T(RDSEED) -attribute_target("rdseed") int test_rdseed() -{ - unsigned short us; - unsigned int ui; - if (_rdseed16_step(&us)) - return 1; - if (_rdseed32_step(&ui)) - return 1; -# if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) - unsigned long long ull; - if (_rdseed64_step(&ull)) - return 1; -# endif -} -#endif - -#if T(SHANI) -attribute_target("sha") void test_shani() -{ - __m128i a = _mm_setzero_si128(); - __m128i b = _mm_sha1rnds4_epu32(a, a, 0); - __m128i c = _mm_sha1msg1_epu32(a, b); - __m128i d = _mm_sha256msg2_epu32(b, c); - (void)d; -} -#endif - -#if T(AVX) -attribute_target("avx") void test_avx() -{ - __m256d a = _mm256_setzero_pd(); - __m256d b = _mm256_set1_pd(42.42); - (void) _mm256_add_pd(a, b); -} -#endif - -#if T(AVX2) -attribute_target("avx2") void test_avx2() -{ - _mm256_zeroall(); - __m256i a = _mm256_setzero_si256(); - __m256i b = _mm256_and_si256(a, a); - (void) _mm256_add_epi8(a, b); -} -#endif - -#if T(AVX512F) -attribute_target("avx512f") void test_avx512f(char *ptr) -{ - /* AVX512 Foundation */ - __mmask16 m = ~1; - __m512i i; - __m512d d; - __m512 f; - i = _mm512_maskz_loadu_epi32(0, ptr); - d = _mm512_loadu_pd((double *)ptr + 64); - f = _mm512_loadu_ps((float *)ptr + 128); - _mm512_mask_storeu_epi64(ptr, m, i); - _mm512_mask_storeu_ps(ptr + 64, m, f); - _mm512_mask_storeu_pd(ptr + 128, m, d); -} -#endif - -#if T(AVX512ER) -attribute_target("avx512er") void test_avx512er() -{ - /* AVX512 Exponential and Reciprocal */ - __m512 f; - f = _mm512_exp2a23_round_ps(f, 8); -} -#endif - -#if T(AVX512CD) -attribute_target("avx512cd") void test_avx512cd() -{ - /* AVX512 Conflict Detection */ - __mmask16 m = ~1; - __m512i i; - i = _mm512_maskz_conflict_epi32(m, i); -} -#endif - -#if T(AVX512PF) -attribute_target("avx512pf") void test_avx512pf(void *ptr) -{ - /* AVX512 Prefetch */ - __m512i i; - __mmask16 m = 0xf; - _mm512_mask_prefetch_i64scatter_pd(ptr, m, i, 2, 2); -} -#endif 
- -#if T(AVX512DQ) -attribute_target("avx512dq") void test_avx512dq() -{ - /* AVX512 Doubleword and Quadword support */ - __m512i i; - __mmask16 m = ~1; - m = _mm512_movepi32_mask(i); -} -#endif - -#if T(AVX512BW) -attribute_target("avx512bw") void test_avx512bw(char *ptr) -{ - /* AVX512 Byte and Word support */ - __m512i i; - __mmask16 m = ~1; - i = _mm512_mask_loadu_epi8(i, m, ptr - 8); -} -#endif - -#if T(AVX512VL) -attribute_target("avx512vl") void test_avx512vl(char *ptr) -{ - /* AVX512 Vector Length */ - __mmask16 m = ~1; - __m256i i2 = _mm256_maskz_loadu_epi32(0, ptr); - _mm256_mask_storeu_epi32(ptr + 1, m, i2); -} -#endif - -#if T(AVX512IFMA) -attribute_target("avx512ifma") void test_avx512ifma() -{ - /* AVX512 Integer Fused Multiply-Add */ - __m512i i; - i = _mm512_madd52lo_epu64(i, i, i); -} -#endif - #if T(AVX512VBMI2) attribute_target("avx512vl,avx512vbmi2") void test_avx512vbmi2() { diff --git a/config.tests/x86intrin/CMakeLists.txt b/config.tests/x86intrin/CMakeLists.txt new file mode 100644 index 00000000000..475d062f64e --- /dev/null +++ b/config.tests/x86intrin/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.16) +project(x86intrin LANGUAGES CXX) +add_executable(x86intrin main.cpp) +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU|IntelLLVM|QCC") + target_compile_options(x86intrin PUBLIC + "-march=cannonlake" "-mrdrnd" "-mrdseed" "-maes" "-msha" "-w") +elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(x86intrin PUBLIC "-arch:AVX512" "-W0") +endif() diff --git a/config.tests/x86intrin/main.cpp b/config.tests/x86intrin/main.cpp new file mode 100644 index 00000000000..4306a4721c9 --- /dev/null +++ b/config.tests/x86intrin/main.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2022 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#include <immintrin.h> + +// Skylake AVX512 was added to GCC 4.9, Clang 3.7, and MSVC 2015. 
+// Cannon Lake was added to GCC 5, Clang 3.8, and MSVC 2017 15.7, +// so that's our minimum. +// Ice Lake was completed with GCC 8, Clang 6, and MSVC 2017 15.8. + +int test(int argc, char **argv) +{ + unsigned randomvalue; + _rdrand32_step(&randomvalue); // RDRND (IVB) + _rdseed32_step(&randomvalue); // RDSEED (BDW) + unsigned mask = _blsmsk_u32(argc); // BMI (HSW) + int clz = _lzcnt_u32(mask); // LZCNT (HSW) + int ctz = _tzcnt_u32(mask); // BMI (HSW) + mask = _bzhi_u32(-1, argc); // BMI2 (HSW) + + __m128d d = _mm_setzero_pd(); // SSE2 + d = _mm_cvtsi32_sd(d, argc); // SSE2 + __m256d d2 = _mm256_broadcastsd_pd(d); // AVX2 (HSW) + d2 = _mm256_fmadd_pd(d2, d2, d2); // FMA (HSW) + + __m128 f = _mm256_cvtpd_ps(d2); // AVX (SNB) + __m128i a = _mm_cvtps_ph(f, 0); // F16C (IVB) + __m128i b = _mm_aesenc_si128(a, a); // AESNI (WSM) + __m128i c = _mm_sha1rnds4_epu32(a, a, 0); // SHA (CNL) + __m128i e = _mm_sha1msg1_epu32(a, b); // SHA (CNL) + __m128i g = _mm_sha256msg2_epu32(b, c); // SHA (CNL) + + __m512i zero = _mm512_setzero_si512(); // AVX512F (SKX) + __m512i data = _mm512_maskz_loadu_epi8(mask, argv[0]); // AVX512BW (SKX) + __m256i ptrs = _mm256_maskz_loadu_epi64(mask, argv); // AVX512VL (SKX) + __m512i data2 = _mm512_broadcast_i64x4(ptrs); // AVX512F (SKX) + __m256i data3 = _mm256_madd52lo_epu64(ptrs, ptrs, ptrs);// AVX512IFMA (CNL) + data2 = _mm512_multishift_epi64_epi8(data, data2); // AVX512VBMI (CNL) + + return _mm256_extract_epi32(data3, 0); // AVX (SNB) +} + +int main(int argc, char **argv) +{ + return test(argc, argv); +} diff --git a/config_help.txt b/config_help.txt index aabf2aab4ad..bd229c43a4d 100644 --- a/config_help.txt +++ b/config_help.txt @@ -112,10 +112,6 @@ Build options: -c++std .... Select C++ standard [c++2b/c++20/c++17/c++14/c++11] - -sse2 ................ Use SSE2 instructions [auto] - -sse3/-ssse3/-sse4.1/-sse4.2/-avx/-avx2/-avx512 - Enable use of particular x86 instructions [auto] - Enabled ones are still subject to runtime detection. 
-mips_dsp/-mips_dspr2 Use MIPS DSP/rev2 instructions [auto] -qreal ........ typedef qreal to the specified type. [double] diff --git a/configure.cmake b/configure.cmake index c75f07fd405..32b4abb228b 100644 --- a/configure.cmake +++ b/configure.cmake @@ -264,68 +264,11 @@ static_assert(B::has_signaling_NaN, \"System lacks signaling NaN\"); } ") -# sse2 -qt_config_compile_test_x86simd(sse2 "SSE2 instructions") - -# sse3 -qt_config_compile_test_x86simd(sse3 "SSE3 instructions") - -# ssse3 -qt_config_compile_test_x86simd(ssse3 "SSSE3 instructions") - -# sse4_1 -qt_config_compile_test_x86simd(sse4_1 "SSE4.1 instructions") - -# sse4_2 -qt_config_compile_test_x86simd(sse4_2 "SSE4.2 instructions") - -# aesni -qt_config_compile_test_x86simd(aesni "AES new instructions") - -# f16c -qt_config_compile_test_x86simd(f16c "F16C instructions") - -# rdrnd -qt_config_compile_test_x86simd(rdrnd "RDRAND instruction") - -# rdseed -qt_config_compile_test_x86simd(rdseed "RDSEED instruction") - -# shani -qt_config_compile_test_x86simd(shani "SHA new instructions") - -# avx -qt_config_compile_test_x86simd(avx "AVX instructions") - -# avx2 -qt_config_compile_test_x86simd(avx2 "AVX2 instructions") - -# avx512f -qt_config_compile_test_x86simd(avx512f "AVX512 F instructions") - -# avx512er -qt_config_compile_test_x86simd(avx512er "AVX512 ER instructions") - -# avx512cd -qt_config_compile_test_x86simd(avx512cd "AVX512 CD instructions") - -# avx512pf -qt_config_compile_test_x86simd(avx512pf "AVX512 PF instructions") - -# avx512dq -qt_config_compile_test_x86simd(avx512dq "AVX512 DQ instructions") - -# avx512bw -qt_config_compile_test_x86simd(avx512bw "AVX512 BW instructions") - -# avx512vl -qt_config_compile_test_x86simd(avx512vl "AVX512 VL instructions") - -# avx512ifma -qt_config_compile_test_x86simd(avx512ifma "AVX512 IFMA instructions") - -# avx512vbmi -qt_config_compile_test_x86simd(avx512vbmi "AVX512 VBMI instructions") +# basic x86 intrinsics support +qt_config_compile_test(x86intrin + 
LABEL "Basic x86 intrinsics" + PROJECT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/config.tests/x86intrin" +) # x86: avx512vbmi2 qt_config_compile_test_x86simd(avx512vbmi2 "AVX512VBMI2") @@ -741,143 +684,126 @@ qt_feature("signaling_nan" PUBLIC LABEL "Signaling NaN" CONDITION TEST_signaling_nan ) -qt_feature("sse2" PRIVATE - LABEL "SSE2" - CONDITION ( ( ( TEST_architecture_arch STREQUAL i386 ) - OR ( TEST_architecture_arch STREQUAL x86_64 ) ) AND TEST_subarch_sse2 ) OR QT_FORCE_FEATURE_sse2 OR WASM +qt_feature("x86intrin" PRIVATE + LABEL "Basic" + CONDITION (((TEST_architecture_arch STREQUAL i386) OR (TEST_architecture_arch STREQUAL x86_64)) + AND (QT_FORCE_FEATURE_x86intrin OR TEST_x86intrin)) AUTODETECT NOT WASM ) +qt_feature("sse2" PRIVATE + CONDITION QT_FEATURE_x86intrin +) qt_feature_definition("sse2" "QT_COMPILER_SUPPORTS_SSE2" VALUE "1") qt_feature_config("sse2" QMAKE_PRIVATE_CONFIG) qt_feature("sse3" PRIVATE - LABEL "SSE3" - CONDITION QT_FEATURE_sse2 AND TEST_subarch_sse3 + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("sse3" "QT_COMPILER_SUPPORTS_SSE3" VALUE "1") qt_feature_config("sse3" QMAKE_PRIVATE_CONFIG) qt_feature("ssse3" PRIVATE - LABEL "SSSE3" - CONDITION QT_FEATURE_sse3 AND TEST_subarch_ssse3 + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("ssse3" "QT_COMPILER_SUPPORTS_SSSE3" VALUE "1") qt_feature_config("ssse3" QMAKE_PRIVATE_CONFIG) qt_feature("sse4_1" PRIVATE - LABEL "SSE4.1" - CONDITION QT_FEATURE_ssse3 AND TEST_subarch_sse4_1 + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("sse4_1" "QT_COMPILER_SUPPORTS_SSE4_1" VALUE "1") qt_feature_config("sse4_1" QMAKE_PRIVATE_CONFIG) qt_feature("sse4_2" PRIVATE - LABEL "SSE4.2" - CONDITION QT_FEATURE_sse4_1 AND TEST_subarch_sse4_2 + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("sse4_2" "QT_COMPILER_SUPPORTS_SSE4_2" VALUE "1") qt_feature_config("sse4_2" QMAKE_PRIVATE_CONFIG) qt_feature("avx" PRIVATE - LABEL "AVX" - CONDITION QT_FEATURE_sse4_2 AND TEST_subarch_avx AND ( NOT ANDROID 
OR NOT ( TEST_architecture_arch STREQUAL x86_64 ) ) + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx" "QT_COMPILER_SUPPORTS_AVX" VALUE "1") qt_feature_config("avx" QMAKE_PRIVATE_CONFIG) qt_feature("f16c" PRIVATE - LABEL "F16C" - CONDITION QT_FEATURE_avx AND TEST_subarch_f16c + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("f16c" "QT_COMPILER_SUPPORTS_F16C" VALUE "1") qt_feature_config("f16c" QMAKE_PRIVATE_CONFIG) qt_feature("avx2" PRIVATE - LABEL "AVX2" - CONDITION QT_FEATURE_avx AND TEST_subarch_avx2 AND ( NOT ANDROID OR NOT ( TEST_architecture_arch STREQUAL x86_64 ) ) + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx2" "QT_COMPILER_SUPPORTS_AVX2" VALUE "1") qt_feature_config("avx2" QMAKE_PRIVATE_CONFIG) qt_feature("avx512f" PRIVATE - LABEL "F" - CONDITION QT_FEATURE_avx2 AND TEST_subarch_avx512f + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512f" "QT_COMPILER_SUPPORTS_AVX512F" VALUE "1") qt_feature_config("avx512f" QMAKE_PRIVATE_CONFIG) qt_feature("avx512er" PRIVATE - LABEL "ER" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512er + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512er" "QT_COMPILER_SUPPORTS_AVX512ER" VALUE "1") qt_feature_config("avx512er" QMAKE_PRIVATE_CONFIG) qt_feature("avx512cd" PRIVATE - LABEL "CD" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512cd + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512cd" "QT_COMPILER_SUPPORTS_AVX512CD" VALUE "1") qt_feature_config("avx512cd" QMAKE_PRIVATE_CONFIG) qt_feature("avx512pf" PRIVATE - LABEL "PF" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512pf + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512pf" "QT_COMPILER_SUPPORTS_AVX512PF" VALUE "1") qt_feature_config("avx512pf" QMAKE_PRIVATE_CONFIG) qt_feature("avx512dq" PRIVATE - LABEL "DQ" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512dq + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512dq" "QT_COMPILER_SUPPORTS_AVX512DQ" VALUE "1") 
qt_feature_config("avx512dq" QMAKE_PRIVATE_CONFIG) qt_feature("avx512bw" PRIVATE - LABEL "BW" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512bw + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512bw" "QT_COMPILER_SUPPORTS_AVX512BW" VALUE "1") qt_feature_config("avx512bw" QMAKE_PRIVATE_CONFIG) qt_feature("avx512vl" PRIVATE - LABEL "VL" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512vl + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512vl" "QT_COMPILER_SUPPORTS_AVX512VL" VALUE "1") qt_feature_config("avx512vl" QMAKE_PRIVATE_CONFIG) qt_feature("avx512ifma" PRIVATE - LABEL "IFMA" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512ifma + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512ifma" "QT_COMPILER_SUPPORTS_AVX512IFMA" VALUE "1") qt_feature_config("avx512ifma" QMAKE_PRIVATE_CONFIG) qt_feature("avx512vbmi" PRIVATE - LABEL "VBMI" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512vbmi + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("avx512vbmi" "QT_COMPILER_SUPPORTS_AVX512VBMI" VALUE "1") qt_feature_config("avx512vbmi" QMAKE_PRIVATE_CONFIG) qt_feature("avx512vbmi2" PRIVATE LABEL "AVX512VBMI2" - CONDITION QT_FEATURE_avx512f AND TEST_subarch_avx512vbmi2 + CONDITION QT_FEATURE_x86intrin AND TEST_subarch_avx512vbmi2 ) qt_feature_definition("avx512vbmi2" "QT_COMPILER_SUPPORTS_AVX512VBMI2" VALUE "1") qt_feature_config("avx512vbmi2" QMAKE_PRIVATE_CONFIG) qt_feature("aesni" PRIVATE - LABEL "AES" - CONDITION QT_FEATURE_sse2 AND TEST_subarch_aesni + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("aesni" "QT_COMPILER_SUPPORTS_AES" VALUE "1") qt_feature_config("aesni" QMAKE_PRIVATE_CONFIG) qt_feature("vaes" PRIVATE LABEL "VAES" - CONDITION QT_FEATURE_avx2 AND TEST_subarch_vaes + CONDITION QT_FEATURE_x86intrin AND TEST_subarch_vaes ) qt_feature_definition("vaes" "QT_COMPILER_SUPPORTS_VAES" VALUE "1") qt_feature_config("vaes" QMAKE_PRIVATE_CONFIG) qt_feature("rdrnd" PRIVATE - LABEL "RDRAND" - CONDITION 
TEST_subarch_rdrnd + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("rdrnd" "QT_COMPILER_SUPPORTS_RDRND" VALUE "1") qt_feature_config("rdrnd" QMAKE_PRIVATE_CONFIG) qt_feature("rdseed" PRIVATE - LABEL "RDSEED" - CONDITION TEST_subarch_rdseed + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("rdseed" "QT_COMPILER_SUPPORTS_RDSEED" VALUE "1") qt_feature_config("rdseed" QMAKE_PRIVATE_CONFIG) qt_feature("shani" PRIVATE - LABEL "SHA" - CONDITION QT_FEATURE_sse2 AND TEST_subarch_shani + CONDITION QT_FEATURE_x86intrin ) qt_feature_definition("shani" "QT_COMPILER_SUPPORTS_SHA" VALUE "1") qt_feature_config("shani" QMAKE_PRIVATE_CONFIG) @@ -1114,32 +1040,14 @@ qt_configure_add_summary_entry(ARGS "intelcet") qt_configure_add_summary_section(NAME "Target compiler supports") qt_configure_add_summary_entry( TYPE "featureList" - ARGS "sse2 sse3 ssse3 sse4_1 sse4_2" - MESSAGE "SSE" - CONDITION ( ( TEST_architecture_arch STREQUAL i386 ) OR ( TEST_architecture_arch STREQUAL x86_64 ) OR ( TEST_architecture_arch STREQUAL wasm ) ) -) -qt_configure_add_summary_entry( - TYPE "featureList" - ARGS "avx avx2 vaes" - MESSAGE "AVX" - CONDITION ( ( TEST_architecture_arch STREQUAL i386 ) OR ( TEST_architecture_arch STREQUAL x86_64 ) ) -) -qt_configure_add_summary_entry( - TYPE "featureList" - ARGS "avx512f avx512er avx512cd avx512pf avx512dq avx512bw avx512vl avx512ifma avx512vbmi avx512vbmi2" - MESSAGE "AVX512" - CONDITION ( ( TEST_architecture_arch STREQUAL i386 ) OR ( TEST_architecture_arch STREQUAL x86_64 ) ) -) -qt_configure_add_summary_entry( - TYPE "featureList" - ARGS "aesni f16c rdrnd shani" - MESSAGE "Other x86" + ARGS "x86intrin vaes avx512vbmi2" + MESSAGE "x86 Intrinsics" CONDITION ( ( TEST_architecture_arch STREQUAL i386 ) OR ( TEST_architecture_arch STREQUAL x86_64 ) ) ) qt_configure_add_summary_entry( TYPE "featureList" ARGS "neon arm_crc32 arm_crypto" - MESSAGE "Extensions" + MESSAGE "ARM Extensions" CONDITION ( TEST_architecture_arch STREQUAL arm ) OR ( 
TEST_architecture_arch STREQUAL arm64 ) ) qt_configure_add_summary_entry( @@ -1215,6 +1123,31 @@ qt_configure_add_report_entry( MESSAGE "Command line option -sanitize fuzzer-no-link is only supported with clang compilers." CONDITION QT_FEATURE_sanitize_fuzzer_no_link AND NOT CLANG ) +if (TEST_architecture_arch STREQUAL x86_64 OR TEST_architecture_arch STREQUAL i386) + if ((TEST_architecture_arch STREQUAL i386) OR QNX OR WASM) + # Warn only + qt_configure_add_report_entry( + TYPE WARNING + CONDITION (NOT QT_FEATURE_x86intrin) + MESSAGE [=[ +All x86 intrinsics and SIMD support were disabled. If this was in error, check +the result of the build in config.tests/x86intrin and report at https://bugreports.qt.io. +]=] + ) + else() + qt_configure_add_report_entry( + TYPE ERROR + CONDITION (NOT QT_FEATURE_x86intrin) + MESSAGE [========[ +x86 intrinsics support missing. Check your compiler settings. If this is an +error, report at https://bugreports.qt.io with your compiler ID and version, +and this output: + +${TEST_x86intrin_OUTPUT} +]========] + ) + endif() +endif() # special case begin qt_configure_add_report_entry( TYPE ERROR diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h index 5df2faf4e43..af557bc976d 100644 --- a/src/corelib/global/qsimd_p.h +++ b/src/corelib/global/qsimd_p.h @@ -34,33 +34,6 @@ QT_WARNING_DISABLE_INTEL(103) for (int _i = 0; _i < max && i < length; ++i, ++_i) /* - * qt_module_config.prf defines the QT_COMPILER_SUPPORTS_XXX macros. - * They mean the compiler supports the necessary flags and the headers - * for the x86 and ARM intrinsics: - * - GCC: the -mXXX or march=YYY flag is necessary before #include - * up to 4.8; GCC >= 4.9 can include unconditionally - * - Intel CC: #include can happen unconditionally - * - MSVC: #include can happen unconditionally - * - RVCT: ??? - * - * We will try to include all headers possible under this configuration. - * - * MSVC does not define __SSE2__ & family, so we will define them. 
MSVC 2013 & - * up do define __AVX__ if the -arch:AVX option is passed on the command-line. - * - * Supported XXX are: - * Flag | Arch | GCC | Intel CC | MSVC | - * ARM_NEON | ARM | I & C | None | ? | - * SSE2 | x86 | I & C | I & C | I & C | - * SSE3 | x86 | I & C | I & C | I only | - * SSSE3 | x86 | I & C | I & C | I only | - * SSE4_1 | x86 | I & C | I & C | I only | - * SSE4_2 | x86 | I & C | I & C | I only | - * AVX | x86 | I & C | I & C | I & C | - * AVX2 | x86 | I & C | I & C | I only | - * AVX512xx | x86 | I & C | I & C | I only | - * I = intrinsics; C = code generation - * * Code can use the following constructs to determine compiler support & status: * - #ifdef __XXX__ (e.g: #ifdef __AVX__ or #ifdef __ARM_NEON__) * If this test passes, then the compiler is already generating code for that @@ -160,6 +133,43 @@ QT_WARNING_DISABLE_INTEL(103) # define QT_FUNCTION_TARGET(x) #endif +#if defined(__SSE2__) && !defined(QT_COMPILER_SUPPORTS_SSE2) && !defined(QT_BOOTSTRAPPED) +// Intrinsic support appears to be missing, so pretend these features don't exist +# undef __SSE__ +# undef __SSE2__ +# undef __SSE3__ +# undef __SSSE3__ +# undef __SSE4_1__ +# undef __SSE4_2__ +# undef __AES__ +# undef __POPCNT__ +# undef __AVX__ +# undef __F16C__ +# undef __RDRND__ +# undef __AVX2__ +# undef __BMI__ +# undef __BMI2__ +# undef __FMA__ +# undef __MOVBE__ +# undef __RDSEED__ +# undef __AVX512F__ +# undef __AVX512ER__ +# undef __AVX512CD__ +# undef __AVX512PF__ +# undef __AVX512DQ__ +# undef __AVX512BW__ +# undef __AVX512VL__ +# undef __AVX512IFMA__ +# undef __AVX512VBMI__ +# undef __SHA__ +# undef __AVX512VBMI2__ +# undef __AVX512BITALG__ +# undef __AVX512VNNI__ +# undef __AVX512VPOPCNTDQ__ +# undef __GFNI__ +# undef __VAES__ +#endif + #ifdef Q_PROCESSOR_X86 /* -- x86 intrinsic support -- */