|
16 | 16 |
|
17 | 17 | namespace ovms {
|
18 | 18 |
|
19 |
| - #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300) |
20 |
| - #pragma once |
21 |
| - #include <immintrin.h> |
22 |
| - |
23 |
| - |
24 |
| - int check_4th_gen_intel_core_features() { |
25 |
| - const int the_4th_gen_features = |
26 |
| - (_FEATURE_AVX); |
27 |
| - // Removed _FEATURE_AVX2| _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE |
28 |
| - return _may_i_use_cpu_feature(the_4th_gen_features); |
29 |
| - } |
30 |
| - |
31 |
| - #else /* non-Intel compiler */ |
32 |
| - #pragma once |
33 |
| - #include <stdint.h> |
34 |
| - #if defined(_MSC_VER) |
35 |
| - # include <intrin.h> |
36 |
| - #endif |
37 |
| - |
38 |
| - |
39 |
| - void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) { |
40 |
| - #if defined(_MSC_VER) |
41 |
| - __cpuidex(abcd, eax, ecx); |
42 |
| - #else |
43 |
| - uint32_t ebx, edx; |
44 |
| - # if defined( __i386__ ) && defined ( __PIC__ ) |
45 |
| - /* in case of PIC under 32-bit EBX cannot be clobbered */ |
46 |
| - __asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx), |
47 |
| - # else |
48 |
| - __asm__ ( "cpuid" : "+b" (ebx), |
49 |
| - # endif |
50 |
| - "+a" (eax), "+c" (ecx), "=d" (edx)); |
51 |
| - abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx; |
52 |
| - #endif |
53 |
| - } |
54 |
| - |
55 |
| - |
56 |
| - int check_xcr0_ymm() { |
57 |
| - uint32_t xcr0; |
58 |
| - #if defined(_MSC_VER) |
59 |
| - xcr0 = (uint32_t)_xgetbv(0);/* min VS2010 SP1 compiler is required */ |
60 |
| - #else |
61 |
| - __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); |
62 |
| - #endif |
63 |
| - return ((xcr0 & 6) == 6);/* checking if xmm and ymm state are enabled in XCR0 */ |
64 |
| - } |
65 |
| - |
66 |
| - |
67 |
| - |
68 |
| - int check_4th_gen_intel_core_features() { |
69 |
| - uint32_t abcd[4]; |
70 |
| - uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); |
71 |
| - uint32_t avx2_bmi12_mask = (1 << 5) | (1 << 3) | (1 << 8); |
72 |
| - uint32_t avx_osxsave_mask = (1 << 27) | (1 << 28); |
73 |
| - |
74 |
| - /* CPUID.(EAX=01H, ECX=0H):ECX.FMA[bit 12]==1 && |
| 19 | +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300) |
| 20 | +#pragma once |
| 21 | +#include <immintrin.h> |
| 22 | + |
| 23 | +int check_4th_gen_intel_core_features() { |
| 24 | + const int the_4th_gen_features = |
| 25 | + (_FEATURE_AVX); |
| 26 | + // Removed _FEATURE_AVX2| _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE |
| 27 | + return _may_i_use_cpu_feature(the_4th_gen_features); |
| 28 | +} |
| 29 | + |
| 30 | +#else /* non-Intel compiler */ |
| 31 | +#pragma once |
| 32 | +#include <stdint.h> |
| 33 | +#if defined(_MSC_VER) |
| 34 | +#include <intrin.h> |
| 35 | +#endif |
| 36 | + |
| 37 | +void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) { |
| 38 | +#if defined(_MSC_VER) |
| 39 | + __cpuidex(abcd, eax, ecx); |
| 40 | +#else |
| 41 | + uint32_t ebx, edx; |
| 42 | +#if defined(__i386__) && defined(__PIC__) |
| 43 | + /* in case of PIC under 32-bit EBX cannot be clobbered */ |
| 44 | + __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" |
| 45 | + : "=D"(ebx), |
| 46 | +#else |
| 47 | + __asm__("cpuid" |
| 48 | + : "+b"(ebx), |
| 49 | +#endif |
| 50 | + "+a"(eax), "+c"(ecx), "=d"(edx)); |
| 51 | + abcd[0] = eax; |
| 52 | + abcd[1] = ebx; |
| 53 | + abcd[2] = ecx; |
| 54 | + abcd[3] = edx; |
| 55 | +#endif |
| 56 | +} |
| 57 | + |
| 58 | +int check_xcr0_ymm() { |
| 59 | + uint32_t xcr0; |
| 60 | +#if defined(_MSC_VER) |
| 61 | + xcr0 = (uint32_t)_xgetbv(0); /* min VS2010 SP1 compiler is required */ |
| 62 | +#else |
| 63 | + __asm__("xgetbv" |
| 64 | + : "=a"(xcr0) |
| 65 | + : "c"(0) |
| 66 | + : "%edx"); |
| 67 | +#endif |
| 68 | + return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */ |
| 69 | +} |
| 70 | + |
| 71 | +int check_4th_gen_intel_core_features() { |
| 72 | + uint32_t abcd[4]; |
| 73 | + uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); |
| 74 | + uint32_t avx2_bmi12_mask = (1 << 5) | (1 << 3) | (1 << 8); |
| 75 | + uint32_t avx_osxsave_mask = (1 << 27) | (1 << 28); |
| 76 | + |
| 77 | + /* CPUID.(EAX=01H, ECX=0H):ECX.FMA[bit 12]==1 && |
75 | 78 | CPUID.(EAX=01H, ECX=0H):ECX.MOVBE[bit 22]==1 &&
|
76 | 79 | CPUID.(EAX=01H, ECX=0H):ECX.OSXSAVE[bit 27]==1 */
|
77 |
| - /* run_cpuid(1, 0, abcd); |
| 80 | + /* run_cpuid(1, 0, abcd); |
78 | 81 | if ((abcd[2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask)
|
79 | 82 | return 0; */
|
80 | 83 |
|
81 |
| - /* if (!check_xcr0_ymm()) |
| 84 | + /* if (!check_xcr0_ymm()) |
82 | 85 | return 0; */
|
83 | 86 |
|
84 |
| - /* CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 && |
| 87 | + /* CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 && |
85 | 88 | CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]==1 &&
|
86 | 89 | CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */
|
87 |
| - /* run_cpuid(7, 0, abcd); |
| 90 | + /* run_cpuid(7, 0, abcd); |
88 | 91 | if ((abcd[1] & avx2_bmi12_mask) != avx2_bmi12_mask)
|
89 | 92 | return 0; */
|
90 | 93 |
|
91 |
| - /* CPUID.(EAX=80000001H):ECX.LZCNT[bit 5]==1 */ |
92 |
| - /* run_cpuid(0x80000001, 0, abcd); |
| 94 | + /* CPUID.(EAX=80000001H):ECX.LZCNT[bit 5]==1 */ |
| 95 | + /* run_cpuid(0x80000001, 0, abcd); |
93 | 96 | if ((abcd[2] & (1 << 5)) == 0)
|
94 | 97 | return 0; */
|
95 | 98 |
|
96 |
| - // Check AVX support |
97 |
| - run_cpuid(1, 0, abcd); |
98 |
| - if ((abcd[2] & avx_osxsave_mask) != avx_osxsave_mask) |
99 |
| - return 0; |
| 99 | + // Check AVX support |
| 100 | + run_cpuid(1, 0, abcd); |
| 101 | + if ((abcd[2] & avx_osxsave_mask) != avx_osxsave_mask) |
| 102 | + return 0; |
100 | 103 |
|
101 |
| - return 1; |
102 |
| - } |
| 104 | + return 1; |
| 105 | +} |
103 | 106 |
|
104 |
| - #endif /* non-Intel compiler */ |
| 107 | +#endif /* non-Intel compiler */ |
105 | 108 |
|
106 | 109 | } // namespace ovms
|
0 commit comments