Botan 2.19.3
Crypto and TLS for C++
cpuid_x86.cpp
Go to the documentation of this file.
1/*
2* Runtime CPU detection for x86
3* (C) 2009,2010,2013,2017 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/cpuid.h>
9#include <botan/mem_ops.h>
10#include <botan/loadstor.h>
11
12#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13
14#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15 #include <intrin.h>
16#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17 #include <ia32intrin.h>
18#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19 #include <cpuid.h>
20#endif
21
22#endif
23
24namespace Botan {
25
26#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27
28uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
29 {
30#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
31 #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
32 #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
33
34#elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35 #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
36 #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
37
38#elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
39 #define X86_CPUID(type, out) \
40 asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
41 : "0" (type))
42
43 #define X86_CPUID_SUBLEVEL(type, level, out) \
44 asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
45 : "0" (type), "2" (level))
46
47#elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
48 #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
49
50 #define X86_CPUID_SUBLEVEL(type, level, out) \
51 do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
52#else
53 #warning "No way of calling x86 cpuid instruction for this compiler"
54 #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
55 #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
56#endif
57
58 uint64_t features_detected = 0;
59 uint32_t cpuid[4] = { 0 };
60 bool has_avx = 0;
61
62 // CPUID 0: vendor identification, max sublevel
63 X86_CPUID(0, cpuid);
64
65 const uint32_t max_supported_sublevel = cpuid[0];
66
67 const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
68 const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
69 const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
70 const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
71
72 if(max_supported_sublevel >= 1)
73 {
74 // CPUID 1: feature bits
75 X86_CPUID(1, cpuid);
76 const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
77
78 enum x86_CPUID_1_bits : uint64_t {
79 RDTSC = (1ULL << 4),
80 SSE2 = (1ULL << 26),
81 CLMUL = (1ULL << 33),
82 SSSE3 = (1ULL << 41),
83 SSE41 = (1ULL << 51),
84 SSE42 = (1ULL << 52),
85 AESNI = (1ULL << 57),
86 OSXSAVE = (1ULL << 59),
87 AVX = (1ULL << 60),
88 RDRAND = (1ULL << 62)
89 };
90
91 if(flags0 & x86_CPUID_1_bits::RDTSC)
92 features_detected |= CPUID::CPUID_RDTSC_BIT;
93 if(flags0 & x86_CPUID_1_bits::SSE2)
94 features_detected |= CPUID::CPUID_SSE2_BIT;
95 if(flags0 & x86_CPUID_1_bits::CLMUL)
96 features_detected |= CPUID::CPUID_CLMUL_BIT;
97 if(flags0 & x86_CPUID_1_bits::SSSE3)
98 features_detected |= CPUID::CPUID_SSSE3_BIT;
99 if(flags0 & x86_CPUID_1_bits::SSE41)
100 features_detected |= CPUID::CPUID_SSE41_BIT;
101 if(flags0 & x86_CPUID_1_bits::SSE42)
102 features_detected |= CPUID::CPUID_SSE42_BIT;
103 if(flags0 & x86_CPUID_1_bits::AESNI)
104 features_detected |= CPUID::CPUID_AESNI_BIT;
105 if(flags0 & x86_CPUID_1_bits::RDRAND)
106 features_detected |= CPUID::CPUID_RDRAND_BIT;
107 if((flags0 & x86_CPUID_1_bits::AVX) &&
108 (flags0 & x86_CPUID_1_bits::OSXSAVE))
109 has_avx = 1;
110 }
111
112 if(is_intel)
113 {
114 // Intel cache line size is in cpuid(1) output
115 *cache_line_size = 8 * get_byte(2, cpuid[1]);
116 }
117 else if(is_amd)
118 {
119 // AMD puts it in vendor zone
120 X86_CPUID(0x80000005, cpuid);
121 *cache_line_size = get_byte(3, cpuid[2]);
122 }
123
124 if(max_supported_sublevel >= 7)
125 {
126 clear_mem(cpuid, 4);
127 X86_CPUID_SUBLEVEL(7, 0, cpuid);
128
129 enum x86_CPUID_7_bits : uint64_t {
130 BMI1 = (1ULL << 3),
131 AVX2 = (1ULL << 5),
132 BMI2 = (1ULL << 8),
133 AVX512_F = (1ULL << 16),
134 AVX512_DQ = (1ULL << 17),
135 RDSEED = (1ULL << 18),
136 ADX = (1ULL << 19),
137 AVX512_IFMA = (1ULL << 21),
138 SHA = (1ULL << 29),
139 AVX512_BW = (1ULL << 30),
140 AVX512_VL = (1ULL << 31),
141 AVX512_VBMI = (1ULL << 33),
142 AVX512_VBMI2 = (1ULL << 38),
143 AVX512_VAES = (1ULL << 41),
144 AVX512_VCLMUL = (1ULL << 42),
145 AVX512_VBITALG = (1ULL << 44),
146 };
147
148 const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
149
150 if((flags7 & x86_CPUID_7_bits::AVX2) && has_avx)
151 features_detected |= CPUID::CPUID_AVX2_BIT;
152 if(flags7 & x86_CPUID_7_bits::BMI1)
153 {
154 features_detected |= CPUID::CPUID_BMI1_BIT;
155 /*
156 We only set the BMI2 bit if BMI1 is also supported, so BMI2
157 code can safely use both extensions. No known processor
158 implements BMI2 but not BMI1.
159 */
160 if(flags7 & x86_CPUID_7_bits::BMI2)
161 features_detected |= CPUID::CPUID_BMI2_BIT;
162 }
163
164 if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_avx)
165 {
166 features_detected |= CPUID::CPUID_AVX512F_BIT;
167
168 if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
169 features_detected |= CPUID::CPUID_AVX512DQ_BIT;
170 if(flags7 & x86_CPUID_7_bits::AVX512_BW)
171 features_detected |= CPUID::CPUID_AVX512BW_BIT;
172
173 const uint64_t ICELAKE_FLAGS =
174 x86_CPUID_7_bits::AVX512_F |
175 x86_CPUID_7_bits::AVX512_DQ |
176 x86_CPUID_7_bits::AVX512_IFMA |
177 x86_CPUID_7_bits::AVX512_BW |
178 x86_CPUID_7_bits::AVX512_VL |
179 x86_CPUID_7_bits::AVX512_VBMI |
180 x86_CPUID_7_bits::AVX512_VBMI2 |
181 x86_CPUID_7_bits::AVX512_VBITALG;
182
183 if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
184 features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
185
186 if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
187 features_detected |= CPUID::CPUID_AVX512_AES_BIT;
188 if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
189 features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
190 }
191
192 if(flags7 & x86_CPUID_7_bits::RDSEED)
193 features_detected |= CPUID::CPUID_RDSEED_BIT;
194 if(flags7 & x86_CPUID_7_bits::ADX)
195 features_detected |= CPUID::CPUID_ADX_BIT;
196 if(flags7 & x86_CPUID_7_bits::SHA)
197 features_detected |= CPUID::CPUID_SHA_BIT;
198 }
199
200#undef X86_CPUID
201#undef X86_CPUID_SUBLEVEL
202
203 /*
204 * If we don't have access to CPUID, we can still safely assume that
205 * any x86-64 processor has SSE2 and RDTSC
206 */
207#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
208 if(features_detected == 0)
209 {
210 features_detected |= CPUID::CPUID_SSE2_BIT;
211 features_detected |= CPUID::CPUID_RDTSC_BIT;
212 }
213#endif
214
215 return features_detected;
216 }
217
218#endif
219
220}
bool same_mem(const T *p1, const T *p2, size_t n)
Definition mem_ops.h:217
constexpr uint8_t get_byte(size_t byte_num, T input)
Definition loadstor.h:41
void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:115