/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2016 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <cpuid.h>
#include <cpu-features.h>
#include <libc-internal.h>

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
               cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
               cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
      cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
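      /* CPUID leaf 1 EAX layout: stepping in bits 3:0, model in bits
         7:4, family in bits 11:8, extended model in bits 19:16 and
         extended family in bits 27:20.  The extended model bits are
         kept shifted left by 4 so they can be added directly to the
         4-bit base model.  */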
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

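  /* CPUID leaf 7 subleaf 0 enumerates the extended feature flags,
     including AVX2 and the AVX-512 subsets.  */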
  if (cpu_features->max_cpuid >= 7)
    __cpuid_count (7, 0,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
                   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
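      /* XGETBV with ECX = 0 returns XCR0 in EDX:EAX.  XCR0 reports
         which state components the OS has enabled with XSETBV, i.e.
         which extended registers the kernel saves and restores on
         context switch.  */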
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
      /* Are the YMM and XMM states usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state)) ==
          (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            cpu_features->feature[index_arch_AVX_Usable]
              |= bit_arch_AVX_Usable;
          /* Determine if AVX2 is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
            cpu_features->feature[index_arch_AVX2_Usable]
              |= bit_arch_AVX2_Usable;
          /* Check if the Opmask state, the upper 256 bits of
             ZMM0-ZMM15 and the ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state)) ==
              (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  cpu_features->feature[index_arch_AVX512F_Usable]
                    |= bit_arch_AVX512F_Usable;
                  /* Determine if AVX512DQ is usable.  */
                  if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
                    cpu_features->feature[index_arch_AVX512DQ_Usable]
                      |= bit_arch_AVX512DQ_Usable;
                }
            }
          /* Determine if FMA is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, FMA))
            cpu_features->feature[index_arch_FMA_Usable]
              |= bit_arch_FMA_Usable;
        }

      /* For _dl_runtime_resolve, set xsave_state_size to the xsave
         area size plus the integer register save size, aligned to
         64 bytes.  */
      if (cpu_features->max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

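          /* CPUID leaf 0xd subleaf 0: EBX is the size of the XSAVE
             area required for the state components currently enabled
             in XCR0.  */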
          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              cpu_features->xsave_state_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

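              /* CPUID leaf 0xd subleaf 1: EAX enumerates XSAVEOPT
                 (bit 0), XSAVEC (bit 1), XGETBV with ECX = 1 (bit 2)
                 and XSAVES (bit 3).  */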
              __cpuid_count (0xd, 1, eax, ebx, ecx, edx);

              /* Check if XSAVEC is available.  */
              if ((eax & (1 << 1)) != 0)
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;

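                  /* In the compacted format the legacy region and the
                     XSAVE header have a fixed layout: x87 state at
                     offset 0, SSE (XMM) state at offset 160 and the
                     first extended component at offset 576, right
                     after the 512-byte legacy area and the 64-byte
                     header.  */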
                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
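                          /* ECX bit 1 set means this component must
                             be aligned to 64 bytes in the compacted
                             format; ECX was cleared above for
                             components not in STATE_SAVE_MASK.  */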
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      cpu_features->feature[index_arch_XSAVEC_Usable]
                        |= bit_arch_XSAVEC_Usable;
                    }
                }
            }
        }
    }
}

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);

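  /* CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order,
     four little-endian characters per register.  */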
  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model);

      if (family == 0x06)
        {
          ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
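          /* On family 0x06 the extended model bits are part of the
             full model number.  */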
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              cpu_features->feature[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;

            case 0x57:
              /* Knights Landing.  Enable Silvermont optimizations.  */

            case 0x5c:
            case 0x5f:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont.  */

            case 0x4c:
              /* Airmont is a die shrink of Silvermont.  */

            case 0x37:
            case 0x4a:
            case 0x4d:
            case 0x5a:
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
#endif
#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
#endif
#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
#endif
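              /* The preprocessor checks above guarantee that all four
                 bits live in the same feature word, so one store can
                 set them together.  */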
              cpu_features->feature[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one
                 of the Core i3/i5/i7 processors if AVX is
                 available.  */
              if ((ecx & bit_cpu_AVX) == 0)
                break;
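              /* Fall through to the Core i3/i5/i7 cases below.  */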

            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
#endif
#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
#endif
#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
#endif
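              /* Again, the preprocessor checks ensure these bits
                 share one feature word.  */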
              cpu_features->feature[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;
            }
        }

      /* Unaligned loads with 256-bit AVX registers are faster on
         Intel processors with AVX2.  */
      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
        cpu_features->feature[index_arch_AVX_Fast_Unaligned_Load]
          |= bit_arch_AVX_Fast_Unaligned_Load;

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Otherwise set Prefer_No_AVX512 to
         avoid the lower CPU frequency that AVX512 use can cause.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        cpu_features->feature[index_arch_Prefer_No_AVX512]
          |= bit_arch_Prefer_No_AVX512;
    }
  /* This spells out "AuthenticAMD".  */
  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model);

      unsigned int eax;
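      /* CPUID leaf 0x80000000 returns the highest supported extended
         leaf in EAX.  */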
      __cpuid (0x80000000, eax, ebx, ecx, edx);
      if (eax >= 0x80000001)
        __cpuid (0x80000001,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
                 cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);

      if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
        {
          /* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
            cpu_features->feature[index_arch_FMA4_Usable]
              |= bit_arch_FMA4_Usable;
        }

      if (family == 0x15)
        {
#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
#endif
          /* "Excavator".  */
          if (model >= 0x60 && model <= 0x7f)
            cpu_features->feature[index_arch_Fast_Unaligned_Load]
              |= (bit_arch_Fast_Unaligned_Load
                  | bit_arch_Fast_Copy_Backward);
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->feature[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->feature[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->family = family;
  cpu_features->model = model;
  cpu_features->kind = kind;
}