1/* x86 CPU feature tuning.
2 This file is part of the GNU C Library.
3 Copyright (C) 2017-2019 Free Software Foundation, Inc.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19#if HAVE_TUNABLES
20# define TUNABLE_NAMESPACE cpu
21# include <stdbool.h>
22# include <stdint.h>
23# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
24# include <elf/dl-tunables.h>
25# include <string.h>
26# include <cpu-features.h>
27# include <ldsodefs.h>
28
/* Pick the memcmp used by this file.  IFUNC is set up by
   init_cpu_features, so in libc.a (USE_MULTIARCH without SHARED) the
   IFUNC memcmp and strlen can't be used from init_cpu_features; bind
   DEFAULT_MEMCMP directly to a fixed implementation there.  In every
   other configuration DEFAULT_MEMCMP is plain memcmp.  */
# if !defined USE_MULTIARCH || defined SHARED
#  define DEFAULT_MEMCMP memcmp
# else
#  ifndef __x86_64__
#   define DEFAULT_MEMCMP __memcmp_ia32
#  else
#   define DEFAULT_MEMCMP __memcmp_sse2
#  endif
/* Declare the selected implementation with memcmp's type.  */
extern __typeof (memcmp) DEFAULT_MEMCMP;
# endif
41
/* Disable a CPU feature FEAT.  If the NAMELEN bytes at STR spell FEAT,
   clear bit_cpu_FEAT in the reg_FEAT field of CF->cpuid[index_cpu_FEAT]
   and `break' out of the enclosing switch or loop; otherwise continue
   with the statement that follows.  NAMELEN must equal the length of
   FEAT's spelling, which is checked at compile time.  The expansion is
   several statements, so it can't be the sole body of an unbraced
   if/else.  */
# define CHECK_GLIBC_IFUNC_CPU_OFF(str, cf, feat, namelen) \
  _Static_assert (sizeof (#feat) - 1 == (namelen), \
                  #feat " != " #namelen); \
  if (DEFAULT_MEMCMP (str, #feat, namelen) == 0) \
    { \
      (cf)->cpuid[index_cpu_##feat].reg_##feat &= ~bit_cpu_##feat; \
      break; \
    }
50
/* Disable an ARCH feature FEAT.  An ARCH feature which isn't available
   is never enabled here.  If the NAMELEN bytes at STR spell FEAT, clear
   bit_arch_FEAT in CF->feature[index_arch_FEAT] and `break' out of the
   enclosing switch or loop.  NAMELEN must equal the length of FEAT's
   spelling, which is checked at compile time.  */
# define CHECK_GLIBC_IFUNC_ARCH_OFF(str, cf, feat, namelen) \
  _Static_assert (sizeof (#feat) - 1 == (namelen), \
                  #feat " != " #namelen); \
  if (DEFAULT_MEMCMP (str, #feat, namelen) == 0) \
    { \
      (cf)->feature[index_arch_##feat] &= ~bit_arch_##feat; \
      break; \
    }
61
/* Enable or disable an ARCH feature FEAT.  If the NAMELEN bytes at STR
   spell FEAT, set bit_arch_FEAT in CF->feature[index_arch_FEAT] when
   OFF is false, or clear it when OFF is true, then `break' out of the
   enclosing switch or loop.  NAMELEN must equal the length of FEAT's
   spelling, which is checked at compile time.  */
# define CHECK_GLIBC_IFUNC_ARCH_BOTH(str, cf, feat, off, namelen) \
  _Static_assert (sizeof (#feat) - 1 == (namelen), \
                  #feat " != " #namelen); \
  if (DEFAULT_MEMCMP (str, #feat, namelen) == 0) \
    { \
      if (!(off)) \
        (cf)->feature[index_arch_##feat] |= bit_arch_##feat; \
      else \
        (cf)->feature[index_arch_##feat] &= ~bit_arch_##feat; \
      break; \
    }
76
/* Enable or disable an ARCH feature FEAT, where enabling additionally
   requires the ARCH feature NEED to be enabled already.  If the NAMELEN
   bytes at STR spell FEAT: when OFF is true clear bit_arch_FEAT in
   CF->feature[index_arch_FEAT]; when OFF is false set it only if
   CPU_FEATURES_ARCH_P (CF, NEED) holds.  In either case `break' out of
   the enclosing switch or loop.  NAMELEN must equal the length of
   FEAT's spelling, which is checked at compile time.  */
# define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(str, cf, feat, need, \
                                               off, namelen) \
  _Static_assert (sizeof (#feat) - 1 == (namelen), \
                  #feat " != " #namelen); \
  if (DEFAULT_MEMCMP (str, #feat, namelen) == 0) \
    { \
      if (!(off)) \
        { \
          if (CPU_FEATURES_ARCH_P (cf, need)) \
            (cf)->feature[index_arch_##feat] |= bit_arch_##feat; \
        } \
      else \
        (cf)->feature[index_arch_##feat] &= ~bit_arch_##feat; \
      break; \
    }
92
/* Enable or disable an ARCH feature FEAT, where enabling additionally
   requires the CPU feature NEED to be enabled.  If the NAMELEN bytes at
   STR spell FEAT: when OFF is true clear bit_arch_FEAT in
   CF->feature[index_arch_FEAT]; when OFF is false set it only if
   CPU_FEATURES_CPU_P (CF, NEED) holds.  In either case `break' out of
   the enclosing switch or loop.  NAMELEN must equal the length of
   FEAT's spelling, which is checked at compile time.  */
# define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(str, cf, feat, need, \
                                              off, namelen) \
  _Static_assert (sizeof (#feat) - 1 == (namelen), \
                  #feat " != " #namelen); \
  if (DEFAULT_MEMCMP (str, #feat, namelen) == 0) \
    { \
      if (!(off)) \
        { \
          if (CPU_FEATURES_CPU_P (cf, need)) \
            (cf)->feature[index_arch_##feat] |= bit_arch_##feat; \
        } \
      else \
        (cf)->feature[index_arch_##feat] &= ~bit_arch_##feat; \
      break; \
    }
108
109attribute_hidden
110void
111TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
112{
113 /* The current IFUNC selection is based on microbenchmarks in glibc.
114 It should give the best performance for most workloads. But other
115 choices may have better performance for a particular workload or on
116 the hardware which wasn't available when the selection was made.
117 The environment variable:
118
119 GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,-zzz,....
120
121 can be used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature
122 yyy and zzz, where the feature name is case-sensitive and has to
123 match the ones in cpu-features.h. It can be used by glibc developers
124 to tune for a new processor or override the IFUNC selection to
125 improve performance for a particular workload.
126
127 NOTE: the IFUNC selection may change over time. Please check all
128 multiarch implementations when experimenting. */
129
130 const char *p = valp->strval;
131 struct cpu_features *cpu_features = &GLRO(dl_x86_cpu_features);
132 size_t len;
133
134 do
135 {
136 const char *c, *n;
137 bool disable;
138 size_t nl;
139
140 for (c = p; *c != ','; c++)
141 if (*c == '\0')
142 break;
143
144 len = c - p;
145 disable = *p == '-';
146 if (disable)
147 {
148 n = p + 1;
149 nl = len - 1;
150 }
151 else
152 {
153 n = p;
154 nl = len;
155 }
156 switch (nl)
157 {
158 default:
159 break;
160 case 3:
161 if (disable)
162 {
163 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX, 3);
164 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CX8, 3);
165 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA, 3);
166 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, HTT, 3);
167 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, IBT, 3);
168 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, RTM, 3);
169 }
170 break;
171 case 4:
172 if (disable)
173 {
174 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX2, 4);
175 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI1, 4);
176 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, BMI2, 4);
177 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, CMOV, 4);
178 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, ERMS, 4);
179 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, FMA4, 4);
180 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE2, 4);
181 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, I586, 4);
182 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, I686, 4);
183 }
184 break;
185 case 5:
186 if (disable)
187 {
188 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, LZCNT, 5);
189 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, MOVBE, 5);
190 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SHSTK, 5);
191 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSSE3, 5);
192 }
193 break;
194 case 6:
195 if (disable)
196 {
197 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, POPCNT, 6);
198 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_1, 6);
199 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, SSE4_2, 6);
200 }
201 break;
202 case 7:
203 if (disable)
204 {
205 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512F, 7);
206 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, OSXSAVE, 7);
207 }
208 break;
209 case 8:
210 if (disable)
211 {
212 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512CD, 8);
213 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512BW, 8);
214 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512DQ, 8);
215 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512ER, 8);
216 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512PF, 8);
217 CHECK_GLIBC_IFUNC_CPU_OFF (n, cpu_features, AVX512VL, 8);
218 }
219 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Slow_BSF,
220 disable, 8);
221 break;
222 case 10:
223 if (disable)
224 {
225 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, AVX_Usable,
226 10);
227 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, FMA_Usable,
228 10);
229 }
230 break;
231 case 11:
232 if (disable)
233 {
234 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, AVX2_Usable,
235 11);
236 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features, FMA4_Usable,
237 11);
238 }
239 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Prefer_ERMS,
240 disable, 11);
241 CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (n, cpu_features,
242 Slow_SSE4_2, SSE4_2,
243 disable, 11);
244 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Prefer_FSRM,
245 disable, 11);
246 break;
247 case 13:
248 if (disable)
249 {
250 /* Update xsave_state_size to XSAVE state size. */
251 cpu_features->xsave_state_size
252 = cpu_features->xsave_state_full_size;
253 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
254 XSAVEC_Usable, 13);
255 }
256 break;
257 case 14:
258 if (disable)
259 {
260 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
261 AVX512F_Usable, 14);
262 }
263 break;
264 case 15:
265 if (disable)
266 {
267 CHECK_GLIBC_IFUNC_ARCH_OFF (n, cpu_features,
268 AVX512DQ_Usable, 15);
269 }
270 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Fast_Rep_String,
271 disable, 15);
272 break;
273 case 16:
274 {
275 CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
276 (n, cpu_features, Prefer_No_AVX512, AVX512F_Usable,
277 disable, 16);
278 }
279 break;
280 case 18:
281 {
282 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
283 Fast_Copy_Backward, disable,
284 18);
285 }
286 break;
287 case 19:
288 {
289 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
290 Fast_Unaligned_Load, disable,
291 19);
292 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
293 Fast_Unaligned_Copy, disable,
294 19);
295 }
296 break;
297 case 20:
298 {
299 CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
300 (n, cpu_features, Prefer_No_VZEROUPPER, AVX_Usable,
301 disable, 20);
302 }
303 break;
304 case 21:
305 {
306 CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
307 Prefer_MAP_32BIT_EXEC, disable,
308 21);
309 }
310 break;
311 case 23:
312 {
313 CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
314 (n, cpu_features, AVX_Fast_Unaligned_Load, AVX_Usable,
315 disable, 23);
316 }
317 break;
318 case 24:
319 {
320 CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
321 (n, cpu_features, MathVec_Prefer_No_AVX512,
322 AVX512F_Usable, disable, 24);
323 }
324 break;
325 case 26:
326 {
327 CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH
328 (n, cpu_features, Prefer_PMINUB_for_stringop, SSE2,
329 disable, 26);
330 }
331 break;
332 }
333 p += len + 1;
334 }
335 while (*p != '\0');
336}
337
338# if CET_ENABLED
339# include <cet-tunables.h>
340
341attribute_hidden
342void
343TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *valp)
344{
345 if (DEFAULT_MEMCMP (valp->strval, "on", sizeof ("on")) == 0)
346 {
347 GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1);
348 GL(dl_x86_feature_1)[1] |= CET_ALWAYS_ON;
349 }
350 else if (DEFAULT_MEMCMP (valp->strval, "off", sizeof ("off")) == 0)
351 {
352 GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1);
353 GL(dl_x86_feature_1)[1] |= CET_ALWAYS_OFF;
354 }
355 else if (DEFAULT_MEMCMP (valp->strval, "permissive",
356 sizeof ("permissive")) == 0)
357 {
358 GL(dl_x86_feature_1)[1] &= ~((1 << CET_MAX) - 1);
359 GL(dl_x86_feature_1)[1] |= CET_PERMISSIVE;
360 }
361}
362
363attribute_hidden
364void
365TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *valp)
366{
367 if (DEFAULT_MEMCMP (valp->strval, "on", sizeof ("on")) == 0)
368 {
369 GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX);
370 GL(dl_x86_feature_1)[1] |= (CET_ALWAYS_ON << CET_MAX);
371 }
372 else if (DEFAULT_MEMCMP (valp->strval, "off", sizeof ("off")) == 0)
373 {
374 GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX);
375 GL(dl_x86_feature_1)[1] |= (CET_ALWAYS_OFF << CET_MAX);
376 }
377 else if (DEFAULT_MEMCMP (valp->strval, "permissive",
378 sizeof ("permissive")) == 0)
379 {
380 GL(dl_x86_feature_1)[1] &= ~(((1 << CET_MAX) - 1) << CET_MAX);
381 GL(dl_x86_feature_1)[1] |= (CET_PERMISSIVE << CET_MAX);
382 }
383}
384# endif
385#endif
386