/* This file is part of the GNU C Library.
   Copyright (C) 2008-2018 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
17
18#ifndef cpu_features_h
19#define cpu_features_h
20
/* Bits of the internal feature word.  bit_arch_NAME is the mask that
   CPU_FEATURES_ARCH_P applies to feature[index_arch_NAME].  Bit 3 is
   currently unassigned.  These definitions sit outside the
   !__ASSEMBLER__ section, so they are visible to assembly sources too.  */
#define bit_arch_Fast_Rep_String                (1 << 0)
#define bit_arch_Fast_Copy_Backward             (1 << 1)
#define bit_arch_Slow_BSF                       (1 << 2)
#define bit_arch_Fast_Unaligned_Load            (1 << 4)
#define bit_arch_Prefer_PMINUB_for_stringop     (1 << 5)
#define bit_arch_AVX_Usable                     (1 << 6)
#define bit_arch_FMA_Usable                     (1 << 7)
#define bit_arch_FMA4_Usable                    (1 << 8)
#define bit_arch_Slow_SSE4_2                    (1 << 9)
#define bit_arch_AVX2_Usable                    (1 << 10)
#define bit_arch_AVX_Fast_Unaligned_Load        (1 << 11)
#define bit_arch_AVX512F_Usable                 (1 << 12)
#define bit_arch_AVX512DQ_Usable                (1 << 13)
#define bit_arch_I586                           (1 << 14)
#define bit_arch_I686                           (1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC          (1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER           (1 << 17)
#define bit_arch_Fast_Unaligned_Copy            (1 << 18)
#define bit_arch_Prefer_ERMS                    (1 << 19)
#define bit_arch_Prefer_No_AVX512               (1 << 20)
#define bit_arch_MathVec_Prefer_No_AVX512       (1 << 21)
#define bit_arch_XSAVEC_Usable                  (1 << 22)
43
/* CPUID Feature flags.  bit_cpu_NAME is the mask that CPU_FEATURES_CPU_P
   applies to register reg_NAME of cpuid[index_cpu_NAME]; masks that
   belong to different registers may therefore share a bit position.  */

/* COMMON_CPUID_INDEX_1.  */
#define bit_cpu_CX8             (1 << 8)
#define bit_cpu_CMOV            (1 << 15)
#define bit_cpu_SSE             (1 << 25)
#define bit_cpu_SSE2            (1 << 26)
#define bit_cpu_SSSE3           (1 << 9)
#define bit_cpu_SSE4_1          (1 << 19)
#define bit_cpu_SSE4_2          (1 << 20)
#define bit_cpu_OSXSAVE         (1 << 27)
#define bit_cpu_AVX             (1 << 28)
#define bit_cpu_POPCOUNT        (1 << 23)
#define bit_cpu_FMA             (1 << 12)
#define bit_cpu_HTT             (1 << 28)
#define bit_cpu_MOVBE           (1 << 22)
/* Same bit as bit_cpu_POPCOUNT; both spellings are provided.  */
#define bit_cpu_POPCNT          (1 << 23)

/* COMMON_CPUID_INDEX_80000001.  */
#define bit_cpu_FMA4            (1 << 16)
#define bit_cpu_LZCNT           (1 << 5)

/* COMMON_CPUID_INDEX_7.  */
#define bit_cpu_BMI1            (1 << 3)
#define bit_cpu_BMI2            (1 << 8)
#define bit_cpu_ERMS            (1 << 9)
#define bit_cpu_RTM             (1 << 11)
#define bit_cpu_AVX2            (1 << 5)
#define bit_cpu_AVX512F         (1 << 16)
#define bit_cpu_AVX512DQ        (1 << 17)
#define bit_cpu_AVX512PF        (1 << 26)
#define bit_cpu_AVX512ER        (1 << 27)
#define bit_cpu_AVX512CD        (1 << 28)
#define bit_cpu_AVX512BW        (1 << 30)
/* Bit 31 needs an unsigned operand: shifting a signed 1 into the sign
   bit would be undefined.  */
#define bit_cpu_AVX512VL        (1u << 31)
#define bit_cpu_IBT             (1u << 20)
#define bit_cpu_SHSTK           (1u << 7)
79
/* XCR0 Feature flags.  One mask per register-state component named in
   the macro; bits 0, 3 and 4 have no name here.  */
#define bit_XMM_state           (1 << 1)
#define bit_YMM_state           (1 << 2)
#define bit_Opmask_state        (1 << 5)
#define bit_ZMM0_15_state       (1 << 6)
#define bit_ZMM16_31_state      (1 << 7)
86
/* The integer bit array index for the first set of internal feature
   bits.  Every index_arch_* macro currently expands to this value.  */
#define FEATURE_INDEX_1 0

/* The current maximum size of the feature integer bit array, i.e. the
   number of elements in the `feature' member of struct cpu_features.  */
#define FEATURE_INDEX_MAX 1
92
/* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
   space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
   aligned to 16 bytes for fxsave and 64 bytes for xsave.  The seven
   8-byte registers take 8 * 7 = 56 bytes; the extra 8 brings the
   offset to 64, which satisfies both alignment requirements.  */
#define STATE_SAVE_OFFSET (8 * 7 + 8)

/* Save SSE, AVX, AVX512, mask and bound registers.  Bits 1, 2, 5, 6
   and 7 have the same values as bit_XMM_state, bit_YMM_state,
   bit_Opmask_state, bit_ZMM0_15_state and bit_ZMM16_31_state; bit 3 has
   no named constant in this file (presumably the bound-register
   component -- confirm against the XCR0 layout).  */
#define STATE_SAVE_MASK \
  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
101
102#ifdef __ASSEMBLER__
103# include <cpu-features-offsets.h>
104#else /* __ASSEMBLER__ */
/* Slots of the `cpuid' array in struct cpu_features.  The index_cpu_*
   macros map each CPU feature to one of these slots.  */
enum
  {
    COMMON_CPUID_INDEX_1 = 0,
    COMMON_CPUID_INDEX_7,
    COMMON_CPUID_INDEX_80000001,
    /* Keep the following line at the end.  */
    COMMON_CPUID_INDEX_MAX
  };
113
114struct cpu_features
115{
116 enum cpu_features_kind
117 {
118 arch_kind_unknown = 0,
119 arch_kind_intel,
120 arch_kind_amd,
121 arch_kind_other
122 } kind;
123 int max_cpuid;
124 struct cpuid_registers
125 {
126 unsigned int eax;
127 unsigned int ebx;
128 unsigned int ecx;
129 unsigned int edx;
130 } cpuid[COMMON_CPUID_INDEX_MAX];
131 unsigned int family;
132 unsigned int model;
133 /* The state size for XSAVEC or XSAVE. The type must be unsigned long
134 int so that we use
135
136 sub xsave_state_size_offset(%rip) %RSP_LP
137
138 in _dl_runtime_resolve. */
139 unsigned long int xsave_state_size;
140 /* The full state size for XSAVE when XSAVEC is disabled by
141
142 GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
143 */
144 unsigned int xsave_state_full_size;
145 unsigned int feature[FEATURE_INDEX_MAX];
146 /* Data cache size for use in memory and string routines, typically
147 L1 size. */
148 unsigned long int data_cache_size;
149 /* Shared cache size for use in memory and string routines, typically
150 L2 or L3 size. */
151 unsigned long int shared_cache_size;
152 /* Threshold to use non temporal store. */
153 unsigned long int non_temporal_threshold;
154};
155
/* Used from outside of glibc to get access to the CPU features
   structure.  */
extern const struct cpu_features *__get_cpu_features (void)
     __attribute__ ((const));

/* Inside libc proper the accessor is replaced by a direct reference to
   the global object.  The two conditions are nested rather than joined
   with `&&': the preprocessor parses the whole #if expression even when
   its left operand is false, so `IS_IN (nonlib)' would be a syntax
   error wherever IS_IN is not defined as a function-like macro.  */
# ifdef _LIBC
#  if !IS_IN (nonlib)
/* Unused for x86.  */
#   define INIT_ARCH()
#   define __get_cpu_features()	(&GLRO(dl_x86_cpu_features))
#  endif
# endif
166
167
/* Only used directly in cpu-features.c.

   CPU_FEATURES_CPU_P (ptr, name) is nonzero when bit_cpu_NAME is set in
   register reg_NAME of ptr->cpuid[index_cpu_NAME].
   CPU_FEATURES_ARCH_P (ptr, name) is nonzero when bit_arch_NAME is set
   in ptr->feature[index_arch_NAME].

   PTR is parenthesized so that any pointer expression, e.g. `&obj',
   can be passed; NAME is only ever token-pasted.  */
# define CPU_FEATURES_CPU_P(ptr, name) \
  ((((ptr)->cpuid[index_cpu_##name].reg_##name) & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
  ((((ptr)->feature[index_arch_##name]) & (bit_arch_##name)) != 0)

/* HAS_* evaluates to true if we may use the feature at runtime.  */
# define HAS_CPU_FEATURE(name) \
  CPU_FEATURES_CPU_P (__get_cpu_features (), name)
# define HAS_ARCH_FEATURE(name) \
  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
179
/* index_cpu_NAME selects the COMMON_CPUID_INDEX_* slot of the `cpuid'
   array that holds the bit for feature NAME.  Grouped by slot.  */

/* COMMON_CPUID_INDEX_1.  */
# define index_cpu_CX8		COMMON_CPUID_INDEX_1
# define index_cpu_CMOV		COMMON_CPUID_INDEX_1
# define index_cpu_SSE		COMMON_CPUID_INDEX_1
# define index_cpu_SSE2		COMMON_CPUID_INDEX_1
# define index_cpu_SSSE3	COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1
# define index_cpu_AVX		COMMON_CPUID_INDEX_1
# define index_cpu_FMA		COMMON_CPUID_INDEX_1
# define index_cpu_POPCOUNT	COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
# define index_cpu_HTT		COMMON_CPUID_INDEX_1
# define index_cpu_MOVBE	COMMON_CPUID_INDEX_1
# define index_cpu_POPCNT	COMMON_CPUID_INDEX_1

/* COMMON_CPUID_INDEX_7.  */
# define index_cpu_AVX2		COMMON_CPUID_INDEX_7
# define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512PF	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512ER	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512CD	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512BW	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512VL	COMMON_CPUID_INDEX_7
# define index_cpu_ERMS		COMMON_CPUID_INDEX_7
# define index_cpu_RTM		COMMON_CPUID_INDEX_7
# define index_cpu_BMI1		COMMON_CPUID_INDEX_7
# define index_cpu_BMI2		COMMON_CPUID_INDEX_7
# define index_cpu_IBT		COMMON_CPUID_INDEX_7
# define index_cpu_SHSTK	COMMON_CPUID_INDEX_7

/* COMMON_CPUID_INDEX_80000001.  */
# define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
# define index_cpu_LZCNT	COMMON_CPUID_INDEX_80000001
210
/* reg_NAME names the member of struct cpuid_registers that holds the
   bit for feature NAME.  Grouped by register.  */

/* Features reported in EDX.  */
# define reg_CX8		edx
# define reg_CMOV		edx
# define reg_SSE		edx
# define reg_SSE2		edx
# define reg_HTT		edx
# define reg_IBT		edx

/* Features reported in ECX.  */
# define reg_SSSE3		ecx
# define reg_SSE4_1		ecx
# define reg_SSE4_2		ecx
# define reg_AVX		ecx
# define reg_FMA		ecx
# define reg_FMA4		ecx
# define reg_POPCOUNT		ecx
# define reg_OSXSAVE		ecx
# define reg_LZCNT		ecx
# define reg_MOVBE		ecx
# define reg_POPCNT		ecx
# define reg_SHSTK		ecx

/* Features reported in EBX.  */
# define reg_AVX2		ebx
# define reg_AVX512F		ebx
# define reg_AVX512DQ		ebx
# define reg_AVX512PF		ebx
# define reg_AVX512ER		ebx
# define reg_AVX512CD		ebx
# define reg_AVX512BW		ebx
# define reg_AVX512VL		ebx
# define reg_ERMS		ebx
# define reg_RTM		ebx
# define reg_BMI1		ebx
# define reg_BMI2		ebx
241
/* index_arch_NAME selects the element of the `feature' array that holds
   bit_arch_NAME.  All internal feature bits currently live in the
   first element.  */
# define index_arch_Fast_Rep_String		FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward		FEATURE_INDEX_1
# define index_arch_Slow_BSF			FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Load		FEATURE_INDEX_1
# define index_arch_Prefer_PMINUB_for_stringop	FEATURE_INDEX_1
# define index_arch_AVX_Usable			FEATURE_INDEX_1
# define index_arch_FMA_Usable			FEATURE_INDEX_1
# define index_arch_FMA4_Usable			FEATURE_INDEX_1
# define index_arch_Slow_SSE4_2			FEATURE_INDEX_1
# define index_arch_AVX2_Usable			FEATURE_INDEX_1
# define index_arch_AVX_Fast_Unaligned_Load	FEATURE_INDEX_1
# define index_arch_AVX512F_Usable		FEATURE_INDEX_1
# define index_arch_AVX512DQ_Usable		FEATURE_INDEX_1
# define index_arch_I586			FEATURE_INDEX_1
# define index_arch_I686			FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER	FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy		FEATURE_INDEX_1
# define index_arch_Prefer_ERMS			FEATURE_INDEX_1
# define index_arch_Prefer_No_AVX512		FEATURE_INDEX_1
# define index_arch_MathVec_Prefer_No_AVX512	FEATURE_INDEX_1
# define index_arch_XSAVEC_Usable		FEATURE_INDEX_1
264
265#endif /* !__ASSEMBLER__ */
266
/* Compile-time knowledge about the target CPU, derived from the
   compiler's predefined -march macros.

   HAS_CPUID is a constant 0 or 1.  HAS_I586 and HAS_I686 are either the
   constant 1, when the selected target guarantees that level, or a
   runtime test via HAS_ARCH_FEATURE.  On x86-64 only HAS_CPUID is
   defined.

   __k6__ is grouped with the i586-class targets: the AMD K6 has no
   CMOV, so i686 support must be tested at run time there.  */
#ifdef __x86_64__
# define HAS_CPUID 1
#elif defined __i586__ || defined __pentium__ || defined __k6__
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#elif (defined __i686__ || defined __pentiumpro__		\
       || defined __pentium4__ || defined __nocona__	\
       || defined __atom__ || defined __core2__		\
       || defined __corei7__ || defined __corei7_avx__	\
       || defined __core_avx2__	|| defined __nehalem__	\
       || defined __sandybridge__ || defined __haswell__	\
       || defined __knl__ || defined __bonnell__		\
       || defined __silvermont__				\
       || defined __k8__					\
       || defined __athlon__ || defined __amdfam10__	\
       || defined __bdver1__ || defined __bdver2__		\
       || defined __bdver3__ || defined __bdver4__		\
       || defined __btver1__ || defined __btver2__)
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 1
#else
# define HAS_CPUID 0
# define HAS_I586 HAS_ARCH_FEATURE (I586)
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#endif
294
295#endif /* cpu_features_h */
296