/* x86_64 cache info.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>
#include <init-arch.h>

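/* CPUID leaf 2 (EAX == 2) describes caches and TLBs with one-byte
   descriptors packed into EAX, EBX, ECX and EDX.  The table below maps
   each known cache descriptor to its parameters; rel_name stores the
   _SC_* constant relative to _SC_LEVEL1_ICACHE_SIZE (see the M macro
   below) so that it fits into an unsigned char.  */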
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };

#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))

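/* bsearch comparison callback for the intel_02_known table, which is
   sorted by ascending descriptor value (idx).  */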
static int
intel_02_known_compare (const void *p1, const void *p2)
{
  const struct intel_02_cache_info *i1;
  const struct intel_02_cache_info *i2;

  i1 = (const struct intel_02_cache_info *) p1;
  i2 = (const struct intel_02_cache_info *) p2;

  if (i1->idx == i2->idx)
    return 0;

  return i1->idx < i2->idx ? -1 : 1;
}


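/* Decode the four descriptor bytes in VALUE (one register filled in by
   CPUID leaf 2) and return the cache parameter NAME asks for, or 0 if
   this word does not provide it.  */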
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
                  bool *no_level_2_or_3,
                  const struct cpu_features *cpu_features)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE, so e.g. both _SC_LEVEL1_DCACHE_ASSOC and
     _SC_LEVEL1_DCACHE_LINESIZE fold to _SC_LEVEL1_DCACHE_SIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  while (value != 0)
    {
      unsigned int byte = value & 0xff;

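      /* Descriptor 0x40 means: no L2 cache or, if there is a valid L2
         cache, no L3 cache.  */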
      if (byte == 0x40)
        {
          *no_level_2_or_3 = true;

          if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            /* No need to look further.  */
            break;
        }
      else if (byte == 0xff)
        {
          /* CPUID leaf 0x4 contains all the information.  We need to
             iterate over it.  */
          unsigned int eax;
          unsigned int ebx;
          unsigned int ecx;
          unsigned int edx;

          unsigned int round = 0;
          while (1)
            {
              __cpuid_count (4, round, eax, ebx, ecx, edx);

              enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
              if (type == null)
                /* That was the end.  */
                break;

              unsigned int level = (eax >> 5) & 0x7;

              if ((level == 1 && type == data
                   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
                  || (level == 1 && type == inst
                      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
                  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
                  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
                {
                  unsigned int offset = M(name) - folded_rel_name;

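                  /* For leaf 4, EBX holds (ways - 1) in bits 31:22,
                     (physical line partitions - 1) in bits 21:12 and
                     (line size - 1) in bits 11:0; ECX holds (sets - 1).
                     Total size = ways * partitions * line size * sets.  */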
                  if (offset == 0)
                    /* Cache size.  */
                    return (((ebx >> 22) + 1)
                            * (((ebx >> 12) & 0x3ff) + 1)
                            * ((ebx & 0xfff) + 1)
                            * (ecx + 1));
                  if (offset == 1)
                    return (ebx >> 22) + 1;

                  assert (offset == 2);
                  return (ebx & 0xfff) + 1;
                }

              ++round;
            }
          /* There is no other cache information anywhere else.  */
          break;
        }
      else
        {
          if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            {
              /* Intel reused this value.  For family 15, model 6 it
                 specifies the 3rd level cache.  Otherwise the 2nd
                 level cache.  */
              unsigned int family = cpu_features->family;
              unsigned int model = cpu_features->model;

              if (family == 15 && model == 6)
                {
                  /* The level 3 cache is encoded for this model like
                     the level 2 cache is for other models.  Pretend
                     the caller asked for the level 2 cache.  */
                  name = (_SC_LEVEL2_CACHE_SIZE
                          + (name - _SC_LEVEL3_CACHE_SIZE));
                  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
                }
            }

          struct intel_02_cache_info *found;
          struct intel_02_cache_info search;

          search.idx = byte;
          found = bsearch (&search, intel_02_known, nintel_02_known,
                           sizeof (intel_02_known[0]), intel_02_known_compare);
          if (found != NULL)
            {
              if (found->rel_name == folded_rel_name)
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return found->size;
                  if (offset == 1)
                    return found->assoc;

                  assert (offset == 2);
                  return found->linesize;
                }

              if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                *has_level_2 = true;
            }
        }

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}


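/* Return the NAME cache parameter for an Intel CPU, determined via
   CPUID leaf 2: 0 if nothing was found, -1 if CPUID leaf 2 is not
   supported or the CPU reports no cache at the requested level.  */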
static long int __attribute__ ((noinline))
handle_intel (int name, const struct cpu_features *cpu_features)
{
  unsigned int maxidx = cpu_features->max_cpuid;

  /* Return -1 for older CPUs.  */
  if (maxidx < 2)
    return -1;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}


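/* Return the NAME cache parameter for an AMD CPU, using the extended
   CPUID leaves 0x80000005 (L1) and 0x80000006 (L2 and L3).  */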
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* The L1 instruction cache information is in EDX with the same
         layout as the L1 data cache information in ECX; remap the
         query to the data cache case.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
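      /* ECX bits 31:24 hold the L1 data cache size in KiB;
         (ecx >> 14) & 0x3fc00 equals ((ecx >> 24) & 0xff) << 10,
         i.e. the size in bytes.  */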
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
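      /* ECX bits 31:16 hold the L2 size in KiB and bits 15:12 the
         associativity encoding; an encoding of 0 means the cache is
         disabled.  */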
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      switch ((ecx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (ecx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          /* Fully associative: ways = size / line size.  */
          return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
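      /* The upper bits of EDX hold the L3 size in 512 KiB units;
         (edx & 0x3ffc0000) << 1 rescales the field to bytes.  */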
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      switch ((edx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (edx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          /* Fully associative: ways = size / line size.  */
          return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}


/* Get the value of the system variable NAME.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
  const struct cpu_features *cpu_features = __get_cpu_features ();

  if (cpu_features->kind == arch_kind_intel)
    return handle_intel (name, cpu_features);

  if (cpu_features->kind == arch_kind_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}


/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to a multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to a multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

/* Threshold to use non-temporal stores.  */
long int __x86_shared_non_temporal_threshold attribute_hidden;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.  */
int __x86_prefetchw attribute_hidden;
#endif


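/* Determine the cache sizes at startup.  Any value already recorded in
   cpu_features (e.g. set via tunables) takes precedence over the
   values read from CPUID.  */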
static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out the brand of the processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;
  long int shared = -1;
  unsigned int level;
  unsigned int threads = 0;
  const struct cpu_features *cpu_features = __get_cpu_features ();
  int max_cpuid = cpu_features->max_cpuid;

  if (cpu_features->kind == arch_kind_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);

      long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
      bool inclusive_cache = true;

      /* Try L3 first.  */
      level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);

      /* Number of logical processors sharing L2 cache.  */
      int threads_l2;

      /* Number of logical processors sharing L3 cache.  */
      int threads_l3;

      if (shared <= 0)
        {
          /* Try L2 otherwise.  */
          level = 2;
          shared = core;
          threads_l2 = 0;
          threads_l3 = -1;
        }
      else
        {
          threads_l2 = 0;
          threads_l3 = 0;
        }

      /* A value of 0 for the HTT bit indicates there is only a single
         logical processor.  */
      if (HAS_CPU_FEATURE (HTT))
        {
          /* Figure out the number of logical threads that share the
             highest cache level.  */
          if (max_cpuid >= 4)
            {
              unsigned int family = cpu_features->family;
              unsigned int model = cpu_features->model;

              int i = 0;

              /* Query until cache levels 2 and 3 are enumerated.  */
              int check = 0x1 | (threads_l3 == 0) << 1;
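              /* Bit 0 of CHECK tracks the L2 query; bit 1 is set only
                 when an L3 cache was found above and tracks the L3
                 query.  */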
              do
                {
                  __cpuid_count (4, i++, eax, ebx, ecx, edx);

                  /* There seems to be a bug in at least some Pentium Ds
                     which sometimes fail to iterate all cache parameters.
                     Do not loop indefinitely here, stop in this case and
                     assume there is no such information.  */
                  if ((eax & 0x1f) == 0)
                    goto intel_bug_no_cache_info;

                  switch ((eax >> 5) & 0x7)
                    {
                    default:
                      break;
                    case 2:
                      if ((check & 0x1))
                        {
                          /* Get maximum number of logical processors
                             sharing L2 cache.  */
                          threads_l2 = (eax >> 14) & 0x3ff;
                          check &= ~0x1;
                        }
                      break;
                    case 3:
                      if ((check & (0x1 << 1)))
                        {
                          /* Get maximum number of logical processors
                             sharing L3 cache.  */
                          threads_l3 = (eax >> 14) & 0x3ff;

                          /* Check if L2 and L3 caches are inclusive.  */
                          inclusive_cache = (edx & 0x2) != 0;
                          check &= ~(0x1 << 1);
                        }
                      break;
                    }
                }
              while (check);

              /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
                 numbers of addressable IDs for logical processors sharing
                 the cache, instead of the maximum number of threads
                 sharing the cache.  */
              if (max_cpuid >= 11)
                {
                  /* Find the number of logical processors shipped in
                     one core and apply count mask.  */
                  i = 0;

                  /* Count SMT only if there is L3 cache.  Always count
                     core if there is no L3 cache.  */
                  int count = ((threads_l2 > 0 && level == 3)
                               | ((threads_l3 > 0
                                   || (threads_l2 > 0 && level == 2)) << 1));

                  while (count)
                    {
                      __cpuid_count (11, i++, eax, ebx, ecx, edx);

                      int shipped = ebx & 0xff;
                      int type = ecx & 0xff00;
                      if (shipped == 0 || type == 0)
                        break;
                      else if (type == 0x100)
                        {
                          /* Count SMT.  */
                          if ((count & 0x1))
                            {
                              int count_mask;

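                              /* BSR finds the index of the most
                                 significant set bit in THREADS_L2; the
                                 mask built from it covers every bit up
                                 to and including that index.  */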
                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_l2));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_l2 = (shipped - 1) & count_mask;
                              count &= ~0x1;
                            }
                        }
                      else if (type == 0x200)
                        {
                          /* Count core.  */
                          if ((count & (0x1 << 1)))
                            {
                              int count_mask;
                              int threads_core
                                = (level == 2 ? threads_l2 : threads_l3);

                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_core));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_core = (shipped - 1) & count_mask;
                              if (level == 2)
                                threads_l2 = threads_core;
                              else
                                threads_l3 = threads_core;
                              count &= ~(0x1 << 1);
                            }
                        }
                    }
                }
              if (threads_l2 > 0)
                threads_l2 += 1;
              if (threads_l3 > 0)
                threads_l3 += 1;
              if (level == 2)
                {
                  if (threads_l2)
                    {
                      threads = threads_l2;
                      if (threads > 2 && family == 6)
                        switch (model)
                          {
                          case 0x37:
                          case 0x4a:
                          case 0x4d:
                          case 0x5a:
                          case 0x5d:
                            /* Silvermont has L2 cache shared by 2 cores.  */
                            threads = 2;
                            break;
                          default:
                            break;
                          }
                    }
                }
              else if (threads_l3)
                threads = threads_l3;
            }
          else
            {
intel_bug_no_cache_info:
              /* Assume that all logical threads share the highest cache
                 level.  */

              threads
                = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
                    >> 16) & 0xff);
            }

          /* Cap usage of highest cache level to the number of supported
             threads.  */
          if (shared > 0 && threads > 0)
            shared /= threads;
        }

      /* Account for non-inclusive L2 and L3 caches.  */
      if (!inclusive_cache)
        {
          if (threads_l2 > 0)
            core /= threads_l2;
          shared += core;
        }
    }
  else if (cpu_features->kind == arch_kind_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
        /* No shared L3 cache.  All we have is the L2 cache.  */
        shared = core;
      else
        {
          /* Figure out the number of logical threads that share L3.  */
          if (max_cpuid_ex >= 0x80000008)
            {
              /* Get width of APIC ID.  */
              __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
              threads = 1 << ((ecx >> 12) & 0x0f);
            }

          if (threads == 0)
            {
              /* If APIC ID width is not available, use logical
                 processor count.  */
              __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

              if ((edx & (1 << 28)) != 0)
                threads = (ebx >> 16) & 0xff;
            }

          /* Cap usage of highest cache level to the number of
             supported threads.  */
          if (threads > 0)
            shared /= threads;

          /* Account for exclusive L2 and L3 caches.  */
          shared += core;
        }

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
        {
          __cpuid (0x80000001, eax, ebx, ecx, edx);
          /* PREFETCHW || 3DNow!  */
          if ((ecx & 0x100) || (edx & 0x80000000))
            __x86_prefetchw = -1;
        }
#endif
    }

  if (cpu_features->data_cache_size != 0)
    data = cpu_features->data_cache_size;

  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round data cache size to a multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (cpu_features->shared_cache_size != 0)
    shared = cpu_features->shared_cache_size;

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round shared cache size to a multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }

  /* The large memcpy micro benchmark in glibc shows that six times the
     shared cache size is the approximate value above which non-temporal
     stores become faster on an 8-core processor.  This is 3/4 of the
     total shared cache size.  */
  __x86_shared_non_temporal_threshold
    = (cpu_features->non_temporal_threshold != 0
       ? cpu_features->non_temporal_threshold
       : __x86_shared_cache_size * threads * 3 / 4);
}

#endif