1/* Cache memory handling.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18
19#include <assert.h>
20#include <errno.h>
21#include <error.h>
22#include <fcntl.h>
23#include <inttypes.h>
24#include <libintl.h>
25#include <limits.h>
26#include <obstack.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30#include <sys/mman.h>
31#include <sys/param.h>
32
33#include "dbg_log.h"
34#include "nscd.h"
35
36
37static int
38sort_he (const void *p1, const void *p2)
39{
40 struct hashentry *h1 = *(struct hashentry **) p1;
41 struct hashentry *h2 = *(struct hashentry **) p2;
42
43 if (h1 < h2)
44 return -1;
45 if (h1 > h2)
46 return 1;
47 return 0;
48}
49
50
51static int
52sort_he_data (const void *p1, const void *p2)
53{
54 struct hashentry *h1 = *(struct hashentry **) p1;
55 struct hashentry *h2 = *(struct hashentry **) p2;
56
57 if (h1->packet < h2->packet)
58 return -1;
59 if (h1->packet > h2->packet)
60 return 1;
61 return 0;
62}
63
64
65/* Basic definitions for the bitmap implementation. Only BITMAP_T
66 needs to be changed to choose a different word size. */
67#define BITMAP_T uint8_t
68#define BITS (CHAR_BIT * sizeof (BITMAP_T))
69#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
70#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
71
72
73static void
74markrange (BITMAP_T *mark, ref_t start, size_t len)
75{
76 /* Adjust parameters for block alignment. */
77 assert ((start & BLOCK_ALIGN_M1) == 0);
78 start /= BLOCK_ALIGN;
79 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
80
81 size_t elem = start / BITS;
82
83 if (start % BITS != 0)
84 {
85 if (start % BITS + len <= BITS)
86 {
87 /* All fits in the partial byte. */
88 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
89 return;
90 }
91
92 mark[elem++] |= ALLBITS << (start % BITS);
93 len -= BITS - (start % BITS);
94 }
95
96 while (len >= BITS)
97 {
98 mark[elem++] = ALLBITS;
99 len -= BITS;
100 }
101
102 if (len > 0)
103 mark[elem] |= ALLBITS >> (BITS - len);
104}
105
106
107void
108gc (struct database_dyn *db)
109{
110 /* We need write access. */
111 pthread_rwlock_wrlock (&db->lock);
112
113 /* And the memory handling lock. */
114 pthread_mutex_lock (&db->memlock);
115
116 /* We need an array representing the data area. All memory
117 allocation is BLOCK_ALIGN aligned so this is the level at which
118 we have to look at the memory. We use a mark and sweep algorithm
119 where the marks are placed in this array. */
120 assert (db->head->first_free % BLOCK_ALIGN == 0);
121
122 BITMAP_T *mark;
123 bool mark_use_malloc;
124 /* In prune_cache we are also using a dynamically allocated array.
125 If the array in the caller is too large we have malloc'ed it. */
126 size_t stack_used = sizeof (bool) * db->head->module;
127 if (__glibc_unlikely (stack_used > MAX_STACK_USE))
128 stack_used = 0;
129 size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
130 size_t memory_needed = nmark * sizeof (BITMAP_T);
131 if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
132 {
133 mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
134 mark_use_malloc = false;
135 memset (mark, '\0', memory_needed);
136 }
137 else
138 {
139 mark = (BITMAP_T *) xcalloc (1, memory_needed);
140 mark_use_malloc = true;
141 }
142
143 /* Create an array which can hold pointer to all the entries in hash
144 entries. */
145 memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
146 struct hashentry **he;
147 struct hashentry **he_data;
148 bool he_use_malloc;
149 if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
150 {
151 he = alloca_account (memory_needed, stack_used);
152 he_use_malloc = false;
153 }
154 else
155 {
156 he = xmalloc (memory_needed);
157 he_use_malloc = true;
158 }
159 he_data = &he[db->head->nentries];
160
161 size_t cnt = 0;
162 for (size_t idx = 0; idx < db->head->module; ++idx)
163 {
164 ref_t *prevp = &db->head->array[idx];
165 ref_t run = *prevp;
166
167 while (run != ENDREF)
168 {
169 assert (cnt < db->head->nentries);
170 he[cnt] = (struct hashentry *) (db->data + run);
171
172 he[cnt]->prevp = prevp;
173 prevp = &he[cnt]->next;
174
175 /* This is the hash entry itself. */
176 markrange (mark, run, sizeof (struct hashentry));
177
178 /* Add the information for the data itself. We do this
179 only for the one special entry marked with FIRST. */
180 if (he[cnt]->first)
181 {
182 struct datahead *dh
183 = (struct datahead *) (db->data + he[cnt]->packet);
184 markrange (mark, he[cnt]->packet, dh->allocsize);
185 }
186
187 run = he[cnt]->next;
188
189 ++cnt;
190 }
191 }
192 assert (cnt == db->head->nentries);
193
194 /* Sort the entries by the addresses of the referenced data. All
195 the entries pointing to the same DATAHEAD object will have the
196 same key. Stability of the sorting is unimportant. */
197 memcpy (he_data, he, cnt * sizeof (struct hashentry *));
198 qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
199
200 /* Sort the entries by their address. */
201 qsort (he, cnt, sizeof (struct hashentry *), sort_he);
202
203#define obstack_chunk_alloc xmalloc
204#define obstack_chunk_free free
205 struct obstack ob;
206 obstack_init (&ob);
207
208 /* Determine the highest used address. */
209 size_t high = nmark;
210 while (high > 0 && mark[high - 1] == 0)
211 --high;
212
213 /* No memory used. */
214 if (high == 0)
215 {
216 db->head->first_free = 0;
217 goto out;
218 }
219
220 /* Determine the highest offset. */
221 BITMAP_T mask = HIGHBIT;
222 ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
223 while ((mark[high - 1] & mask) == 0)
224 {
225 mask >>= 1;
226 highref -= BLOCK_ALIGN;
227 }
228
229 /* Now we can iterate over the MARK array and find bits which are not
230 set. These represent memory which can be recovered. */
231 size_t byte = 0;
232 /* Find the first gap. */
233 while (byte < high && mark[byte] == ALLBITS)
234 ++byte;
235
236 if (byte == high
237 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
238 /* No gap. */
239 goto out;
240
241 mask = 1;
242 cnt = 0;
243 while ((mark[byte] & mask) != 0)
244 {
245 ++cnt;
246 mask <<= 1;
247 }
248 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
249 assert (off_free <= db->head->first_free);
250
251 struct hashentry **next_hash = he;
252 struct hashentry **next_data = he_data;
253
254 /* Skip over the hash entries in the first block which does not get
255 moved. */
256 while (next_hash < &he[db->head->nentries]
257 && *next_hash < (struct hashentry *) (db->data + off_free))
258 ++next_hash;
259
260 while (next_data < &he_data[db->head->nentries]
261 && (*next_data)->packet < off_free)
262 ++next_data;
263
264
265 /* Now we start modifying the data. Make sure all readers of the
266 data are aware of this and temporarily don't use the data. */
267 ++db->head->gc_cycle;
268 assert ((db->head->gc_cycle & 1) == 1);
269
270
271 /* We do not perform the move operations right away since the
272 he_data array is not sorted by the address of the data. */
273 struct moveinfo
274 {
275 void *from;
276 void *to;
277 size_t size;
278 struct moveinfo *next;
279 } *moves = NULL;
280
281 while (byte < high)
282 {
283 /* Search for the next filled block. BYTE is the index of the
284 entry in MARK, MASK is the bit, and CNT is the bit number.
285 OFF_FILLED is the corresponding offset. */
286 if ((mark[byte] & ~(mask - 1)) == 0)
287 {
288 /* No other bit set in the same element of MARK. Search in the
289 following memory. */
290 do
291 ++byte;
292 while (byte < high && mark[byte] == 0);
293
294 if (byte == high)
295 /* That was it. */
296 break;
297
298 mask = 1;
299 cnt = 0;
300 }
301 /* Find the exact bit. */
302 while ((mark[byte] & mask) == 0)
303 {
304 ++cnt;
305 mask <<= 1;
306 }
307
308 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
309 assert (off_alloc <= db->head->first_free);
310
311 /* Find the end of the used area. */
312 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
313 {
314 /* All other bits set. Search the next bytes in MARK. */
315 do
316 ++byte;
317 while (byte < high && mark[byte] == ALLBITS);
318
319 mask = 1;
320 cnt = 0;
321 }
322 if (byte < high)
323 {
324 /* Find the exact bit. */
325 while ((mark[byte] & mask) != 0)
326 {
327 ++cnt;
328 mask <<= 1;
329 }
330 }
331
332 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
333 assert (off_allocend <= db->head->first_free);
334 /* Now we know that we can copy the area from OFF_ALLOC to
335 OFF_ALLOCEND (not included) to the memory starting at
336 OFF_FREE. First fix up all the entries for the
337 displacement. */
338 ref_t disp = off_alloc - off_free;
339
340 struct moveinfo *new_move;
341 if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
342 1))
343 new_move = alloca_account (sizeof (*new_move), stack_used);
344 else
345 new_move = obstack_alloc (&ob, sizeof (*new_move));
346 new_move->from = db->data + off_alloc;
347 new_move->to = db->data + off_free;
348 new_move->size = off_allocend - off_alloc;
349 /* Create a circular list to be always able to append at the end. */
350 if (moves == NULL)
351 moves = new_move->next = new_move;
352 else
353 {
354 new_move->next = moves->next;
355 moves = moves->next = new_move;
356 }
357
358 /* The following loop will prepare to move this much data. */
359 off_free += off_allocend - off_alloc;
360
361 while (off_alloc < off_allocend)
362 {
363 /* Determine whether the next entry is for a hash entry or
364 the data. */
365 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
366 {
367 /* Just correct the forward reference. */
368 *(*next_hash++)->prevp -= disp;
369
370 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
371 & ~BLOCK_ALIGN_M1);
372 }
373 else
374 {
375 assert (next_data < &he_data[db->head->nentries]);
376 assert ((*next_data)->packet == off_alloc);
377
378 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
379 do
380 {
381 assert ((*next_data)->key >= (*next_data)->packet);
382 assert ((*next_data)->key + (*next_data)->len
383 <= (*next_data)->packet + dh->allocsize);
384
385 (*next_data)->packet -= disp;
386 (*next_data)->key -= disp;
387 ++next_data;
388 }
389 while (next_data < &he_data[db->head->nentries]
390 && (*next_data)->packet == off_alloc);
391
392 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
393 }
394 }
395 assert (off_alloc == off_allocend);
396
397 assert (off_alloc <= db->head->first_free);
398 if (off_alloc == db->head->first_free)
399 /* We are done, that was the last block. */
400 break;
401 }
402 assert (next_hash == &he[db->head->nentries]);
403 assert (next_data == &he_data[db->head->nentries]);
404
405 /* Now perform the actual moves. */
406 if (moves != NULL)
407 {
408 struct moveinfo *runp = moves->next;
409 do
410 {
411 assert ((char *) runp->to >= db->data);
412 assert ((char *) runp->to + runp->size
413 <= db->data + db->head->first_free);
414 assert ((char *) runp->from >= db->data);
415 assert ((char *) runp->from + runp->size
416 <= db->data + db->head->first_free);
417
418 /* The regions may overlap. */
419 memmove (runp->to, runp->from, runp->size);
420 runp = runp->next;
421 }
422 while (runp != moves->next);
423
424 if (__glibc_unlikely (debug_level >= 3))
425 dbg_log (_("freed %zu bytes in %s cache"),
426 (size_t) (db->head->first_free
427 - ((char *) moves->to + moves->size - db->data)),
428 dbnames[db - dbs]);
429
430 /* The byte past the end of the last copied block is the next
431 available byte. */
432 db->head->first_free = (char *) moves->to + moves->size - db->data;
433
434 /* Consistency check. */
435 if (__glibc_unlikely (debug_level >= 3))
436 {
437 for (size_t idx = 0; idx < db->head->module; ++idx)
438 {
439 ref_t run = db->head->array[idx];
440 size_t cnt = 0;
441
442 while (run != ENDREF)
443 {
444 if (run + sizeof (struct hashentry) > db->head->first_free)
445 {
446 dbg_log ("entry %zu in hash bucket %zu out of bounds: "
447 "%" PRIu32 "+%zu > %zu\n",
448 cnt, idx, run, sizeof (struct hashentry),
449 (size_t) db->head->first_free);
450 break;
451 }
452
453 struct hashentry *he = (struct hashentry *) (db->data + run);
454
455 if (he->key + he->len > db->head->first_free)
456 dbg_log ("key of entry %zu in hash bucket %zu out of "
457 "bounds: %" PRIu32 "+%zu > %zu\n",
458 cnt, idx, he->key, (size_t) he->len,
459 (size_t) db->head->first_free);
460
461 if (he->packet + sizeof (struct datahead)
462 > db->head->first_free)
463 dbg_log ("packet of entry %zu in hash bucket %zu out of "
464 "bounds: %" PRIu32 "+%zu > %zu\n",
465 cnt, idx, he->packet, sizeof (struct datahead),
466 (size_t) db->head->first_free);
467 else
468 {
469 struct datahead *dh = (struct datahead *) (db->data
470 + he->packet);
471 if (he->packet + dh->allocsize
472 > db->head->first_free)
473 dbg_log ("full key of entry %zu in hash bucket %zu "
474 "out of bounds: %" PRIu32 "+%zu > %zu",
475 cnt, idx, he->packet, (size_t) dh->allocsize,
476 (size_t) db->head->first_free);
477 }
478
479 run = he->next;
480 ++cnt;
481 }
482 }
483 }
484 }
485
486 /* Make sure the data on disk is updated. */
487 if (db->persistent)
488 msync (db->head, db->data + db->head->first_free - (char *) db->head,
489 MS_ASYNC);
490
491
492 /* Now we are done modifying the data. */
493 ++db->head->gc_cycle;
494 assert ((db->head->gc_cycle & 1) == 0);
495
496 /* We are done. */
497 out:
498 pthread_mutex_unlock (&db->memlock);
499 pthread_rwlock_unlock (&db->lock);
500
501 if (he_use_malloc)
502 free (he);
503 if (mark_use_malloc)
504 free (mark);
505
506 obstack_free (&ob, NULL);
507}
508
509
510void *
511mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
512{
513 /* Make sure LEN is a multiple of our maximum alignment so we can
514 keep track of used memory is multiples of this alignment value. */
515 if ((len & BLOCK_ALIGN_M1) != 0)
516 len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
517
518 if (data_alloc)
519 pthread_rwlock_rdlock (&db->lock);
520
521 pthread_mutex_lock (&db->memlock);
522
523 assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
524
525 bool tried_resize = false;
526 void *res;
527 retry:
528 res = db->data + db->head->first_free;
529
530 if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
531 {
532 if (! tried_resize)
533 {
534 /* Try to resize the database. Grow size of 1/8th. */
535 size_t oldtotal = (sizeof (struct database_pers_head)
536 + roundup (db->head->module * sizeof (ref_t),
537 ALIGN)
538 + db->head->data_size);
539 size_t new_data_size = (db->head->data_size
540 + MAX (2 * len, db->head->data_size / 8));
541 size_t newtotal = (sizeof (struct database_pers_head)
542 + roundup (db->head->module * sizeof (ref_t), ALIGN)
543 + new_data_size);
544 if (newtotal > db->max_db_size)
545 {
546 new_data_size -= newtotal - db->max_db_size;
547 newtotal = db->max_db_size;
548 }
549
550 if (db->mmap_used && newtotal > oldtotal
551 /* We only have to adjust the file size. The new pages
552 become magically available. */
553 && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
554 newtotal
555 - oldtotal)) == 0)
556 {
557 db->head->data_size = new_data_size;
558 tried_resize = true;
559 goto retry;
560 }
561 }
562
563 if (data_alloc)
564 pthread_rwlock_unlock (&db->lock);
565
566 if (! db->last_alloc_failed)
567 {
568 dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
569
570 db->last_alloc_failed = true;
571 }
572
573 ++db->head->addfailed;
574
575 /* No luck. */
576 res = NULL;
577 }
578 else
579 {
580 db->head->first_free += len;
581
582 db->last_alloc_failed = false;
583
584 }
585
586 pthread_mutex_unlock (&db->memlock);
587
588 return res;
589}
590