1 | /* Copyright (C) 1995-2016 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ |
18 | |
19 | /* Tree search for red/black trees. |
20 | The algorithm for adding nodes is taken from one of the many "Algorithms" |
21 | books by Robert Sedgewick, although the implementation differs. |
22 | The algorithm for deleting nodes can probably be found in a book named |
23 | "Introduction to Algorithms" by Cormen/Leiserson/Rivest. At least that's |
24 | the book that my professor took most algorithms from during the "Data |
25 | Structures" course... |
26 | |
27 | Totally public domain. */ |
28 | |
29 | /* Red/black trees are binary trees in which the edges are colored either red |
30 | or black. They have the following properties: |
31 | 1. The number of black edges on every path from the root to a leaf is |
32 | constant. |
33 | 2. No two red edges are adjacent. |
34 | Therefore there is an upper bound on the length of every path, it's |
35 | O(log n) where n is the number of nodes in the tree. No path can be longer |
36 | than 1+2*P where P is the length of the shortest path in the tree. |
37 | Useful for the implementation: |
38 | 3. If one of the children of a node is NULL, then the other one is red |
39 | (if it exists). |
40 | |
   In the implementation, the nodes are colored rather than the edges.  A
   node's color is interpreted as the color of the edge leading to that node.
   The color is meaningless for the root node, but we color the root node
   black for convenience.  All added nodes are red initially.
45 | |
46 | Adding to a red/black tree is rather easy. The right place is searched |
47 | with a usual binary tree search. Additionally, whenever a node N is |
48 | reached that has two red successors, the successors are colored black and |
49 | the node itself colored red. This moves red edges up the tree where they |
50 | pose less of a problem once we get to really insert the new node. Changing |
51 | N's color to red may violate rule 2, however, so rotations may become |
52 | necessary to restore the invariants. Adding a new red leaf may violate |
53 | the same rule, so afterwards an additional check is run and the tree |
54 | possibly rotated. |
55 | |
56 | Deleting is hairy. There are mainly two nodes involved: the node to be |
57 | deleted (n1), and another node that is to be unchained from the tree (n2). |
   If n1 has a successor (the node with the smallest key that is larger than
   n1's key), then the successor becomes n2 and its contents are copied into
   n1, otherwise n1 becomes n2.
61 | Unchaining a node may violate rule 1: if n2 is black, one subtree is |
62 | missing one black edge afterwards. The algorithm must try to move this |
63 | error upwards towards the root, so that the subtree that does not have |
64 | enough black edges becomes the whole tree. Once that happens, the error |
65 | has disappeared. It may not be necessary to go all the way up, since it |
66 | is possible that rotations and recoloring can fix the error before that. |
67 | |
68 | Although the deletion algorithm must walk upwards through the tree, we |
69 | do not store parent pointers in the nodes. Instead, delete allocates a |
70 | small array of parent pointers and fills it while descending the tree. |
71 | Since we know that the length of a path is O(log n), where n is the number |
72 | of nodes, this is likely to use less memory. */ |
73 | |
74 | /* Tree rotations look like this: |
75 | A C |
76 | / \ / \ |
77 | B C A G |
78 | / \ / \ --> / \ |
79 | D E F G B F |
80 | / \ |
81 | D E |
82 | |
83 | In this case, A has been rotated left. This preserves the ordering of the |
84 | binary tree. */ |
85 | |
86 | #include <stdlib.h> |
87 | #include <string.h> |
88 | #include <search.h> |
89 | |
/* One node of the red/black tree.  */
struct node_t
{
  /* Callers expect this to be the first element in the structure - do not
     move!  */
  const void *key;
  /* Left and right subtree, NULL when absent.  */
  struct node_t *left;
  struct node_t *right;
  /* Nonzero when the edge leading to this node is red.  */
  unsigned int red:1;
};

/* Pointer types used throughout this file: NODE for nodes that may be
   modified, CONST_NODE for read-only traversal.  */
typedef struct node_t *node;
typedef const struct node_t *const_node;
100 | |
101 | #undef DEBUGGING |
102 | |
103 | #ifdef DEBUGGING |
104 | |
105 | /* Routines to check tree invariants. */ |
106 | |
107 | #include <assert.h> |
108 | |
109 | #define CHECK_TREE(a) check_tree(a) |
110 | |
111 | static void |
112 | check_tree_recurse (node p, int d_sofar, int d_total) |
113 | { |
114 | if (p == NULL) |
115 | { |
116 | assert (d_sofar == d_total); |
117 | return; |
118 | } |
119 | |
120 | check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total); |
121 | check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total); |
122 | if (p->left) |
123 | assert (!(p->left->red && p->red)); |
124 | if (p->right) |
125 | assert (!(p->right->red && p->red)); |
126 | } |
127 | |
128 | static void |
129 | check_tree (node root) |
130 | { |
131 | int cnt = 0; |
132 | node p; |
133 | if (root == NULL) |
134 | return; |
135 | root->red = 0; |
136 | for(p = root->left; p; p = p->left) |
137 | cnt += !p->red; |
138 | check_tree_recurse (root, 0, cnt); |
139 | } |
140 | |
141 | |
142 | #else |
143 | |
144 | #define CHECK_TREE(a) |
145 | |
146 | #endif |
147 | |
148 | /* Possibly "split" a node with two red successors, and/or fix up two red |
149 | edges in a row. ROOTP is a pointer to the lowest node we visited, PARENTP |
150 | and GPARENTP pointers to its parent/grandparent. P_R and GP_R contain the |
151 | comparison values that determined which way was taken in the tree to reach |
152 | ROOTP. MODE is 1 if we need not do the split, but must check for two red |
153 | edges between GPARENTP and ROOTP. */ |
154 | static void |
155 | maybe_split_for_insert (node *rootp, node *parentp, node *gparentp, |
156 | int p_r, int gp_r, int mode) |
157 | { |
158 | node root = *rootp; |
159 | node *rp, *lp; |
160 | rp = &(*rootp)->right; |
161 | lp = &(*rootp)->left; |
162 | |
163 | /* See if we have to split this node (both successors red). */ |
164 | if (mode == 1 |
165 | || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red)) |
166 | { |
167 | /* This node becomes red, its successors black. */ |
168 | root->red = 1; |
169 | if (*rp) |
170 | (*rp)->red = 0; |
171 | if (*lp) |
172 | (*lp)->red = 0; |
173 | |
174 | /* If the parent of this node is also red, we have to do |
175 | rotations. */ |
176 | if (parentp != NULL && (*parentp)->red) |
177 | { |
178 | node gp = *gparentp; |
179 | node p = *parentp; |
180 | /* There are two main cases: |
181 | 1. The edge types (left or right) of the two red edges differ. |
182 | 2. Both red edges are of the same type. |
183 | There exist two symmetries of each case, so there is a total of |
184 | 4 cases. */ |
185 | if ((p_r > 0) != (gp_r > 0)) |
186 | { |
187 | /* Put the child at the top of the tree, with its parent |
188 | and grandparent as successors. */ |
189 | p->red = 1; |
190 | gp->red = 1; |
191 | root->red = 0; |
192 | if (p_r < 0) |
193 | { |
194 | /* Child is left of parent. */ |
195 | p->left = *rp; |
196 | *rp = p; |
197 | gp->right = *lp; |
198 | *lp = gp; |
199 | } |
200 | else |
201 | { |
202 | /* Child is right of parent. */ |
203 | p->right = *lp; |
204 | *lp = p; |
205 | gp->left = *rp; |
206 | *rp = gp; |
207 | } |
208 | *gparentp = root; |
209 | } |
210 | else |
211 | { |
212 | *gparentp = *parentp; |
213 | /* Parent becomes the top of the tree, grandparent and |
214 | child are its successors. */ |
215 | p->red = 0; |
216 | gp->red = 1; |
217 | if (p_r < 0) |
218 | { |
219 | /* Left edges. */ |
220 | gp->left = p->right; |
221 | p->right = gp; |
222 | } |
223 | else |
224 | { |
225 | /* Right edges. */ |
226 | gp->right = p->left; |
227 | p->left = gp; |
228 | } |
229 | } |
230 | } |
231 | } |
232 | } |
233 | |
234 | /* Find or insert datum into search tree. |
235 | KEY is the key to be located, ROOTP is the address of tree root, |
236 | COMPAR the ordering function. */ |
237 | void * |
238 | __tsearch (const void *key, void **vrootp, __compar_fn_t compar) |
239 | { |
240 | node q; |
241 | node *parentp = NULL, *gparentp = NULL; |
242 | node *rootp = (node *) vrootp; |
243 | node *nextp; |
244 | int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler. */ |
245 | |
246 | if (rootp == NULL) |
247 | return NULL; |
248 | |
249 | /* This saves some additional tests below. */ |
250 | if (*rootp != NULL) |
251 | (*rootp)->red = 0; |
252 | |
253 | CHECK_TREE (*rootp); |
254 | |
255 | nextp = rootp; |
256 | while (*nextp != NULL) |
257 | { |
258 | node root = *rootp; |
259 | r = (*compar) (key, root->key); |
260 | if (r == 0) |
261 | return root; |
262 | |
263 | maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0); |
264 | /* If that did any rotations, parentp and gparentp are now garbage. |
265 | That doesn't matter, because the values they contain are never |
266 | used again in that case. */ |
267 | |
268 | nextp = r < 0 ? &root->left : &root->right; |
269 | if (*nextp == NULL) |
270 | break; |
271 | |
272 | gparentp = parentp; |
273 | parentp = rootp; |
274 | rootp = nextp; |
275 | |
276 | gp_r = p_r; |
277 | p_r = r; |
278 | } |
279 | |
280 | q = (struct node_t *) malloc (sizeof (struct node_t)); |
281 | if (q != NULL) |
282 | { |
283 | *nextp = q; /* link new node to old */ |
284 | q->key = key; /* initialize new node */ |
285 | q->red = 1; |
286 | q->left = q->right = NULL; |
287 | |
288 | if (nextp != rootp) |
289 | /* There may be two red edges in a row now, which we must avoid by |
290 | rotating the tree. */ |
291 | maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1); |
292 | } |
293 | |
294 | return q; |
295 | } |
296 | libc_hidden_def (__tsearch) |
297 | weak_alias (__tsearch, tsearch) |
298 | |
299 | |
300 | /* Find datum in search tree. |
301 | KEY is the key to be located, ROOTP is the address of tree root, |
302 | COMPAR the ordering function. */ |
303 | void * |
304 | __tfind (const void *key, void *const *vrootp, __compar_fn_t compar) |
305 | { |
306 | node *rootp = (node *) vrootp; |
307 | |
308 | if (rootp == NULL) |
309 | return NULL; |
310 | |
311 | CHECK_TREE (*rootp); |
312 | |
313 | while (*rootp != NULL) |
314 | { |
315 | node root = *rootp; |
316 | int r; |
317 | |
318 | r = (*compar) (key, root->key); |
319 | if (r == 0) |
320 | return root; |
321 | |
322 | rootp = r < 0 ? &root->left : &root->right; |
323 | } |
324 | return NULL; |
325 | } |
326 | libc_hidden_def (__tfind) |
327 | weak_alias (__tfind, tfind) |
328 | |
329 | |
330 | /* Delete node with given key. |
331 | KEY is the key to be deleted, ROOTP is the address of the root of tree, |
332 | COMPAR the comparison function. */ |
333 | void * |
334 | __tdelete (const void *key, void **vrootp, __compar_fn_t compar) |
335 | { |
336 | node p, q, r, retval; |
337 | int cmp; |
338 | node *rootp = (node *) vrootp; |
339 | node root, unchained; |
340 | /* Stack of nodes so we remember the parents without recursion. It's |
341 | _very_ unlikely that there are paths longer than 40 nodes. The tree |
342 | would need to have around 250.000 nodes. */ |
343 | int stacksize = 40; |
344 | int sp = 0; |
345 | node **nodestack = alloca (sizeof (node *) * stacksize); |
346 | |
347 | if (rootp == NULL) |
348 | return NULL; |
349 | p = *rootp; |
350 | if (p == NULL) |
351 | return NULL; |
352 | |
353 | CHECK_TREE (p); |
354 | |
355 | while ((cmp = (*compar) (key, (*rootp)->key)) != 0) |
356 | { |
357 | if (sp == stacksize) |
358 | { |
359 | node **newstack; |
360 | stacksize += 20; |
361 | newstack = alloca (sizeof (node *) * stacksize); |
362 | nodestack = memcpy (newstack, nodestack, sp * sizeof (node *)); |
363 | } |
364 | |
365 | nodestack[sp++] = rootp; |
366 | p = *rootp; |
367 | rootp = ((cmp < 0) |
368 | ? &(*rootp)->left |
369 | : &(*rootp)->right); |
370 | if (*rootp == NULL) |
371 | return NULL; |
372 | } |
373 | |
374 | /* This is bogus if the node to be deleted is the root... this routine |
375 | really should return an integer with 0 for success, -1 for failure |
376 | and errno = ESRCH or something. */ |
377 | retval = p; |
378 | |
379 | /* We don't unchain the node we want to delete. Instead, we overwrite |
380 | it with its successor and unchain the successor. If there is no |
381 | successor, we really unchain the node to be deleted. */ |
382 | |
383 | root = *rootp; |
384 | |
385 | r = root->right; |
386 | q = root->left; |
387 | |
388 | if (q == NULL || r == NULL) |
389 | unchained = root; |
390 | else |
391 | { |
392 | node *parent = rootp, *up = &root->right; |
393 | for (;;) |
394 | { |
395 | if (sp == stacksize) |
396 | { |
397 | node **newstack; |
398 | stacksize += 20; |
399 | newstack = alloca (sizeof (node *) * stacksize); |
400 | nodestack = memcpy (newstack, nodestack, sp * sizeof (node *)); |
401 | } |
402 | nodestack[sp++] = parent; |
403 | parent = up; |
404 | if ((*up)->left == NULL) |
405 | break; |
406 | up = &(*up)->left; |
407 | } |
408 | unchained = *up; |
409 | } |
410 | |
411 | /* We know that either the left or right successor of UNCHAINED is NULL. |
412 | R becomes the other one, it is chained into the parent of UNCHAINED. */ |
413 | r = unchained->left; |
414 | if (r == NULL) |
415 | r = unchained->right; |
416 | if (sp == 0) |
417 | *rootp = r; |
418 | else |
419 | { |
420 | q = *nodestack[sp-1]; |
421 | if (unchained == q->right) |
422 | q->right = r; |
423 | else |
424 | q->left = r; |
425 | } |
426 | |
427 | if (unchained != root) |
428 | root->key = unchained->key; |
429 | if (!unchained->red) |
430 | { |
431 | /* Now we lost a black edge, which means that the number of black |
432 | edges on every path is no longer constant. We must balance the |
433 | tree. */ |
434 | /* NODESTACK now contains all parents of R. R is likely to be NULL |
435 | in the first iteration. */ |
436 | /* NULL nodes are considered black throughout - this is necessary for |
437 | correctness. */ |
438 | while (sp > 0 && (r == NULL || !r->red)) |
439 | { |
440 | node *pp = nodestack[sp - 1]; |
441 | p = *pp; |
442 | /* Two symmetric cases. */ |
443 | if (r == p->left) |
444 | { |
445 | /* Q is R's brother, P is R's parent. The subtree with root |
446 | R has one black edge less than the subtree with root Q. */ |
447 | q = p->right; |
448 | if (q->red) |
449 | { |
450 | /* If Q is red, we know that P is black. We rotate P left |
451 | so that Q becomes the top node in the tree, with P below |
452 | it. P is colored red, Q is colored black. |
453 | This action does not change the black edge count for any |
454 | leaf in the tree, but we will be able to recognize one |
455 | of the following situations, which all require that Q |
456 | is black. */ |
457 | q->red = 0; |
458 | p->red = 1; |
459 | /* Left rotate p. */ |
460 | p->right = q->left; |
461 | q->left = p; |
462 | *pp = q; |
463 | /* Make sure pp is right if the case below tries to use |
464 | it. */ |
465 | nodestack[sp++] = pp = &q->left; |
466 | q = p->right; |
467 | } |
468 | /* We know that Q can't be NULL here. We also know that Q is |
469 | black. */ |
470 | if ((q->left == NULL || !q->left->red) |
471 | && (q->right == NULL || !q->right->red)) |
472 | { |
473 | /* Q has two black successors. We can simply color Q red. |
474 | The whole subtree with root P is now missing one black |
475 | edge. Note that this action can temporarily make the |
476 | tree invalid (if P is red). But we will exit the loop |
477 | in that case and set P black, which both makes the tree |
478 | valid and also makes the black edge count come out |
479 | right. If P is black, we are at least one step closer |
480 | to the root and we'll try again the next iteration. */ |
481 | q->red = 1; |
482 | r = p; |
483 | } |
484 | else |
485 | { |
486 | /* Q is black, one of Q's successors is red. We can |
487 | repair the tree with one operation and will exit the |
488 | loop afterwards. */ |
489 | if (q->right == NULL || !q->right->red) |
490 | { |
491 | /* The left one is red. We perform the same action as |
492 | in maybe_split_for_insert where two red edges are |
493 | adjacent but point in different directions: |
494 | Q's left successor (let's call it Q2) becomes the |
495 | top of the subtree we are looking at, its parent (Q) |
496 | and grandparent (P) become its successors. The former |
497 | successors of Q2 are placed below P and Q. |
498 | P becomes black, and Q2 gets the color that P had. |
499 | This changes the black edge count only for node R and |
500 | its successors. */ |
501 | node q2 = q->left; |
502 | q2->red = p->red; |
503 | p->right = q2->left; |
504 | q->left = q2->right; |
505 | q2->right = q; |
506 | q2->left = p; |
507 | *pp = q2; |
508 | p->red = 0; |
509 | } |
510 | else |
511 | { |
512 | /* It's the right one. Rotate P left. P becomes black, |
513 | and Q gets the color that P had. Q's right successor |
514 | also becomes black. This changes the black edge |
515 | count only for node R and its successors. */ |
516 | q->red = p->red; |
517 | p->red = 0; |
518 | |
519 | q->right->red = 0; |
520 | |
521 | /* left rotate p */ |
522 | p->right = q->left; |
523 | q->left = p; |
524 | *pp = q; |
525 | } |
526 | |
527 | /* We're done. */ |
528 | sp = 1; |
529 | r = NULL; |
530 | } |
531 | } |
532 | else |
533 | { |
534 | /* Comments: see above. */ |
535 | q = p->left; |
536 | if (q->red) |
537 | { |
538 | q->red = 0; |
539 | p->red = 1; |
540 | p->left = q->right; |
541 | q->right = p; |
542 | *pp = q; |
543 | nodestack[sp++] = pp = &q->right; |
544 | q = p->left; |
545 | } |
546 | if ((q->right == NULL || !q->right->red) |
547 | && (q->left == NULL || !q->left->red)) |
548 | { |
549 | q->red = 1; |
550 | r = p; |
551 | } |
552 | else |
553 | { |
554 | if (q->left == NULL || !q->left->red) |
555 | { |
556 | node q2 = q->right; |
557 | q2->red = p->red; |
558 | p->left = q2->right; |
559 | q->right = q2->left; |
560 | q2->left = q; |
561 | q2->right = p; |
562 | *pp = q2; |
563 | p->red = 0; |
564 | } |
565 | else |
566 | { |
567 | q->red = p->red; |
568 | p->red = 0; |
569 | q->left->red = 0; |
570 | p->left = q->right; |
571 | q->right = p; |
572 | *pp = q; |
573 | } |
574 | sp = 1; |
575 | r = NULL; |
576 | } |
577 | } |
578 | --sp; |
579 | } |
580 | if (r != NULL) |
581 | r->red = 0; |
582 | } |
583 | |
584 | free (unchained); |
585 | return retval; |
586 | } |
587 | libc_hidden_def (__tdelete) |
588 | weak_alias (__tdelete, tdelete) |
589 | |
590 | |
591 | /* Walk the nodes of a tree. |
592 | ROOT is the root of the tree to be walked, ACTION the function to be |
593 | called at each node. LEVEL is the level of ROOT in the whole tree. */ |
594 | static void |
595 | internal_function |
596 | trecurse (const void *vroot, __action_fn_t action, int level) |
597 | { |
598 | const_node root = (const_node) vroot; |
599 | |
600 | if (root->left == NULL && root->right == NULL) |
601 | (*action) (root, leaf, level); |
602 | else |
603 | { |
604 | (*action) (root, preorder, level); |
605 | if (root->left != NULL) |
606 | trecurse (root->left, action, level + 1); |
607 | (*action) (root, postorder, level); |
608 | if (root->right != NULL) |
609 | trecurse (root->right, action, level + 1); |
610 | (*action) (root, endorder, level); |
611 | } |
612 | } |
613 | |
614 | |
615 | /* Walk the nodes of a tree. |
616 | ROOT is the root of the tree to be walked, ACTION the function to be |
617 | called at each node. */ |
618 | void |
619 | __twalk (const void *vroot, __action_fn_t action) |
620 | { |
621 | const_node root = (const_node) vroot; |
622 | |
623 | CHECK_TREE (root); |
624 | |
625 | if (root != NULL && action != NULL) |
626 | trecurse (root, action, 0); |
627 | } |
628 | libc_hidden_def (__twalk) |
629 | weak_alias (__twalk, twalk) |
630 | |
631 | |
632 | |
633 | /* The standardized functions miss an important functionality: the |
634 | tree cannot be removed easily. We provide a function to do this. */ |
635 | static void |
636 | internal_function |
637 | tdestroy_recurse (node root, __free_fn_t freefct) |
638 | { |
639 | if (root->left != NULL) |
640 | tdestroy_recurse (root->left, freefct); |
641 | if (root->right != NULL) |
642 | tdestroy_recurse (root->right, freefct); |
643 | (*freefct) ((void *) root->key); |
644 | /* Free the node itself. */ |
645 | free (root); |
646 | } |
647 | |
648 | void |
649 | __tdestroy (void *vroot, __free_fn_t freefct) |
650 | { |
651 | node root = (node) vroot; |
652 | |
653 | CHECK_TREE (root); |
654 | |
655 | if (root != NULL) |
656 | tdestroy_recurse (root, freefct); |
657 | } |
658 | weak_alias (__tdestroy, tdestroy) |
659 | |