1 | /* Copyright (C) 1995-2016 Free Software Foundation, Inc. |
---|---|

2 | This file is part of the GNU C Library. |

3 | Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997. |

4 | |

5 | The GNU C Library is free software; you can redistribute it and/or |

6 | modify it under the terms of the GNU Lesser General Public |

7 | License as published by the Free Software Foundation; either |

8 | version 2.1 of the License, or (at your option) any later version. |

9 | |

10 | The GNU C Library is distributed in the hope that it will be useful, |

11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

13 | Lesser General Public License for more details. |

14 | |

15 | You should have received a copy of the GNU Lesser General Public |

16 | License along with the GNU C Library; if not, see |

17 | <http://www.gnu.org/licenses/>. */ |

18 | |

19 | /* Tree search for red/black trees. |

20 | The algorithm for adding nodes is taken from one of the many "Algorithms" |

21 | books by Robert Sedgewick, although the implementation differs. |

22 | The algorithm for deleting nodes can probably be found in a book named |

23 | "Introduction to Algorithms" by Cormen/Leiserson/Rivest. At least that's |

24 | the book that my professor took most algorithms from during the "Data |

25 | Structures" course... |

26 | |

27 | Totally public domain. */ |

28 | |

29 | /* Red/black trees are binary trees in which the edges are colored either red |

30 | or black. They have the following properties: |

31 | 1. The number of black edges on every path from the root to a leaf is |

32 | constant. |

33 | 2. No two red edges are adjacent. |

34 | Therefore there is an upper bound on the length of every path, it's |

35 | O(log n) where n is the number of nodes in the tree. No path can be longer |

36 | than 1+2*P where P is the length of the shortest path in the tree. |

37 | Useful for the implementation: |

38 | 3. If one of the children of a node is NULL, then the other one is red |

39 | (if it exists). |

40 | |

41 | In the implementation, not the edges are colored, but the nodes. The color |

42 | is interpreted as the color of the edge leading to this node. The color is |

43 | meaningless for the root node, but we color the root node black for |

44 | convenience. All added nodes are red initially. |

45 | |

46 | Adding to a red/black tree is rather easy. The right place is searched |

47 | with a usual binary tree search. Additionally, whenever a node N is |

48 | reached that has two red successors, the successors are colored black and |

49 | the node itself colored red. This moves red edges up the tree where they |

50 | pose less of a problem once we get to really insert the new node. Changing |

51 | N's color to red may violate rule 2, however, so rotations may become |

52 | necessary to restore the invariants. Adding a new red leaf may violate |

53 | the same rule, so afterwards an additional check is run and the tree |

54 | possibly rotated. |

55 | |

56 | Deleting is hairy. There are mainly two nodes involved: the node to be |

57 | deleted (n1), and another node that is to be unchained from the tree (n2). |

58 | If n1 has a successor (the node with a smallest key that is larger than |

59 | n1), then the successor becomes n2 and its contents are copied into n1, |

60 | otherwise n1 becomes n2. |

61 | Unchaining a node may violate rule 1: if n2 is black, one subtree is |

62 | missing one black edge afterwards. The algorithm must try to move this |

63 | error upwards towards the root, so that the subtree that does not have |

64 | enough black edges becomes the whole tree. Once that happens, the error |

65 | has disappeared. It may not be necessary to go all the way up, since it |

66 | is possible that rotations and recoloring can fix the error before that. |

67 | |

68 | Although the deletion algorithm must walk upwards through the tree, we |

69 | do not store parent pointers in the nodes. Instead, delete allocates a |

70 | small array of parent pointers and fills it while descending the tree. |

71 | Since we know that the length of a path is O(log n), where n is the number |

72 | of nodes, this is likely to use less memory. */ |

73 | |

74 | /* Tree rotations look like this: |

75 |       A                C |

76 |      / \              / \ |

77 |     B   C            A   G |

78 |    / \ / \  -->     / \ |

79 |    D E F G         B   F |

80 |                   / \ |

81 |                  D   E |

82 | |

83 |    In this case, A has been rotated left.  This preserves the ordering of the |

84 |    binary tree.  */ |

85 | |

86 | #include <stdlib.h> |

87 | #include <string.h> |

88 | #include <search.h> |

89 | |

/* One node of the red/black tree.  */
struct node_t
{
  /* Callers expect this to be the first element in the structure - do not
     move!  */
  const void *key;
  /* Subtree taken when the comparison against KEY is negative.  */
  struct node_t *left;
  /* Subtree taken when the comparison against KEY is positive.  */
  struct node_t *right;
  /* Nonzero if the edge leading to this node is red.  */
  unsigned int red:1;
};

/* Handles used throughout this file: a modifiable node and a read-only
   one.  */
typedef struct node_t *node;
typedef const struct node_t *const_node;

100 | |

101 | #undef DEBUGGING |

102 | |

103 | #ifdef DEBUGGING |

104 | |

105 | /* Routines to check tree invariants. */ |

106 | |

107 | #include <assert.h> |

108 | |

109 | #define CHECK_TREE(a) check_tree(a) |

110 | |

111 | static void |

112 | check_tree_recurse (node p, int d_sofar, int d_total) |

113 | { |

114 | if (p == NULL) |

115 | { |

116 | assert (d_sofar == d_total); |

117 | return; |

118 | } |

119 | |

120 | check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total); |

121 | check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total); |

122 | if (p->left) |

123 | assert (!(p->left->red && p->red)); |

124 | if (p->right) |

125 | assert (!(p->right->red && p->red)); |

126 | } |

127 | |

128 | static void |

129 | check_tree (node root) |

130 | { |

131 | int cnt = 0; |

132 | node p; |

133 | if (root == NULL) |

134 | return; |

135 | root->red = 0; |

136 | for(p = root->left; p; p = p->left) |

137 | cnt += !p->red; |

138 | check_tree_recurse (root, 0, cnt); |

139 | } |

140 | |

141 | |

142 | #else |

143 | |

144 | #define CHECK_TREE(a) |

145 | |

146 | #endif |

147 | |

148 | /* Possibly "split" a node with two red successors, and/or fix up two red |

149 | edges in a row. ROOTP is a pointer to the lowest node we visited, PARENTP |

150 | and GPARENTP pointers to its parent/grandparent. P_R and GP_R contain the |

151 | comparison values that determined which way was taken in the tree to reach |

152 | ROOTP. MODE is 1 if we need not do the split, but must check for two red |

153 | edges between GPARENTP and ROOTP. */ |

154 | static void |

155 | maybe_split_for_insert (node *rootp, node *parentp, node *gparentp, |

156 | int p_r, int gp_r, int mode) |

157 | { |

158 | node root = *rootp; |

159 | node *rp, *lp; |

160 | rp = &(*rootp)->right; |

161 | lp = &(*rootp)->left; |

162 | |

163 | /* See if we have to split this node (both successors red). */ |

164 | if (mode == 1 |

165 | || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red)) |

166 | { |

167 | /* This node becomes red, its successors black. */ |

168 | root->red = 1; |

169 | if (*rp) |

170 | (*rp)->red = 0; |

171 | if (*lp) |

172 | (*lp)->red = 0; |

173 | |

174 | /* If the parent of this node is also red, we have to do |

175 | rotations. */ |

176 | if (parentp != NULL && (*parentp)->red) |

177 | { |

178 | node gp = *gparentp; |

179 | node p = *parentp; |

180 | /* There are two main cases: |

181 | 1. The edge types (left or right) of the two red edges differ. |

182 | 2. Both red edges are of the same type. |

183 | There exist two symmetries of each case, so there is a total of |

184 | 4 cases. */ |

185 | if ((p_r > 0) != (gp_r > 0)) |

186 | { |

187 | /* Put the child at the top of the tree, with its parent |

188 | and grandparent as successors. */ |

189 | p->red = 1; |

190 | gp->red = 1; |

191 | root->red = 0; |

192 | if (p_r < 0) |

193 | { |

194 | /* Child is left of parent. */ |

195 | p->left = *rp; |

196 | *rp = p; |

197 | gp->right = *lp; |

198 | *lp = gp; |

199 | } |

200 | else |

201 | { |

202 | /* Child is right of parent. */ |

203 | p->right = *lp; |

204 | *lp = p; |

205 | gp->left = *rp; |

206 | *rp = gp; |

207 | } |

208 | *gparentp = root; |

209 | } |

210 | else |

211 | { |

212 | *gparentp = *parentp; |

213 | /* Parent becomes the top of the tree, grandparent and |

214 | child are its successors. */ |

215 | p->red = 0; |

216 | gp->red = 1; |

217 | if (p_r < 0) |

218 | { |

219 | /* Left edges. */ |

220 | gp->left = p->right; |

221 | p->right = gp; |

222 | } |

223 | else |

224 | { |

225 | /* Right edges. */ |

226 | gp->right = p->left; |

227 | p->left = gp; |

228 | } |

229 | } |

230 | } |

231 | } |

232 | } |

233 | |

234 | /* Find or insert datum into search tree. |

235 | KEY is the key to be located, ROOTP is the address of tree root, |

236 | COMPAR the ordering function. */ |

237 | void * |

238 | __tsearch (const void *key, void **vrootp, __compar_fn_t compar) |

239 | { |

240 | node q; |

241 | node *parentp = NULL, *gparentp = NULL; |

242 | node *rootp = (node *) vrootp; |

243 | node *nextp; |

244 | int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler. */ |

245 | |

246 | if (rootp == NULL) |

247 | return NULL; |

248 | |

249 | /* This saves some additional tests below. */ |

250 | if (*rootp != NULL) |

251 | (*rootp)->red = 0; |

252 | |

253 | CHECK_TREE (*rootp); |

254 | |

255 | nextp = rootp; |

256 | while (*nextp != NULL) |

257 | { |

258 | node root = *rootp; |

259 | r = (*compar) (key, root->key); |

260 | if (r == 0) |

261 | return root; |

262 | |

263 | maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0); |

264 | /* If that did any rotations, parentp and gparentp are now garbage. |

265 | That doesn't matter, because the values they contain are never |

266 | used again in that case. */ |

267 | |

268 | nextp = r < 0 ? &root->left : &root->right; |

269 | if (*nextp == NULL) |

270 | break; |

271 | |

272 | gparentp = parentp; |

273 | parentp = rootp; |

274 | rootp = nextp; |

275 | |

276 | gp_r = p_r; |

277 | p_r = r; |

278 | } |

279 | |

280 | q = (struct node_t *) malloc (sizeof (struct node_t)); |

281 | if (q != NULL) |

282 | { |

283 | *nextp = q; /* link new node to old */ |

284 | q->key = key; /* initialize new node */ |

285 | q->red = 1; |

286 | q->left = q->right = NULL; |

287 | |

288 | if (nextp != rootp) |

289 | /* There may be two red edges in a row now, which we must avoid by |

290 | rotating the tree. */ |

291 | maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1); |

292 | } |

293 | |

294 | return q; |

295 | } |

296 | libc_hidden_def (__tsearch) |

297 | weak_alias (__tsearch, tsearch) |

298 | |

299 | |

300 | /* Find datum in search tree. |

301 | KEY is the key to be located, ROOTP is the address of tree root, |

302 | COMPAR the ordering function. */ |

303 | void * |

304 | __tfind (const void *key, void *const *vrootp, __compar_fn_t compar) |

305 | { |

306 | node *rootp = (node *) vrootp; |

307 | |

308 | if (rootp == NULL) |

309 | return NULL; |

310 | |

311 | CHECK_TREE (*rootp); |

312 | |

313 | while (*rootp != NULL) |

314 | { |

315 | node root = *rootp; |

316 | int r; |

317 | |

318 | r = (*compar) (key, root->key); |

319 | if (r == 0) |

320 | return root; |

321 | |

322 | rootp = r < 0 ? &root->left : &root->right; |

323 | } |

324 | return NULL; |

325 | } |

326 | libc_hidden_def (__tfind) |

327 | weak_alias (__tfind, tfind) |

328 | |

329 | |

330 | /* Delete node with given key. |

331 | KEY is the key to be deleted, ROOTP is the address of the root of tree, |

332 | COMPAR the comparison function. */ |

333 | void * |

334 | __tdelete (const void *key, void **vrootp, __compar_fn_t compar) |

335 | { |

336 | node p, q, r, retval; |

337 | int cmp; |

338 | node *rootp = (node *) vrootp; |

339 | node root, unchained; |

340 | /* Stack of nodes so we remember the parents without recursion. It's |

341 | _very_ unlikely that there are paths longer than 40 nodes. The tree |

342 | would need to have around 250.000 nodes. */ |

343 | int stacksize = 40; |

344 | int sp = 0; |

345 | node **nodestack = alloca (sizeof (node *) * stacksize); |

346 | |

347 | if (rootp == NULL) |

348 | return NULL; |

349 | p = *rootp; |

350 | if (p == NULL) |

351 | return NULL; |

352 | |

353 | CHECK_TREE (p); |

354 | |

355 | while ((cmp = (*compar) (key, (*rootp)->key)) != 0) |

356 | { |

357 | if (sp == stacksize) |

358 | { |

359 | node **newstack; |

360 | stacksize += 20; |

361 | newstack = alloca (sizeof (node *) * stacksize); |

362 | nodestack = memcpy (newstack, nodestack, sp * sizeof (node *)); |

363 | } |

364 | |

365 | nodestack[sp++] = rootp; |

366 | p = *rootp; |

367 | rootp = ((cmp < 0) |

368 | ? &(*rootp)->left |

369 | : &(*rootp)->right); |

370 | if (*rootp == NULL) |

371 | return NULL; |

372 | } |

373 | |

374 | /* This is bogus if the node to be deleted is the root... this routine |

375 | really should return an integer with 0 for success, -1 for failure |

376 | and errno = ESRCH or something. */ |

377 | retval = p; |

378 | |

379 | /* We don't unchain the node we want to delete. Instead, we overwrite |

380 | it with its successor and unchain the successor. If there is no |

381 | successor, we really unchain the node to be deleted. */ |

382 | |

383 | root = *rootp; |

384 | |

385 | r = root->right; |

386 | q = root->left; |

387 | |

388 | if (q == NULL || r == NULL) |

389 | unchained = root; |

390 | else |

391 | { |

392 | node *parent = rootp, *up = &root->right; |

393 | for (;;) |

394 | { |

395 | if (sp == stacksize) |

396 | { |

397 | node **newstack; |

398 | stacksize += 20; |

399 | newstack = alloca (sizeof (node *) * stacksize); |

400 | nodestack = memcpy (newstack, nodestack, sp * sizeof (node *)); |

401 | } |

402 | nodestack[sp++] = parent; |

403 | parent = up; |

404 | if ((*up)->left == NULL) |

405 | break; |

406 | up = &(*up)->left; |

407 | } |

408 | unchained = *up; |

409 | } |

410 | |

411 | /* We know that either the left or right successor of UNCHAINED is NULL. |

412 | R becomes the other one, it is chained into the parent of UNCHAINED. */ |

413 | r = unchained->left; |

414 | if (r == NULL) |

415 | r = unchained->right; |

416 | if (sp == 0) |

417 | *rootp = r; |

418 | else |

419 | { |

420 | q = *nodestack[sp-1]; |

421 | if (unchained == q->right) |

422 | q->right = r; |

423 | else |

424 | q->left = r; |

425 | } |

426 | |

427 | if (unchained != root) |

428 | root->key = unchained->key; |

429 | if (!unchained->red) |

430 | { |

431 | /* Now we lost a black edge, which means that the number of black |

432 | edges on every path is no longer constant. We must balance the |

433 | tree. */ |

434 | /* NODESTACK now contains all parents of R. R is likely to be NULL |

435 | in the first iteration. */ |

436 | /* NULL nodes are considered black throughout - this is necessary for |

437 | correctness. */ |

438 | while (sp > 0 && (r == NULL || !r->red)) |

439 | { |

440 | node *pp = nodestack[sp - 1]; |

441 | p = *pp; |

442 | /* Two symmetric cases. */ |

443 | if (r == p->left) |

444 | { |

445 | /* Q is R's brother, P is R's parent. The subtree with root |

446 | R has one black edge less than the subtree with root Q. */ |

447 | q = p->right; |

448 | if (q->red) |

449 | { |

450 | /* If Q is red, we know that P is black. We rotate P left |

451 | so that Q becomes the top node in the tree, with P below |

452 | it. P is colored red, Q is colored black. |

453 | This action does not change the black edge count for any |

454 | leaf in the tree, but we will be able to recognize one |

455 | of the following situations, which all require that Q |

456 | is black. */ |

457 | q->red = 0; |

458 | p->red = 1; |

459 | /* Left rotate p. */ |

460 | p->right = q->left; |

461 | q->left = p; |

462 | *pp = q; |

463 | /* Make sure pp is right if the case below tries to use |

464 | it. */ |

465 | nodestack[sp++] = pp = &q->left; |

466 | q = p->right; |

467 | } |

468 | /* We know that Q can't be NULL here. We also know that Q is |

469 | black. */ |

470 | if ((q->left == NULL || !q->left->red) |

471 | && (q->right == NULL || !q->right->red)) |

472 | { |

473 | /* Q has two black successors. We can simply color Q red. |

474 | The whole subtree with root P is now missing one black |

475 | edge. Note that this action can temporarily make the |

476 | tree invalid (if P is red). But we will exit the loop |

477 | in that case and set P black, which both makes the tree |

478 | valid and also makes the black edge count come out |

479 | right. If P is black, we are at least one step closer |

480 | to the root and we'll try again the next iteration. */ |

481 | q->red = 1; |

482 | r = p; |

483 | } |

484 | else |

485 | { |

486 | /* Q is black, one of Q's successors is red. We can |

487 | repair the tree with one operation and will exit the |

488 | loop afterwards. */ |

489 | if (q->right == NULL || !q->right->red) |

490 | { |

491 | /* The left one is red. We perform the same action as |

492 | in maybe_split_for_insert where two red edges are |

493 | adjacent but point in different directions: |

494 | Q's left successor (let's call it Q2) becomes the |

495 | top of the subtree we are looking at, its parent (Q) |

496 | and grandparent (P) become its successors. The former |

497 | successors of Q2 are placed below P and Q. |

498 | P becomes black, and Q2 gets the color that P had. |

499 | This changes the black edge count only for node R and |

500 | its successors. */ |

501 | node q2 = q->left; |

502 | q2->red = p->red; |

503 | p->right = q2->left; |

504 | q->left = q2->right; |

505 | q2->right = q; |

506 | q2->left = p; |

507 | *pp = q2; |

508 | p->red = 0; |

509 | } |

510 | else |

511 | { |

512 | /* It's the right one. Rotate P left. P becomes black, |

513 | and Q gets the color that P had. Q's right successor |

514 | also becomes black. This changes the black edge |

515 | count only for node R and its successors. */ |

516 | q->red = p->red; |

517 | p->red = 0; |

518 | |

519 | q->right->red = 0; |

520 | |

521 | /* left rotate p */ |

522 | p->right = q->left; |

523 | q->left = p; |

524 | *pp = q; |

525 | } |

526 | |

527 | /* We're done. */ |

528 | sp = 1; |

529 | r = NULL; |

530 | } |

531 | } |

532 | else |

533 | { |

534 | /* Comments: see above. */ |

535 | q = p->left; |

536 | if (q->red) |

537 | { |

538 | q->red = 0; |

539 | p->red = 1; |

540 | p->left = q->right; |

541 | q->right = p; |

542 | *pp = q; |

543 | nodestack[sp++] = pp = &q->right; |

544 | q = p->left; |

545 | } |

546 | if ((q->right == NULL || !q->right->red) |

547 | && (q->left == NULL || !q->left->red)) |

548 | { |

549 | q->red = 1; |

550 | r = p; |

551 | } |

552 | else |

553 | { |

554 | if (q->left == NULL || !q->left->red) |

555 | { |

556 | node q2 = q->right; |

557 | q2->red = p->red; |

558 | p->left = q2->right; |

559 | q->right = q2->left; |

560 | q2->left = q; |

561 | q2->right = p; |

562 | *pp = q2; |

563 | p->red = 0; |

564 | } |

565 | else |

566 | { |

567 | q->red = p->red; |

568 | p->red = 0; |

569 | q->left->red = 0; |

570 | p->left = q->right; |

571 | q->right = p; |

572 | *pp = q; |

573 | } |

574 | sp = 1; |

575 | r = NULL; |

576 | } |

577 | } |

578 | --sp; |

579 | } |

580 | if (r != NULL) |

581 | r->red = 0; |

582 | } |

583 | |

584 | free (unchained); |

585 | return retval; |

586 | } |

587 | libc_hidden_def (__tdelete) |

588 | weak_alias (__tdelete, tdelete) |

589 | |

590 | |

591 | /* Walk the nodes of a tree. |

592 | ROOT is the root of the tree to be walked, ACTION the function to be |

593 | called at each node. LEVEL is the level of ROOT in the whole tree. */ |

594 | static void |

595 | internal_function |

596 | trecurse (const void *vroot, __action_fn_t action, int level) |

597 | { |

598 | const_node root = (const_node) vroot; |

599 | |

600 | if (root->left == NULL && root->right == NULL) |

601 | (*action) (root, leaf, level); |

602 | else |

603 | { |

604 | (*action) (root, preorder, level); |

605 | if (root->left != NULL) |

606 | trecurse (root->left, action, level + 1); |

607 | (*action) (root, postorder, level); |

608 | if (root->right != NULL) |

609 | trecurse (root->right, action, level + 1); |

610 | (*action) (root, endorder, level); |

611 | } |

612 | } |

613 | |

614 | |

615 | /* Walk the nodes of a tree. |

616 | ROOT is the root of the tree to be walked, ACTION the function to be |

617 | called at each node. */ |

618 | void |

619 | __twalk (const void *vroot, __action_fn_t action) |

620 | { |

621 | const_node root = (const_node) vroot; |

622 | |

623 | CHECK_TREE (root); |

624 | |

625 | if (root != NULL && action != NULL) |

626 | trecurse (root, action, 0); |

627 | } |

628 | libc_hidden_def (__twalk) |

629 | weak_alias (__twalk, twalk) |

630 | |

631 | |

632 | |

633 | /* The standardized functions miss an important functionality: the |

634 | tree cannot be removed easily. We provide a function to do this. */ |

635 | static void |

636 | internal_function |

637 | tdestroy_recurse (node root, __free_fn_t freefct) |

638 | { |

639 | if (root->left != NULL) |

640 | tdestroy_recurse (root->left, freefct); |

641 | if (root->right != NULL) |

642 | tdestroy_recurse (root->right, freefct); |

643 | (*freefct) ((void *) root->key); |

644 | /* Free the node itself. */ |

645 | free (root); |

646 | } |

647 | |

648 | void |

649 | __tdestroy (void *vroot, __free_fn_t freefct) |

650 | { |

651 | node root = (node) vroot; |

652 | |

653 | CHECK_TREE (root); |

654 | |

655 | if (root != NULL) |

656 | tdestroy_recurse (root, freefct); |

657 | } |

658 | weak_alias (__tdestroy, tdestroy) |

659 |