Back to index

glibc  2.9
tsearch.c
Go to the documentation of this file.
00001 /* Copyright (C) 1995, 1996, 1997, 2000, 2006 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 /* Tree search for red/black trees.
00021    The algorithm for adding nodes is taken from one of the many "Algorithms"
00022    books by Robert Sedgewick, although the implementation differs.
00023    The algorithm for deleting nodes can probably be found in a book named
00024    "Introduction to Algorithms" by Cormen/Leiserson/Rivest.  At least that's
00025    the book that my professor took most algorithms from during the "Data
00026    Structures" course...
00027 
00028    Totally public domain.  */
00029 
00030 /* Red/black trees are binary trees in which the edges are colored either red
00031    or black.  They have the following properties:
00032    1. The number of black edges on every path from the root to a leaf is
00033       constant.
00034    2. No two red edges are adjacent.
00035    Therefore there is an upper bound on the length of every path, it's
00036    O(log n) where n is the number of nodes in the tree.  No path can be longer
00037    than 1+2*P where P is the length of the shortest path in the tree.
00038    Useful for the implementation:
00039    3. If one of the children of a node is NULL, then the other one is red
00040       (if it exists).
00041 
00042    In the implementation, not the edges are colored, but the nodes.  The color
00043    interpreted as the color of the edge leading to this node.  The color is
00044    meaningless for the root node, but we color the root node black for
00045    convenience.  All added nodes are red initially.
00046 
00047    Adding to a red/black tree is rather easy.  The right place is searched
00048    with a usual binary tree search.  Additionally, whenever a node N is
00049    reached that has two red successors, the successors are colored black and
00050    the node itself colored red.  This moves red edges up the tree where they
00051    pose less of a problem once we get to really insert the new node.  Changing
00052    N's color to red may violate rule 2, however, so rotations may become
00053    necessary to restore the invariants.  Adding a new red leaf may violate
00054    the same rule, so afterwards an additional check is run and the tree
00055    possibly rotated.
00056 
00057    Deleting is hairy.  There are mainly two nodes involved: the node to be
00058    deleted (n1), and another node that is to be unchained from the tree (n2).
00059    If n1 has a successor (the node with a smallest key that is larger than
00060    n1), then the successor becomes n2 and its contents are copied into n1,
00061    otherwise n1 becomes n2.
00062    Unchaining a node may violate rule 1: if n2 is black, one subtree is
00063    missing one black edge afterwards.  The algorithm must try to move this
00064    error upwards towards the root, so that the subtree that does not have
00065    enough black edges becomes the whole tree.  Once that happens, the error
00066    has disappeared.  It may not be necessary to go all the way up, since it
00067    is possible that rotations and recoloring can fix the error before that.
00068 
00069    Although the deletion algorithm must walk upwards through the tree, we
00070    do not store parent pointers in the nodes.  Instead, delete allocates a
00071    small array of parent pointers and fills it while descending the tree.
00072    Since we know that the length of a path is O(log n), where n is the number
00073    of nodes, this is likely to use less memory.  */
00074 
00075 /* Tree rotations look like this:
00076       A                C
00077      / \              / \
00078     B   C            A   G
00079    / \ / \  -->     / \
00080    D E F G         B   F
00081                   / \
00082                  D   E
00083 
00084    In this case, A has been rotated left.  This preserves the ordering of the
00085    binary tree.  */
00086 
00087 #include <stdlib.h>
00088 #include <string.h>
00089 #include <search.h>
00090 
00091 typedef struct node_t
00092 {
00093   /* Callers expect this to be the first element in the structure - do not
00094      move!  */
00095   const void *key;
00096   struct node_t *left;
00097   struct node_t *right;
00098   unsigned int red:1;
00099 } *node;
00100 typedef const struct node_t *const_node;
00101 
00102 #undef DEBUGGING
00103 
00104 #ifdef DEBUGGING
00105 
00106 /* Routines to check tree invariants.  */
00107 
00108 #include <assert.h>
00109 
00110 #define CHECK_TREE(a) check_tree(a)
00111 
00112 static void
00113 check_tree_recurse (node p, int d_sofar, int d_total)
00114 {
00115   if (p == NULL)
00116     {
00117       assert (d_sofar == d_total);
00118       return;
00119     }
00120 
00121   check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total);
00122   check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total);
00123   if (p->left)
00124     assert (!(p->left->red && p->red));
00125   if (p->right)
00126     assert (!(p->right->red && p->red));
00127 }
00128 
00129 static void
00130 check_tree (node root)
00131 {
00132   int cnt = 0;
00133   node p;
00134   if (root == NULL)
00135     return;
00136   root->red = 0;
00137   for(p = root->left; p; p = p->left)
00138     cnt += !p->red;
00139   check_tree_recurse (root, 0, cnt);
00140 }
00141 
00142 
00143 #else
00144 
00145 #define CHECK_TREE(a)
00146 
00147 #endif
00148 
00149 /* Possibly "split" a node with two red successors, and/or fix up two red
00150    edges in a row.  ROOTP is a pointer to the lowest node we visited, PARENTP
00151    and GPARENTP pointers to its parent/grandparent.  P_R and GP_R contain the
00152    comparison values that determined which way was taken in the tree to reach
00153    ROOTP.  MODE is 1 if we need not do the split, but must check for two red
00154    edges between GPARENTP and ROOTP.  */
00155 static void
00156 maybe_split_for_insert (node *rootp, node *parentp, node *gparentp,
00157                      int p_r, int gp_r, int mode)
00158 {
00159   node root = *rootp;
00160   node *rp, *lp;
00161   rp = &(*rootp)->right;
00162   lp = &(*rootp)->left;
00163 
00164   /* See if we have to split this node (both successors red).  */
00165   if (mode == 1
00166       || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red))
00167     {
00168       /* This node becomes red, its successors black.  */
00169       root->red = 1;
00170       if (*rp)
00171        (*rp)->red = 0;
00172       if (*lp)
00173        (*lp)->red = 0;
00174 
00175       /* If the parent of this node is also red, we have to do
00176         rotations.  */
00177       if (parentp != NULL && (*parentp)->red)
00178        {
00179          node gp = *gparentp;
00180          node p = *parentp;
00181          /* There are two main cases:
00182             1. The edge types (left or right) of the two red edges differ.
00183             2. Both red edges are of the same type.
00184             There exist two symmetries of each case, so there is a total of
00185             4 cases.  */
00186          if ((p_r > 0) != (gp_r > 0))
00187            {
00188              /* Put the child at the top of the tree, with its parent
00189                and grandparent as successors.  */
00190              p->red = 1;
00191              gp->red = 1;
00192              root->red = 0;
00193              if (p_r < 0)
00194               {
00195                 /* Child is left of parent.  */
00196                 p->left = *rp;
00197                 *rp = p;
00198                 gp->right = *lp;
00199                 *lp = gp;
00200               }
00201              else
00202               {
00203                 /* Child is right of parent.  */
00204                 p->right = *lp;
00205                 *lp = p;
00206                 gp->left = *rp;
00207                 *rp = gp;
00208               }
00209              *gparentp = root;
00210            }
00211          else
00212            {
00213              *gparentp = *parentp;
00214              /* Parent becomes the top of the tree, grandparent and
00215                child are its successors.  */
00216              p->red = 0;
00217              gp->red = 1;
00218              if (p_r < 0)
00219               {
00220                 /* Left edges.  */
00221                 gp->left = p->right;
00222                 p->right = gp;
00223               }
00224              else
00225               {
00226                 /* Right edges.  */
00227                 gp->right = p->left;
00228                 p->left = gp;
00229               }
00230            }
00231        }
00232     }
00233 }
00234 
00235 /* Find or insert datum into search tree.
00236    KEY is the key to be located, ROOTP is the address of tree root,
00237    COMPAR the ordering function.  */
00238 void *
00239 __tsearch (const void *key, void **vrootp, __compar_fn_t compar)
00240 {
00241   node q;
00242   node *parentp = NULL, *gparentp = NULL;
00243   node *rootp = (node *) vrootp;
00244   node *nextp;
00245   int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler.  */
00246 
00247   if (rootp == NULL)
00248     return NULL;
00249 
00250   /* This saves some additional tests below.  */
00251   if (*rootp != NULL)
00252     (*rootp)->red = 0;
00253 
00254   CHECK_TREE (*rootp);
00255 
00256   nextp = rootp;
00257   while (*nextp != NULL)
00258     {
00259       node root = *rootp;
00260       r = (*compar) (key, root->key);
00261       if (r == 0)
00262        return root;
00263 
00264       maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0);
00265       /* If that did any rotations, parentp and gparentp are now garbage.
00266         That doesn't matter, because the values they contain are never
00267         used again in that case.  */
00268 
00269       nextp = r < 0 ? &root->left : &root->right;
00270       if (*nextp == NULL)
00271        break;
00272 
00273       gparentp = parentp;
00274       parentp = rootp;
00275       rootp = nextp;
00276 
00277       gp_r = p_r;
00278       p_r = r;
00279     }
00280 
00281   q = (struct node_t *) malloc (sizeof (struct node_t));
00282   if (q != NULL)
00283     {
00284       *nextp = q;                  /* link new node to old */
00285       q->key = key;                /* initialize new node */
00286       q->red = 1;
00287       q->left = q->right = NULL;
00288 
00289       if (nextp != rootp)
00290        /* There may be two red edges in a row now, which we must avoid by
00291           rotating the tree.  */
00292        maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1);
00293     }
00294 
00295   return q;
00296 }
00297 weak_alias (__tsearch, tsearch)
00298 
00299 
00300 /* Find datum in search tree.
00301    KEY is the key to be located, ROOTP is the address of tree root,
00302    COMPAR the ordering function.  */
00303 void *
00304 __tfind (key, vrootp, compar)
00305      const void *key;
00306      void *const *vrootp;
00307      __compar_fn_t compar;
00308 {
00309   node *rootp = (node *) vrootp;
00310 
00311   if (rootp == NULL)
00312     return NULL;
00313 
00314   CHECK_TREE (*rootp);
00315 
00316   while (*rootp != NULL)
00317     {
00318       node root = *rootp;
00319       int r;
00320 
00321       r = (*compar) (key, root->key);
00322       if (r == 0)
00323        return root;
00324 
00325       rootp = r < 0 ? &root->left : &root->right;
00326     }
00327   return NULL;
00328 }
00329 weak_alias (__tfind, tfind)
00330 
00331 
00332 /* Delete node with given key.
00333    KEY is the key to be deleted, ROOTP is the address of the root of tree,
00334    COMPAR the comparison function.  */
00335 void *
00336 __tdelete (const void *key, void **vrootp, __compar_fn_t compar)
00337 {
00338   node p, q, r, retval;
00339   int cmp;
00340   node *rootp = (node *) vrootp;
00341   node root, unchained;
00342   /* Stack of nodes so we remember the parents without recursion.  It's
00343      _very_ unlikely that there are paths longer than 40 nodes.  The tree
00344      would need to have around 250.000 nodes.  */
00345   int stacksize = 40;
00346   int sp = 0;
00347   node **nodestack = alloca (sizeof (node *) * stacksize);
00348 
00349   if (rootp == NULL)
00350     return NULL;
00351   p = *rootp;
00352   if (p == NULL)
00353     return NULL;
00354 
00355   CHECK_TREE (p);
00356 
00357   while ((cmp = (*compar) (key, (*rootp)->key)) != 0)
00358     {
00359       if (sp == stacksize)
00360        {
00361          node **newstack;
00362          stacksize += 20;
00363          newstack = alloca (sizeof (node *) * stacksize);
00364          nodestack = memcpy (newstack, nodestack, sp * sizeof (node *));
00365        }
00366 
00367       nodestack[sp++] = rootp;
00368       p = *rootp;
00369       rootp = ((cmp < 0)
00370               ? &(*rootp)->left
00371               : &(*rootp)->right);
00372       if (*rootp == NULL)
00373        return NULL;
00374     }
00375 
00376   /* This is bogus if the node to be deleted is the root... this routine
00377      really should return an integer with 0 for success, -1 for failure
00378      and errno = ESRCH or something.  */
00379   retval = p;
00380 
00381   /* We don't unchain the node we want to delete. Instead, we overwrite
00382      it with its successor and unchain the successor.  If there is no
00383      successor, we really unchain the node to be deleted.  */
00384 
00385   root = *rootp;
00386 
00387   r = root->right;
00388   q = root->left;
00389 
00390   if (q == NULL || r == NULL)
00391     unchained = root;
00392   else
00393     {
00394       node *parent = rootp, *up = &root->right;
00395       for (;;)
00396        {
00397          if (sp == stacksize)
00398            {
00399              node **newstack;
00400              stacksize += 20;
00401              newstack = alloca (sizeof (node *) * stacksize);
00402              nodestack = memcpy (newstack, nodestack, sp * sizeof (node *));
00403            }
00404          nodestack[sp++] = parent;
00405          parent = up;
00406          if ((*up)->left == NULL)
00407            break;
00408          up = &(*up)->left;
00409        }
00410       unchained = *up;
00411     }
00412 
00413   /* We know that either the left or right successor of UNCHAINED is NULL.
00414      R becomes the other one, it is chained into the parent of UNCHAINED.  */
00415   r = unchained->left;
00416   if (r == NULL)
00417     r = unchained->right;
00418   if (sp == 0)
00419     *rootp = r;
00420   else
00421     {
00422       q = *nodestack[sp-1];
00423       if (unchained == q->right)
00424        q->right = r;
00425       else
00426        q->left = r;
00427     }
00428 
00429   if (unchained != root)
00430     root->key = unchained->key;
00431   if (!unchained->red)
00432     {
00433       /* Now we lost a black edge, which means that the number of black
00434         edges on every path is no longer constant.  We must balance the
00435         tree.  */
00436       /* NODESTACK now contains all parents of R.  R is likely to be NULL
00437         in the first iteration.  */
00438       /* NULL nodes are considered black throughout - this is necessary for
00439         correctness.  */
00440       while (sp > 0 && (r == NULL || !r->red))
00441        {
00442          node *pp = nodestack[sp - 1];
00443          p = *pp;
00444          /* Two symmetric cases.  */
00445          if (r == p->left)
00446            {
00447              /* Q is R's brother, P is R's parent.  The subtree with root
00448                R has one black edge less than the subtree with root Q.  */
00449              q = p->right;
00450              if (q->red)
00451               {
00452                 /* If Q is red, we know that P is black. We rotate P left
00453                    so that Q becomes the top node in the tree, with P below
00454                    it.  P is colored red, Q is colored black.
00455                    This action does not change the black edge count for any
00456                    leaf in the tree, but we will be able to recognize one
00457                    of the following situations, which all require that Q
00458                    is black.  */
00459                 q->red = 0;
00460                 p->red = 1;
00461                 /* Left rotate p.  */
00462                 p->right = q->left;
00463                 q->left = p;
00464                 *pp = q;
00465                 /* Make sure pp is right if the case below tries to use
00466                    it.  */
00467                 nodestack[sp++] = pp = &q->left;
00468                 q = p->right;
00469               }
00470              /* We know that Q can't be NULL here.  We also know that Q is
00471                black.  */
00472              if ((q->left == NULL || !q->left->red)
00473                 && (q->right == NULL || !q->right->red))
00474               {
00475                 /* Q has two black successors.  We can simply color Q red.
00476                    The whole subtree with root P is now missing one black
00477                    edge.  Note that this action can temporarily make the
00478                    tree invalid (if P is red).  But we will exit the loop
00479                    in that case and set P black, which both makes the tree
00480                    valid and also makes the black edge count come out
00481                    right.  If P is black, we are at least one step closer
00482                    to the root and we'll try again the next iteration.  */
00483                 q->red = 1;
00484                 r = p;
00485               }
00486              else
00487               {
00488                 /* Q is black, one of Q's successors is red.  We can
00489                    repair the tree with one operation and will exit the
00490                    loop afterwards.  */
00491                 if (q->right == NULL || !q->right->red)
00492                   {
00493                     /* The left one is red.  We perform the same action as
00494                       in maybe_split_for_insert where two red edges are
00495                       adjacent but point in different directions:
00496                       Q's left successor (let's call it Q2) becomes the
00497                       top of the subtree we are looking at, its parent (Q)
00498                       and grandparent (P) become its successors. The former
00499                       successors of Q2 are placed below P and Q.
00500                       P becomes black, and Q2 gets the color that P had.
00501                       This changes the black edge count only for node R and
00502                       its successors.  */
00503                     node q2 = q->left;
00504                     q2->red = p->red;
00505                     p->right = q2->left;
00506                     q->left = q2->right;
00507                     q2->right = q;
00508                     q2->left = p;
00509                     *pp = q2;
00510                     p->red = 0;
00511                   }
00512                 else
00513                   {
00514                     /* It's the right one.  Rotate P left. P becomes black,
00515                       and Q gets the color that P had.  Q's right successor
00516                       also becomes black.  This changes the black edge
00517                       count only for node R and its successors.  */
00518                     q->red = p->red;
00519                     p->red = 0;
00520 
00521                     q->right->red = 0;
00522 
00523                     /* left rotate p */
00524                     p->right = q->left;
00525                     q->left = p;
00526                     *pp = q;
00527                   }
00528 
00529                 /* We're done.  */
00530                 sp = 1;
00531                 r = NULL;
00532               }
00533            }
00534          else
00535            {
00536              /* Comments: see above.  */
00537              q = p->left;
00538              if (q->red)
00539               {
00540                 q->red = 0;
00541                 p->red = 1;
00542                 p->left = q->right;
00543                 q->right = p;
00544                 *pp = q;
00545                 nodestack[sp++] = pp = &q->right;
00546                 q = p->left;
00547               }
00548              if ((q->right == NULL || !q->right->red)
00549                      && (q->left == NULL || !q->left->red))
00550               {
00551                 q->red = 1;
00552                 r = p;
00553               }
00554              else
00555               {
00556                 if (q->left == NULL || !q->left->red)
00557                   {
00558                     node q2 = q->right;
00559                     q2->red = p->red;
00560                     p->left = q2->right;
00561                     q->right = q2->left;
00562                     q2->left = q;
00563                     q2->right = p;
00564                     *pp = q2;
00565                     p->red = 0;
00566                   }
00567                 else
00568                   {
00569                     q->red = p->red;
00570                     p->red = 0;
00571                     q->left->red = 0;
00572                     p->left = q->right;
00573                     q->right = p;
00574                     *pp = q;
00575                   }
00576                 sp = 1;
00577                 r = NULL;
00578               }
00579            }
00580          --sp;
00581        }
00582       if (r != NULL)
00583        r->red = 0;
00584     }
00585 
00586   free (unchained);
00587   return retval;
00588 }
00589 weak_alias (__tdelete, tdelete)
00590 
00591 
00592 /* Walk the nodes of a tree.
00593    ROOT is the root of the tree to be walked, ACTION the function to be
00594    called at each node.  LEVEL is the level of ROOT in the whole tree.  */
00595 static void
00596 internal_function
00597 trecurse (const void *vroot, __action_fn_t action, int level)
00598 {
00599   const_node root = (const_node) vroot;
00600 
00601   if (root->left == NULL && root->right == NULL)
00602     (*action) (root, leaf, level);
00603   else
00604     {
00605       (*action) (root, preorder, level);
00606       if (root->left != NULL)
00607        trecurse (root->left, action, level + 1);
00608       (*action) (root, postorder, level);
00609       if (root->right != NULL)
00610        trecurse (root->right, action, level + 1);
00611       (*action) (root, endorder, level);
00612     }
00613 }
00614 
00615 
00616 /* Walk the nodes of a tree.
00617    ROOT is the root of the tree to be walked, ACTION the function to be
00618    called at each node.  */
00619 void
00620 __twalk (const void *vroot, __action_fn_t action)
00621 {
00622   const_node root = (const_node) vroot;
00623 
00624   CHECK_TREE (root);
00625 
00626   if (root != NULL && action != NULL)
00627     trecurse (root, action, 0);
00628 }
00629 weak_alias (__twalk, twalk)
00630 
00631 
00632 
00633 /* The standardized functions miss an important functionality: the
00634    tree cannot be removed easily.  We provide a function to do this.  */
00635 static void
00636 internal_function
00637 tdestroy_recurse (node root, __free_fn_t freefct)
00638 {
00639   if (root->left != NULL)
00640     tdestroy_recurse (root->left, freefct);
00641   if (root->right != NULL)
00642     tdestroy_recurse (root->right, freefct);
00643   (*freefct) ((void *) root->key);
00644   /* Free the node itself.  */
00645   free (root);
00646 }
00647 
00648 void
00649 __tdestroy (void *vroot, __free_fn_t freefct)
00650 {
00651   node root = (node) vroot;
00652 
00653   CHECK_TREE (root);
00654 
00655   if (root != NULL)
00656     tdestroy_recurse (root, freefct);
00657 }
00658 weak_alias (__tdestroy, tdestroy)