Back to index

courier  0.68.2
tsearch.c
Go to the documentation of this file.
00001 /* Copyright (C) 1995, 1996, 1997, 2000, 2006 Free Software Foundation, Inc.
00002    Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997.
00003 
00004    NOTE: The canonical source of this file is maintained with the GNU C
00005    Library.  Bugs can be reported to bug-glibc@gnu.org.
00006 
00007    This program is free software; you can redistribute it and/or modify it
00008    under the terms of the GNU Library General Public License as published
00009    by the Free Software Foundation; either version 2, or (at your option)
00010    any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Library General Public License for more details.
00016 
00017    You should have received a copy of the GNU Library General Public
00018    License along with this program; if not, write to the Free Software
00019    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
00020    USA.  */
00021 
00022 /* Tree search for red/black trees.
00023    The algorithm for adding nodes is taken from one of the many "Algorithms"
00024    books by Robert Sedgewick, although the implementation differs.
00025    The algorithm for deleting nodes can probably be found in a book named
00026    "Introduction to Algorithms" by Cormen/Leiserson/Rivest.  At least that's
00027    the book that my professor took most algorithms from during the "Data
00028    Structures" course...
00029 
00030    Totally public domain.  */
00031 
00032 /* Red/black trees are binary trees in which the edges are colored either red
00033    or black.  They have the following properties:
00034    1. The number of black edges on every path from the root to a leaf is
00035       constant.
00036    2. No two red edges are adjacent.
00037    Therefore there is an upper bound on the length of every path, it's
00038    O(log n) where n is the number of nodes in the tree.  No path can be longer
00039    than 1+2*P where P is the length of the shortest path in the tree.
00040    Useful for the implementation:
00041    3. If one of the children of a node is NULL, then the other one is red
00042       (if it exists).
00043 
00044    In the implementation, not the edges are colored, but the nodes.  The color
00045    interpreted as the color of the edge leading to this node.  The color is
00046    meaningless for the root node, but we color the root node black for
00047    convenience.  All added nodes are red initially.
00048 
00049    Adding to a red/black tree is rather easy.  The right place is searched
00050    with a usual binary tree search.  Additionally, whenever a node N is
00051    reached that has two red successors, the successors are colored black and
00052    the node itself colored red.  This moves red edges up the tree where they
00053    pose less of a problem once we get to really insert the new node.  Changing
00054    N's color to red may violate rule 2, however, so rotations may become
00055    necessary to restore the invariants.  Adding a new red leaf may violate
00056    the same rule, so afterwards an additional check is run and the tree
00057    possibly rotated.
00058 
00059    Deleting is hairy.  There are mainly two nodes involved: the node to be
00060    deleted (n1), and another node that is to be unchained from the tree (n2).
00061    If n1 has a successor (the node with a smallest key that is larger than
00062    n1), then the successor becomes n2 and its contents are copied into n1,
00063    otherwise n1 becomes n2.
00064    Unchaining a node may violate rule 1: if n2 is black, one subtree is
00065    missing one black edge afterwards.  The algorithm must try to move this
00066    error upwards towards the root, so that the subtree that does not have
00067    enough black edges becomes the whole tree.  Once that happens, the error
00068    has disappeared.  It may not be necessary to go all the way up, since it
00069    is possible that rotations and recoloring can fix the error before that.
00070 
00071    Although the deletion algorithm must walk upwards through the tree, we
00072    do not store parent pointers in the nodes.  Instead, delete allocates a
00073    small array of parent pointers and fills it while descending the tree.
00074    Since we know that the length of a path is O(log n), where n is the number
00075    of nodes, this is likely to use less memory.  */
00076 
00077 /* Tree rotations look like this:
00078       A                C
00079      / \              / \
00080     B   C            A   G
00081    / \ / \  -->     / \
00082    D E F G         B   F
00083                   / \
00084                  D   E
00085 
00086    In this case, A has been rotated left.  This preserves the ordering of the
00087    binary tree.  */
00088 
00089 #include <config.h>
00090 
00091 /* Specification.  */
00092 #ifdef IN_LIBINTL
00093 # include "tsearch.h"
00094 #else
00095 # include <search.h>
00096 #endif
00097 
00098 #include <stdlib.h>
00099 
00100 typedef int (*__compar_fn_t) (const void *, const void *);
00101 typedef void (*__action_fn_t) (const void *, VISIT, int);
00102 
00103 #ifndef weak_alias
00104 # define __tsearch tsearch
00105 # define __tfind tfind
00106 # define __tdelete tdelete
00107 # define __twalk twalk
00108 #endif
00109 
00110 #ifndef internal_function
00111 /* Inside GNU libc we mark some function in a special way.  In other
00112    environments simply ignore the marking.  */
00113 # define internal_function
00114 #endif
00115 
00116 typedef struct node_t
00117 {
00118   /* Callers expect this to be the first element in the structure - do not
00119      move!  */
00120   const void *key;
00121   struct node_t *left;
00122   struct node_t *right;
00123   unsigned int red:1;
00124 } *node;
00125 typedef const struct node_t *const_node;
00126 
00127 #undef DEBUGGING
00128 
00129 #ifdef DEBUGGING
00130 
00131 /* Routines to check tree invariants.  */
00132 
00133 #include <assert.h>
00134 
00135 #define CHECK_TREE(a) check_tree(a)
00136 
00137 static void
00138 check_tree_recurse (node p, int d_sofar, int d_total)
00139 {
00140   if (p == NULL)
00141     {
00142       assert (d_sofar == d_total);
00143       return;
00144     }
00145 
00146   check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total);
00147   check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total);
00148   if (p->left)
00149     assert (!(p->left->red && p->red));
00150   if (p->right)
00151     assert (!(p->right->red && p->red));
00152 }
00153 
00154 static void
00155 check_tree (node root)
00156 {
00157   int cnt = 0;
00158   node p;
00159   if (root == NULL)
00160     return;
00161   root->red = 0;
00162   for(p = root->left; p; p = p->left)
00163     cnt += !p->red;
00164   check_tree_recurse (root, 0, cnt);
00165 }
00166 
00167 
00168 #else
00169 
00170 #define CHECK_TREE(a)
00171 
00172 #endif
00173 
00174 /* Possibly "split" a node with two red successors, and/or fix up two red
00175    edges in a row.  ROOTP is a pointer to the lowest node we visited, PARENTP
00176    and GPARENTP pointers to its parent/grandparent.  P_R and GP_R contain the
00177    comparison values that determined which way was taken in the tree to reach
00178    ROOTP.  MODE is 1 if we need not do the split, but must check for two red
00179    edges between GPARENTP and ROOTP.  */
00180 static void
00181 maybe_split_for_insert (node *rootp, node *parentp, node *gparentp,
00182                         int p_r, int gp_r, int mode)
00183 {
00184   node root = *rootp;
00185   node *rp, *lp;
00186   rp = &(*rootp)->right;
00187   lp = &(*rootp)->left;
00188 
00189   /* See if we have to split this node (both successors red).  */
00190   if (mode == 1
00191       || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red))
00192     {
00193       /* This node becomes red, its successors black.  */
00194       root->red = 1;
00195       if (*rp)
00196         (*rp)->red = 0;
00197       if (*lp)
00198         (*lp)->red = 0;
00199 
00200       /* If the parent of this node is also red, we have to do
00201          rotations.  */
00202       if (parentp != NULL && (*parentp)->red)
00203         {
00204           node gp = *gparentp;
00205           node p = *parentp;
00206           /* There are two main cases:
00207              1. The edge types (left or right) of the two red edges differ.
00208              2. Both red edges are of the same type.
00209              There exist two symmetries of each case, so there is a total of
00210              4 cases.  */
00211           if ((p_r > 0) != (gp_r > 0))
00212             {
00213               /* Put the child at the top of the tree, with its parent
00214                  and grandparent as successors.  */
00215               p->red = 1;
00216               gp->red = 1;
00217               root->red = 0;
00218               if (p_r < 0)
00219                 {
00220                   /* Child is left of parent.  */
00221                   p->left = *rp;
00222                   *rp = p;
00223                   gp->right = *lp;
00224                   *lp = gp;
00225                 }
00226               else
00227                 {
00228                   /* Child is right of parent.  */
00229                   p->right = *lp;
00230                   *lp = p;
00231                   gp->left = *rp;
00232                   *rp = gp;
00233                 }
00234               *gparentp = root;
00235             }
00236           else
00237             {
00238               *gparentp = *parentp;
00239               /* Parent becomes the top of the tree, grandparent and
00240                  child are its successors.  */
00241               p->red = 0;
00242               gp->red = 1;
00243               if (p_r < 0)
00244                 {
00245                   /* Left edges.  */
00246                   gp->left = p->right;
00247                   p->right = gp;
00248                 }
00249               else
00250                 {
00251                   /* Right edges.  */
00252                   gp->right = p->left;
00253                   p->left = gp;
00254                 }
00255             }
00256         }
00257     }
00258 }
00259 
00260 /* Find or insert datum into search tree.
00261    KEY is the key to be located, ROOTP is the address of tree root,
00262    COMPAR the ordering function.  */
00263 void *
00264 __tsearch (const void *key, void **vrootp, __compar_fn_t compar)
00265 {
00266   node q;
00267   node *parentp = NULL, *gparentp = NULL;
00268   node *rootp = (node *) vrootp;
00269   node *nextp;
00270   int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler.  */
00271 
00272   if (rootp == NULL)
00273     return NULL;
00274 
00275   /* This saves some additional tests below.  */
00276   if (*rootp != NULL)
00277     (*rootp)->red = 0;
00278 
00279   CHECK_TREE (*rootp);
00280 
00281   nextp = rootp;
00282   while (*nextp != NULL)
00283     {
00284       node root = *rootp;
00285       r = (*compar) (key, root->key);
00286       if (r == 0)
00287         return root;
00288 
00289       maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0);
00290       /* If that did any rotations, parentp and gparentp are now garbage.
00291          That doesn't matter, because the values they contain are never
00292          used again in that case.  */
00293 
00294       nextp = r < 0 ? &root->left : &root->right;
00295       if (*nextp == NULL)
00296         break;
00297 
00298       gparentp = parentp;
00299       parentp = rootp;
00300       rootp = nextp;
00301 
00302       gp_r = p_r;
00303       p_r = r;
00304     }
00305 
00306   q = (struct node_t *) malloc (sizeof (struct node_t));
00307   if (q != NULL)
00308     {
00309       *nextp = q;                       /* link new node to old */
00310       q->key = key;                     /* initialize new node */
00311       q->red = 1;
00312       q->left = q->right = NULL;
00313 
00314       if (nextp != rootp)
00315         /* There may be two red edges in a row now, which we must avoid by
00316            rotating the tree.  */
00317         maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1);
00318     }
00319 
00320   return q;
00321 }
00322 #ifdef weak_alias
00323 weak_alias (__tsearch, tsearch)
00324 #endif
00325 
00326 
00327 /* Find datum in search tree.
00328    KEY is the key to be located, ROOTP is the address of tree root,
00329    COMPAR the ordering function.  */
00330 void *
00331 __tfind (key, vrootp, compar)
00332      const void *key;
00333      void *const *vrootp;
00334      __compar_fn_t compar;
00335 {
00336   node *rootp = (node *) vrootp;
00337 
00338   if (rootp == NULL)
00339     return NULL;
00340 
00341   CHECK_TREE (*rootp);
00342 
00343   while (*rootp != NULL)
00344     {
00345       node root = *rootp;
00346       int r;
00347 
00348       r = (*compar) (key, root->key);
00349       if (r == 0)
00350         return root;
00351 
00352       rootp = r < 0 ? &root->left : &root->right;
00353     }
00354   return NULL;
00355 }
00356 #ifdef weak_alias
00357 weak_alias (__tfind, tfind)
00358 #endif
00359 
00360 
00361 /* Delete node with given key.
00362    KEY is the key to be deleted, ROOTP is the address of the root of tree,
00363    COMPAR the comparison function.  */
00364 void *
00365 __tdelete (const void *key, void **vrootp, __compar_fn_t compar)
00366 {
00367   node p, q, r, retval;
00368   int cmp;
00369   node *rootp = (node *) vrootp;
00370   node root, unchained;
00371   /* Stack of nodes so we remember the parents without recursion.  It's
00372      _very_ unlikely that there are paths longer than 40 nodes.  The tree
00373      would need to have around 250.000 nodes.  */
00374   int stacksize = 100;
00375   int sp = 0;
00376   node *nodestack[100];
00377 
00378   if (rootp == NULL)
00379     return NULL;
00380   p = *rootp;
00381   if (p == NULL)
00382     return NULL;
00383 
00384   CHECK_TREE (p);
00385 
00386   while ((cmp = (*compar) (key, (*rootp)->key)) != 0)
00387     {
00388       if (sp == stacksize)
00389         abort ();
00390 
00391       nodestack[sp++] = rootp;
00392       p = *rootp;
00393       rootp = ((cmp < 0)
00394                ? &(*rootp)->left
00395                : &(*rootp)->right);
00396       if (*rootp == NULL)
00397         return NULL;
00398     }
00399 
00400   /* This is bogus if the node to be deleted is the root... this routine
00401      really should return an integer with 0 for success, -1 for failure
00402      and errno = ESRCH or something.  */
00403   retval = p;
00404 
00405   /* We don't unchain the node we want to delete. Instead, we overwrite
00406      it with its successor and unchain the successor.  If there is no
00407      successor, we really unchain the node to be deleted.  */
00408 
00409   root = *rootp;
00410 
00411   r = root->right;
00412   q = root->left;
00413 
00414   if (q == NULL || r == NULL)
00415     unchained = root;
00416   else
00417     {
00418       node *parent = rootp, *up = &root->right;
00419       for (;;)
00420         {
00421           if (sp == stacksize)
00422             abort ();
00423           nodestack[sp++] = parent;
00424           parent = up;
00425           if ((*up)->left == NULL)
00426             break;
00427           up = &(*up)->left;
00428         }
00429       unchained = *up;
00430     }
00431 
00432   /* We know that either the left or right successor of UNCHAINED is NULL.
00433      R becomes the other one, it is chained into the parent of UNCHAINED.  */
00434   r = unchained->left;
00435   if (r == NULL)
00436     r = unchained->right;
00437   if (sp == 0)
00438     *rootp = r;
00439   else
00440     {
00441       q = *nodestack[sp-1];
00442       if (unchained == q->right)
00443         q->right = r;
00444       else
00445         q->left = r;
00446     }
00447 
00448   if (unchained != root)
00449     root->key = unchained->key;
00450   if (!unchained->red)
00451     {
00452       /* Now we lost a black edge, which means that the number of black
00453          edges on every path is no longer constant.  We must balance the
00454          tree.  */
00455       /* NODESTACK now contains all parents of R.  R is likely to be NULL
00456          in the first iteration.  */
00457       /* NULL nodes are considered black throughout - this is necessary for
00458          correctness.  */
00459       while (sp > 0 && (r == NULL || !r->red))
00460         {
00461           node *pp = nodestack[sp - 1];
00462           p = *pp;
00463           /* Two symmetric cases.  */
00464           if (r == p->left)
00465             {
00466               /* Q is R's brother, P is R's parent.  The subtree with root
00467                  R has one black edge less than the subtree with root Q.  */
00468               q = p->right;
00469               if (q->red)
00470                 {
00471                   /* If Q is red, we know that P is black. We rotate P left
00472                      so that Q becomes the top node in the tree, with P below
00473                      it.  P is colored red, Q is colored black.
00474                      This action does not change the black edge count for any
00475                      leaf in the tree, but we will be able to recognize one
00476                      of the following situations, which all require that Q
00477                      is black.  */
00478                   q->red = 0;
00479                   p->red = 1;
00480                   /* Left rotate p.  */
00481                   p->right = q->left;
00482                   q->left = p;
00483                   *pp = q;
00484                   /* Make sure pp is right if the case below tries to use
00485                      it.  */
00486                   nodestack[sp++] = pp = &q->left;
00487                   q = p->right;
00488                 }
00489               /* We know that Q can't be NULL here.  We also know that Q is
00490                  black.  */
00491               if ((q->left == NULL || !q->left->red)
00492                   && (q->right == NULL || !q->right->red))
00493                 {
00494                   /* Q has two black successors.  We can simply color Q red.
00495                      The whole subtree with root P is now missing one black
00496                      edge.  Note that this action can temporarily make the
00497                      tree invalid (if P is red).  But we will exit the loop
00498                      in that case and set P black, which both makes the tree
00499                      valid and also makes the black edge count come out
00500                      right.  If P is black, we are at least one step closer
00501                      to the root and we'll try again the next iteration.  */
00502                   q->red = 1;
00503                   r = p;
00504                 }
00505               else
00506                 {
00507                   /* Q is black, one of Q's successors is red.  We can
00508                      repair the tree with one operation and will exit the
00509                      loop afterwards.  */
00510                   if (q->right == NULL || !q->right->red)
00511                     {
00512                       /* The left one is red.  We perform the same action as
00513                          in maybe_split_for_insert where two red edges are
00514                          adjacent but point in different directions:
00515                          Q's left successor (let's call it Q2) becomes the
00516                          top of the subtree we are looking at, its parent (Q)
00517                          and grandparent (P) become its successors. The former
00518                          successors of Q2 are placed below P and Q.
00519                          P becomes black, and Q2 gets the color that P had.
00520                          This changes the black edge count only for node R and
00521                          its successors.  */
00522                       node q2 = q->left;
00523                       q2->red = p->red;
00524                       p->right = q2->left;
00525                       q->left = q2->right;
00526                       q2->right = q;
00527                       q2->left = p;
00528                       *pp = q2;
00529                       p->red = 0;
00530                     }
00531                   else
00532                     {
00533                       /* It's the right one.  Rotate P left. P becomes black,
00534                          and Q gets the color that P had.  Q's right successor
00535                          also becomes black.  This changes the black edge
00536                          count only for node R and its successors.  */
00537                       q->red = p->red;
00538                       p->red = 0;
00539 
00540                       q->right->red = 0;
00541 
00542                       /* left rotate p */
00543                       p->right = q->left;
00544                       q->left = p;
00545                       *pp = q;
00546                     }
00547 
00548                   /* We're done.  */
00549                   sp = 1;
00550                   r = NULL;
00551                 }
00552             }
00553           else
00554             {
00555               /* Comments: see above.  */
00556               q = p->left;
00557               if (q->red)
00558                 {
00559                   q->red = 0;
00560                   p->red = 1;
00561                   p->left = q->right;
00562                   q->right = p;
00563                   *pp = q;
00564                   nodestack[sp++] = pp = &q->right;
00565                   q = p->left;
00566                 }
00567               if ((q->right == NULL || !q->right->red)
00568                        && (q->left == NULL || !q->left->red))
00569                 {
00570                   q->red = 1;
00571                   r = p;
00572                 }
00573               else
00574                 {
00575                   if (q->left == NULL || !q->left->red)
00576                     {
00577                       node q2 = q->right;
00578                       q2->red = p->red;
00579                       p->left = q2->right;
00580                       q->right = q2->left;
00581                       q2->left = q;
00582                       q2->right = p;
00583                       *pp = q2;
00584                       p->red = 0;
00585                     }
00586                   else
00587                     {
00588                       q->red = p->red;
00589                       p->red = 0;
00590                       q->left->red = 0;
00591                       p->left = q->right;
00592                       q->right = p;
00593                       *pp = q;
00594                     }
00595                   sp = 1;
00596                   r = NULL;
00597                 }
00598             }
00599           --sp;
00600         }
00601       if (r != NULL)
00602         r->red = 0;
00603     }
00604 
00605   free (unchained);
00606   return retval;
00607 }
00608 #ifdef weak_alias
00609 weak_alias (__tdelete, tdelete)
00610 #endif
00611 
00612 
00613 /* Walk the nodes of a tree.
00614    ROOT is the root of the tree to be walked, ACTION the function to be
00615    called at each node.  LEVEL is the level of ROOT in the whole tree.  */
00616 static void
00617 internal_function
00618 trecurse (const void *vroot, __action_fn_t action, int level)
00619 {
00620   const_node root = (const_node) vroot;
00621 
00622   if (root->left == NULL && root->right == NULL)
00623     (*action) (root, leaf, level);
00624   else
00625     {
00626       (*action) (root, preorder, level);
00627       if (root->left != NULL)
00628         trecurse (root->left, action, level + 1);
00629       (*action) (root, postorder, level);
00630       if (root->right != NULL)
00631         trecurse (root->right, action, level + 1);
00632       (*action) (root, endorder, level);
00633     }
00634 }
00635 
00636 
00637 /* Walk the nodes of a tree.
00638    ROOT is the root of the tree to be walked, ACTION the function to be
00639    called at each node.  */
00640 void
00641 __twalk (const void *vroot, __action_fn_t action)
00642 {
00643   const_node root = (const_node) vroot;
00644 
00645   CHECK_TREE (root);
00646 
00647   if (root != NULL && action != NULL)
00648     trecurse (root, action, 0);
00649 }
00650 #ifdef weak_alias
00651 weak_alias (__twalk, twalk)
00652 #endif
00653 
00654 
00655 #ifdef _LIBC
00656 
00657 /* The standardized functions miss an important functionality: the
00658    tree cannot be removed easily.  We provide a function to do this.  */
00659 static void
00660 internal_function
00661 tdestroy_recurse (node root, __free_fn_t freefct)
00662 {
00663   if (root->left != NULL)
00664     tdestroy_recurse (root->left, freefct);
00665   if (root->right != NULL)
00666     tdestroy_recurse (root->right, freefct);
00667   (*freefct) ((void *) root->key);
00668   /* Free the node itself.  */
00669   free (root);
00670 }
00671 
00672 void
00673 __tdestroy (void *vroot, __free_fn_t freefct)
00674 {
00675   node root = (node) vroot;
00676 
00677   CHECK_TREE (root);
00678 
00679   if (root != NULL)
00680     tdestroy_recurse (root, freefct);
00681 }
00682 weak_alias (__tdestroy, tdestroy)
00683 
00684 #endif /* _LIBC */