Back to index

lightning-sunbird  0.9+nobinonly
patricia.c
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Simon Fraser <sfraser@netscape.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 
00040 /*---------------------------------------------------------------------
00041 
00042        Patricia
00043        
00044        D.R. Morrison's "Practical Algorithm To Retrieve Information
00045        Coded in Alphanumeric"
00046        
00047        This implementation geared towards the storage of data that
00048        is indexed through arbitrary-length binary keys (e.g. sequence
00049        data). It also only deals with the case where every key is unique;
00050        duplicate keys are not entered into the tree, although you may
00051        do some processing on duplicates in the ReplaceFunction passed
00052        to PatriciaInsert.
00053        
00054        There is no function to remove nodes from the tree at present.
00055        
00056        The implementation is modular, in the sense that all tree data structures
00057        are declared in this file, and are not intended to be accessed externally.
00058        You should only use the functions prototyped in patricia.h, all of which
00059        start with 'Patricia', and should consider the tree as an opaque data 
00060        structure.
00061        
00062        Ref: Donald R. Morrison. PATRICIA -- practical algorithm to retrieve
00063        information coded in alphanumeric. Journal of the ACM, 15(4):514-534,
00064        October 1968
00065 
00066        See Sedgewick, R. Algorithms, 2nd Ed. (1988) for implemenation details.
00067 
00068 -----------------------------------------------------------------------*/
00069 
00070 #include <string.h>
00071 #include <stdlib.h>
00072 
00073 //#include "utils.h"
00074 #include "patricia.h"
00075 
00076 #include "nsAEDefs.h"       // for AE_ASSERT
00077 
00078 /* Uncomment to print verbose logging on tree activity */
00079 //#define VERBOSE
00080 
00081 /* Data structures */
00082 
00083 typedef struct TNodeTag {
00084        unsigned char        *key;                /* The node's key value in an array of chars */
00085        short                bit;                        /* The bit that is examined at this node */
00086        long                        nodeID;                     /* Unique ID for each node, used for debugging */
00087        struct TNodeTag      *left;               /* Pointer to left child node */
00088        struct TNodeTag      *right;                     /* Pointer to right child node */
00089        void                        *data;               /* Pointer to user-defined data structure */
00090 } TNode;
00091 
00092 
00093 typedef struct {
00094        TNode*               headNode;                   /* Pointer to dummy head node */
00095        long                        numKeyBits;          /* Size of keys, in bits */
00096        long                        keySize;                    /* Size of keys, in chars (assumed to be 8 bits each) */
00097        long                        numNodes;            /* Number of nodes in tree */
00098 } TPatriciaTree;
00099 
00100 
00101 /* These macros assume that key is a *[unsigned] char, and that a char is 8 bits big */
00102 
00103 #define TestKeyBit(key, bit)                            ( (key[bit >> 3] & (1 << (bit & 7))) != 0 )
00104 #define CompareKeyBits(key1, key2, bit)          ( (key1[bit >> 3] & (1 << (bit & 7))) == (key2[bit >> 3] & (1 << (bit & 7))) )
00105 
00106 /*---------------------------------------------------------------------
00107 
00108        MakeNewNode
00109        
00110        Allocates a new node as an object on the heap, and initializes
00111        the key and bit values to those supplied.
00112        
00113        Entry: key           binary key data
00114                      bit           bit value
00115                      nodeData      user data for this node, if any.
00116                      
00117        Exit:         return value  pointer to the new node, or NULL on error
00118                                           
00119 -----------------------------------------------------------------------*/
00120 
00121 static TNode *MakeNewNode(TPatriciaTree *tree, const unsigned char *key, short bit, void *nodeData)
00122 {
00123        static long          nodeID = 0;
00124        TNode         *newNode;
00125        
00126        newNode = (TNode *)calloc(1, sizeof(TNode));
00127        if (newNode == NULL) return NULL;
00128 
00129        newNode->key = (unsigned char *)calloc(tree->keySize + 1, 1);         //last bit must be zero
00130        if (newNode->key == NULL) {
00131               free(newNode);
00132               return NULL;
00133        }
00134        memcpy(newNode->key, key, tree->keySize);
00135        newNode->bit = bit;
00136        newNode->data = nodeData;
00137        newNode->nodeID = nodeID;
00138        nodeID ++;
00139        
00140        return newNode;
00141 }
00142 
00143 
00144 /*---------------------------------------------------------------------
00145 
00146        MakeHeadNode
00147        
00148        Allocates the dummy head node of the tree, which is initialized to
00149                      key field = 000000...
00150                      bits = numKeyBits
00151                      left & right = point to self
00152                      data = NULL
00153                      
00154        Exit:         return value  pointer to the new node, or NULL on error
00155                                           
00156 -----------------------------------------------------------------------*/
00157 
00158 static TNode *MakeHeadNode(TPatriciaTree *tree)
00159 {
00160        TNode         *newNode;
00161        
00162        newNode = (TNode *)calloc(1, sizeof(TNode));
00163        if (newNode == NULL) return NULL;
00164 
00165        newNode->key = (unsigned char *)calloc(tree->keySize + 1, 1);         //last bit must be zero
00166        if (newNode->key == NULL) {
00167               free(newNode);
00168               return NULL;
00169        }
00170        memset(newNode->key, 0, tree->keySize);
00171        newNode->bit = tree->numKeyBits;
00172        newNode->data = NULL;
00173        newNode->nodeID = -1;
00174        
00175        /* Both self-pointers for the head node */
00176        newNode->left = newNode;
00177        newNode->right = newNode;
00178        
00179        return newNode;
00180 }
00181 
00182 
00183 /*---------------------------------------------------------------------
00184 
00185        InternalSearch
00186        
00187        Search the tree for a node with the given key, starting at
00188        startNode.
00189        
00190        Entry: tree          pointer to a tree
00191                      key           binary key data
00192                      startNode     node to start the search (must NOT be NULL)
00193                      
00194        Exit:         return value: pointer to found node, or node with upward
00195                                           link. The caller must verify whether this
00196                                           is the node wanted by comparing keys.
00197                                           
00198 -----------------------------------------------------------------------*/
00199 
00200 static TNode *InternalSearch(TPatriciaTree *tree, TNode *x, const unsigned char *key)
00201 {
00202        TNode  *p;
00203 
00204        AE_ASSERT(x, "No node");
00205 
00206        do {
00207               p = x;
00208               
00209               if (TestKeyBit(key, x->bit))
00210                      x = x->right;
00211               else
00212                      x = x->left;
00213               
00214        } while (p->bit > x->bit);
00215        
00216        return x;
00217 }
00218 
00219 
00220 
00221 /*---------------------------------------------------------------------
00222 
00223        InternalTraverse
00224        
00225        A traverse routine used internally.
00226        
00227        Entry: tree                 pointer to a tree
00228                      key                  binary key data
00229                      x                    node to start the search (must NOT be NULL)
00230                      traverseFunc  function called on each node, like this:
00231                      
00232                                           (*traverseFunc)(x->data, x->key, arg1, arg2);
00233                      
00234        Exit:         return value: 0 if everything OK
00235                                           Non-zero value on error
00236                                           
00237 -----------------------------------------------------------------------*/
00238 
00239 static int InternalTraverse(TPatriciaTree *tree, TNode *x, NodeTraverseFunction traverseFunc, void *arg1, void *arg2)
00240 {
00241        TNode  *p;
00242        int           err = 0;
00243        
00244        AE_ASSERT(x, "No node");
00245        AE_ASSERT(x->left && x->right, "Left or right child missing");
00246 
00247 #ifdef VERBOSE
00248        printf("Visiting node %ld with left %ld and right %ld\n", x->nodeID, x->left->nodeID, x->right->nodeID);
00249 #endif
00250 
00251        if (x != tree->headNode) {
00252               err = (*traverseFunc)(x->data, x->key, arg1, arg2);
00253               if (err != 0) return err;
00254        }
00255        
00256        p = x->left;
00257        if (p->bit < x->bit)
00258               err = InternalTraverse(tree, p, traverseFunc, arg1, arg2);
00259 
00260        if (err != 0) return err;
00261        
00262        p = x->right;
00263        if (p->bit < x->bit)
00264               err = InternalTraverse(tree, p, traverseFunc, arg1, arg2);
00265        
00266        return err;
00267 }
00268 
00269 
00270 
00271 /*---------------------------------------------------------------------
00272 
00273        TraverseAndFree
00274        
00275        A traverse routine used internall.
00276        
00277        Entry: tree                 pointer to a tree
00278                      key                  binary key data
00279                      x                    node to start the search (must NOT be NULL)
00280                      traverseFunc  function called on each node, like this:
00281                      
00282                                           (*traverseFunc)(x->data, x->key, arg1, arg2);
00283                      
00284        Exit:         return value: 0 if everything OK
00285                                           Non-zero value on error
00286                                           
00287 -----------------------------------------------------------------------*/
00288 
00289 static int TraverseAndFree(TPatriciaTree *tree, TNode *x, NodeFreeFunction freeFunc, void *refCon)
00290 {
00291        TNode  *p;
00292        int           err = 0;
00293        
00294        AE_ASSERT(x, "No node");
00295        AE_ASSERT(x->left && x->right, "Left or right child missing");
00296               
00297        p = x->left;
00298        if (p->bit < x->bit) {
00299               err = TraverseAndFree(tree, p, freeFunc, refCon);
00300               if (err != 0) return err;
00301        }
00302        
00303        p = x->right;
00304        if (p->bit < x->bit) {
00305               err = TraverseAndFree(tree, p, freeFunc, refCon);
00306               if (err != 0) return err;
00307        }
00308 
00309        err = (*freeFunc)(x->data, x->key, refCon);
00310        
00311 #ifdef VERBOSE
00312        printf("Freeing node %ld\n", x->nodeID);
00313 #endif
00314 
00315        free(x->key);
00316        free(x);
00317        
00318        return err;
00319 }
00320 
00321 
00322 #pragma mark -
00323 
00324 
00325 /*---------------------------------------------------------------------
00326 
00327        InitPatriciaTree
00328        
00329        Allocate and initialize a new, empty PatriciaTree.
00330        
00331        Entry: keySize              length of the keys, in bits
00332                      
00333        Exit:         return value  A reference to the created PatriciaTree, 
00334                                           or NULL on error.
00335                                           
00336 -----------------------------------------------------------------------*/
00337 
00338 PatriciaTreeRef PatriciaInitTree(long numKeyBits)
00339 {
00340        TPatriciaTree        *tree = NULL;
00341 
00342        tree = (TPatriciaTree *)calloc(1, sizeof(TPatriciaTree));
00343        if (tree == NULL) return NULL;
00344        
00345        tree->numKeyBits = numKeyBits;
00346        tree->keySize = (numKeyBits >> 3) + ((numKeyBits & 7) != 0);
00347        tree->numNodes = 0;
00348        
00349        tree->headNode = MakeHeadNode(tree);
00350        if (tree->headNode == NULL) {
00351               free(tree);
00352               return NULL;
00353        }
00354        
00355        return (PatriciaTreeRef)tree;
00356 }
00357 
00358 
00359 
00360 /*---------------------------------------------------------------------
00361 
00362        PatriciaFreeTree
00363        
00364        Free a Patricia tree and all associate data structures. freeFunc is
00365        called on each node's node->data. If no freeFunc is supplied, then
00366        the node->data will NOT be freed.
00367        
00368        Entry: treeRef              reference to a previously allocated PatriciaTree
00369                      freeFunc             a function that is called on each node->data
00370                      arg                  pointer to data that is also passed to the free function.
00371                      
00372        The free function is called like this:           (*freeFunc)(node->data, node->key, arg)
00373                                                                
00374 -----------------------------------------------------------------------*/
00375 
00376 void PatriciaFreeTree(PatriciaTreeRef treeRef, NodeFreeFunction freeFunc, void *refCon)
00377 {
00378        TPatriciaTree *tree = (TPatriciaTree *)treeRef;
00379 
00380        if (tree == NULL) return;
00381        
00382        /* Traverse the tree and free the data */
00383        TraverseAndFree(tree, tree->headNode, freeFunc, refCon);
00384        
00385        free(tree);
00386 }
00387 
00388 
00389 
00390 /*---------------------------------------------------------------------
00391 
00392        PatriciaSearch
00393        
00394        Search the tree for the node with the given key.
00395        
00396        Entry: key           pointer to binary key data
00397                      data          pointer to placeholder for returned data
00398                                    If you pass NULL in this argument, no value
00399                                    will be returned (i.e. I don't deference 0).
00400                      
00401        Exit:         return value  1 if key found (data returned in *data)
00402                                           0 if key not found (NULL returned in *data)
00403        
00404 -----------------------------------------------------------------------*/
00405 
00406 int PatriciaSearch(PatriciaTreeRef treeRef, const unsigned char *key, void **data)
00407 {
00408        TPatriciaTree *tree = (TPatriciaTree *)treeRef;
00409        TNode         *foundNode;
00410        
00411        AE_ASSERT(tree, "Where is my tree?");
00412 
00413        foundNode = InternalSearch(tree, tree->headNode, key);
00414        AE_ASSERT(foundNode, "Should have found node");
00415 
00416        if (memcmp(foundNode->key, key, tree->keySize) == 0) {
00417               if (data != NULL)
00418                      *data = foundNode->data;
00419               return 1;
00420        } else
00421               return 0;
00422 }
00423 
00424 
00425 
00426 /*---------------------------------------------------------------------
00427 
00428        PatriciaInsert
00429        
00430        Insert a node into the tree with the given key and data.
00431               
00432        Entry: key                  pointer to binary key data
00433                      replaceFunc   function executed when a node with the key we
00434                                           are inserting is found in the tree.
00435                      data                 pointer to node data structure
00436                      
00437        Exit:         return value  0 if insertion successful
00438                                           1 if key already exists
00439                                           -1 if some other error
00440 
00441 -----------------------------------------------------------------------*/
00442 
00443 int PatriciaInsert(PatriciaTreeRef treeRef, NodeReplaceFunction replaceFunc, const unsigned char *key, void *data, void *refCon)
00444 {
00445        TPatriciaTree *tree = (TPatriciaTree *)treeRef;
00446        TNode         *x, *t, *p;
00447        short         i;
00448 
00449        x = tree->headNode;
00450        t = InternalSearch(tree, x, key);
00451 
00452        AE_ASSERT(t, "Should have found node");
00453 
00454        if (memcmp(t->key, key, tree->keySize) == 0) {
00455               if (replaceFunc) (*replaceFunc)(&t->data, t->key, data, refCon);
00456               return 1;                   /* It's already there */
00457        }
00458        
00459        i = tree->numKeyBits - 1;
00460        
00461        while (CompareKeyBits(key, t->key, i))
00462               i --;  
00463               
00464        do {
00465               p = x;
00466               x = (TestKeyBit(key, x->bit)) ? x->right : x->left;
00467        } while (x->bit > i && p->bit > x->bit);
00468        
00469        t = MakeNewNode(tree, key, i, data);
00470        if (t == NULL) return -1;
00471        
00472        if (TestKeyBit(key, t->bit)) {
00473               t->right = t;
00474               t->left = x;
00475        } else {
00476               t->right = x;
00477               t->left = t;
00478        }
00479               
00480        if (TestKeyBit(key, p->bit))
00481               p->right = t;
00482        else
00483               p->left = t;
00484        
00485 #ifdef VERBOSE
00486        printf("Inserted node %ld with left %ld and right %ld\n", t->nodeID, t->left->nodeID, t->right->nodeID);
00487 #endif
00488 
00489        tree->numNodes ++;
00490        
00491        return 0;
00492 }
00493 
00494 
00495 /*---------------------------------------------------------------------
00496 
00497        PatriciaTraverse
00498        
00499        Traverse the tree, executing traverseFunc for each node. It's called like
00500        this:
00501               (*traverseFunc)(node->data, node->key, arg1, arg2);
00502               
00503        traverseFunc should return 0 if it completes successfully, or any other
00504        value if there is an error. In this case, the traverse is abruptly terminated.
00505        
00506        Entry: treeRef              reference to a Patricia tree
00507                      traverseFunc  function to be called on each node
00508                      arg1, arg2    arguments passed to traverseFunc
00509               
00510        Exit:         return value  0 if traverse competed successfully
00511                                           -1 if something caused the traverse to
00512                                                  terminate.
00513 
00514 -----------------------------------------------------------------------*/
00515 
00516 int PatriciaTraverse(PatriciaTreeRef treeRef, NodeTraverseFunction traverseFunc, void *arg, void *refCon)
00517 {
00518        TPatriciaTree *tree = (TPatriciaTree *)treeRef;
00519        
00520        return InternalTraverse(tree, tree->headNode, traverseFunc, arg, refCon);
00521 }
00522 
00523 
00524 /*---------------------------------------------------------------------
00525 
00526        PatriciaGetNumNodes
00527        
00528        Return the number of nodes in the tree
00529        
00530 -----------------------------------------------------------------------*/
00531 
00532 long PatriciaGetNumNodes(PatriciaTreeRef treeRef)
00533 {
00534        return ((TPatriciaTree *)treeRef)->numNodes;
00535 }