Back to index

python-biopython  1.60
Classes | Defines | Functions | Variables
trie.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "trie.h"

Go to the source code of this file.

Classes

struct  Transition
struct  Trie

Defines

#define MAX_KEY_LENGTH   1000

Functions

static char * duplicate (const char *s)
TrieTrie_new (void)
int Trie_set (Trie *trie, const char *key, const void *value)
void Trie_del (Trie *trie)
void * Trie_get (const Trie *trie, const char *key)
static void _get_approximate_trie (const Trie *trie, const char *key, const int k, void(*callback)(const char *key, const void *value, const int mismatches, void *data), void *data, const int mismatches, char *current_key, const int max_key)
static void _get_approximate_transition (const char *key, const int k, const Transition *transition, const char *suffix, void(*callback)(const char *key, const void *value, const int mismatches, void *data), void *data, const int mismatches, char *current_key, const int max_key)
void Trie_get_approximate (const Trie *trie, const char *key, const int k, void(*callback)(const char *key, const void *value, const int mismatches, void *data), void *data)
int Trie_len (const Trie *trie)
int Trie_has_key (const Trie *trie, const char *key)
int Trie_has_prefix (const Trie *trie, const char *prefix)
static void _iterate_helper (const Trie *trie, void(*callback)(const char *key, const void *value, void *data), void *data, char *current_key, const int max_key)
void Trie_iterate (const Trie *trie, void(*callback)(const char *key, const void *value, void *data), void *data)
static void _with_prefix_helper (const Trie *trie, const char *prefix, void(*callback)(const char *key, const void *value, void *data), void *data, char *current_key, const int max_key)
void Trie_with_prefix (const Trie *trie, const char *prefix, void(*callback)(const char *key, const void *value, void *data), void *data)
static int _serialize_transition (const Transition *transition, int(*write)(const void *towrite, const int length, void *data), int(*write_value)(const void *value, void *data), void *data)
static int _serialize_trie (const Trie *trie, int(*write)(const void *towrite, const int length, void *data), int(*write_value)(const void *value, void *data), void *data)
int Trie_serialize (const Trie *trie, int(*write)(const void *towrite, const int length, void *data), int(*write_value)(const void *value, void *data), void *data)
static int _deserialize_transition (Transition *transition, int(*read)(void *wasread, const int length, void *data), void *(*read_value)(void *data), void *data)
static int _deserialize_trie (Trie *trie, int(*read)(void *wasread, const int length, void *data), void *(*read_value)(void *data), void *data)
TrieTrie_deserialize (int(*read)(void *wasread, const int length, void *data), void *(*read_value)(void *data), void *data)

Variables

static char KEY [MAX_KEY_LENGTH]

Class Documentation

struct Transition

Definition at line 22 of file trie.c.

Collaboration diagram for Transition:
Class Members
Trie * next
char * suffix
struct Trie

Definition at line 34 of file trie.c.

Collaboration diagram for Trie:
Class Members
unsigned char num_transitions
Transition * transitions
void * value

Define Documentation

#define MAX_KEY_LENGTH   1000

Definition at line 41 of file trie.c.


Function Documentation

static int _deserialize_transition ( Transition transition,
int(*)(void *wasread, const int length, void *data)  read,
void *(*)(void *data)  read_value,
void *  data 
) [static]

Definition at line 695 of file trie.c.

{
    int suffixlen;
    unsigned char has_trie;
    
    if(!(*read)(&suffixlen, sizeof(suffixlen), data))
       goto _deserialize_transition_error;
    if(suffixlen < 0 || suffixlen >= MAX_KEY_LENGTH)
       goto _deserialize_transition_error;
    if(!(*read)(KEY, suffixlen, data))
       goto _deserialize_transition_error;
    KEY[suffixlen] = 0;
    if(!(transition->suffix = duplicate(KEY)))
       goto _deserialize_transition_error;
    if(!(*read)(&has_trie, sizeof(has_trie), data))
       goto _deserialize_transition_error;
    if(has_trie != 0 && has_trie != 1)
       goto _deserialize_transition_error;
    if(has_trie) {
       transition->next = Trie_new();
       if(!_deserialize_trie(transition->next, read, read_value, data))
           goto _deserialize_transition_error;
    }
    return 1;

 _deserialize_transition_error:
    if(transition->suffix) {
       free(transition->suffix);
       transition->suffix = NULL;
    }
    if(transition->next) {
       Trie_del(transition->next);
       transition->next = NULL;
    }
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _deserialize_trie ( Trie trie,
int(*)(void *wasread, const int length, void *data)  read,
void *(*)(void *data)  read_value,
void *  data 
) [static]

Definition at line 656 of file trie.c.

{
    int i;
    unsigned char has_value;

    if(!(*read)(&has_value, sizeof(has_value), data))
       goto _deserialize_trie_error;
    if(has_value != 0 && has_value != 1)
       goto _deserialize_trie_error;
    if(has_value) {
       if(!(trie->value = (*read_value)(data)))
           goto _deserialize_trie_error;
    }
    if(!(*read)(&trie->num_transitions, sizeof(trie->num_transitions), data))
       goto _deserialize_trie_error;
    if(!(trie->transitions = 
        malloc(trie->num_transitions*sizeof(Transition))))
       goto _deserialize_trie_error;
    for(i=0; i<trie->num_transitions; i++) {
       if(!_deserialize_transition(&trie->transitions[i], 
                                read, read_value, data))
           goto _deserialize_trie_error;
    }
    return 1;
   
 _deserialize_trie_error:
    trie->num_transitions = 0;
    if(trie->transitions) {
       free(trie->transitions);
       trie->transitions = NULL;
    }
    trie->value = NULL;
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void _get_approximate_transition ( const char *  key,
const int  k,
const Transition transition,
const char *  suffix,
void(*)(const char *key, const void *value, const int mismatches, void *data)  callback,
void *  data,
const int  mismatches,
char *  current_key,
const int  max_key 
) [static]

Definition at line 252 of file trie.c.

{
    int i;
    int prev_keylen = strlen(current_key);

    /* Short circuit optimization.  If there's too many characters to
       possibly be a match, then don't even try to match things. */
    if((int)(strlen(suffix) - strlen(key)) > k)
       return;

    /* Match as many characters as possible. */
    i = 0;
    while(suffix[i] && (key[i] == suffix[i])) {
       i++;
    }
    /* Check to make sure the key is not too long.  BUG: If it is,
       fails silently. */
    if((prev_keylen+i) >= max_key)
       return;
    strncat(current_key, suffix, i);

    /* If all the letters in the suffix matched, then move to the
       next trie. */
    if(!suffix[i]) {
       _get_approximate_trie(transition->next, &key[i], k, callback, data,
                           mismatches, current_key, max_key);
    }
    /* Otherwise, try out different kinds of mismatches. */
    else if(k) {
       int new_keylen = prev_keylen+i;

       /* Letter replacement, skip the next letter in both the key and
          suffix. */
       if((new_keylen+1 < max_key) && key[i] && suffix[i]) {
           current_key[new_keylen] = suffix[i];
           current_key[new_keylen+1] = 0;
           _get_approximate_transition(&key[i+1], k-1,
                                   transition, &suffix[i+1],
                                   callback, data,
                                   mismatches+1, current_key, max_key);
           current_key[new_keylen] = 0;
       }

       /* Insertion in key, skip the next letter in the key. */
       if(key[i]) {
           _get_approximate_transition(&key[i+1], k-1, 
                                   transition, &suffix[i],
                                   callback, data,
                                   mismatches+1, current_key, max_key);
       }

       /* Deletion from key, skip the next letter in the suffix. */
       if((new_keylen+1 < max_key) && suffix[i]) {
           current_key[new_keylen] = suffix[i];
           current_key[new_keylen+1] = 0;
           _get_approximate_transition(&key[i], k-1,
                                   transition, &suffix[i+1],
                                   callback, data,
                                   mismatches+1, current_key, max_key);
           current_key[new_keylen] = 0;
       }
    }
    current_key[prev_keylen] = 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void _get_approximate_trie ( const Trie trie,
const char *  key,
const int  k,
void(*)(const char *key, const void *value, const int mismatches, void *data)  callback,
void *  data,
const int  mismatches,
char *  current_key,
const int  max_key 
) [static]

Definition at line 329 of file trie.c.

{
    int i;

    /* If there's no more key to match, then I'm done. */
    if(!key[0]) {
       if(trie->value)
           (*callback)(current_key, trie->value, mismatches, data);
    }
    /* If there are no more mismatches allowed, then fall back to the
       faster Trie_get. */
    else if(!k) {
       void *value = Trie_get(trie, key);
       if(value) {
           int l = strlen(current_key);
           /* Make sure I have enough space for the full key. */
           if(l + strlen(key) < max_key) {
              strcat(current_key, key);
              (*callback)(current_key, value, mismatches, data);
              current_key[l] = 0;
           }
           /* BUG: Ran out of space for the key.  This fails
              silently, but should signal an error. */
       }
    }
    /* If there are no more transitions, then all the characters left
       in the key are mismatches. */
    else if(!trie->num_transitions) {
       if(trie->value && (strlen(key) <= k)) {
           (*callback)(current_key, trie->value, 
                     mismatches+strlen(key), data);
       }
    }
    /* Otherwise, try to match each of the transitions. */
    else {
       for(i=0; i<trie->num_transitions; i++) {
           Transition* transition = &trie->transitions[i];
           const char *suffix = transition->suffix;
           _get_approximate_transition(key, k, transition, suffix,
                                   callback, data, 
                                   mismatches, current_key, max_key);
       }
    }

}

Here is the call graph for this function:

Here is the caller graph for this function:

static void _iterate_helper ( const Trie trie,
void(*)(const char *key, const void *value, void *data)  callback,
void *  data,
char *  current_key,
const int  max_key 
) [static]

Definition at line 453 of file trie.c.

{
    int i;
    if(trie->value)
       (*callback)(current_key, trie->value, data);
    for(i=0; i<trie->num_transitions; i++) {
       Transition* transition = &trie->transitions[i];
       const char *suffix = transition->suffix;
       int keylen = strlen(current_key);

       if(keylen + strlen(suffix) >= max_key) {
           /* BUG: This will fail silently.  It should raise some
              sort of error. */
           continue;
       }
       strcat(current_key, suffix);
       _iterate_helper(transition->next, callback, data, 
                     current_key, max_key);
       current_key[keylen] = 0;
    }
}

Here is the caller graph for this function:

static int _serialize_transition ( const Transition transition,
int(*)(const void *towrite, const int length, void *data)  write,
int(*)(const void *value, void *data)  write_value,
void *  data 
) [static]

Definition at line 612 of file trie.c.

{
    int suffixlen;
    unsigned char has_trie;

    suffixlen = strlen(transition->suffix);
    if(!(*write)(&suffixlen, sizeof(suffixlen), data))
       return 0;
    if(!(*write)(transition->suffix, suffixlen, data))
       return 0;

    has_trie = (transition->next != NULL);
    if(!(*write)(&has_trie, sizeof(has_trie), data))
       return 0;
    if(has_trie) {
       if(!_serialize_trie(transition->next, write, write_value, data))
           return 0;
    }
    return 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _serialize_trie ( const Trie trie,
int(*)(const void *towrite, const int length, void *data)  write,
int(*)(const void *value, void *data)  write_value,
void *  data 
) [static]

Definition at line 583 of file trie.c.

{
    int i;
    unsigned char has_value;

    has_value = (trie->value != NULL);
    if(!(*write)(&has_value, sizeof(has_value), data))
       return 0;
    if(has_value) {
       if(!(*write_value)(trie->value, data))
           return 0;
    }

    if(!(*write)(&trie->num_transitions, sizeof(trie->num_transitions), data))
       return 0;
    for(i=0; i<trie->num_transitions; i++) {
       if(!_serialize_transition(&trie->transitions[i], 
                              write, write_value, data))
           return 0;
    }

    return 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void _with_prefix_helper ( const Trie trie,
const char *  prefix,
void(*)(const char *key, const void *value, void *data)  callback,
void *  data,
char *  current_key,
const int  max_key 
) [static]

Definition at line 492 of file trie.c.

{
    int first, last, mid;

    if(!prefix[0]) {
       _iterate_helper(trie, callback, data, current_key, max_key);
       return;
    }

    /* The transitions are stored in alphabetical order.  Do a binary
     * search to find the proper one.
     */
    first = 0;
    last = trie->num_transitions-1;
    while(first <= last) {
       Transition* transition;
       const char *suffix;
       int suffixlen, prefixlen, minlen;
       int c;
       mid = (first+last)/2;
       transition = &trie->transitions[mid];
       suffix = transition->suffix;
       suffixlen = strlen(suffix);
       prefixlen = strlen(prefix);
       minlen = (suffixlen < prefixlen) ? suffixlen : prefixlen;
       c = strncmp(prefix, suffix, minlen);
       if(c < 0)
           last = mid-1;
       else if(c > 0)
           first = mid+1;
       else {
           int keylen = strlen(current_key);
           if(keylen + minlen >= max_key) {
              /* BUG: This will fail silently.  It should raise some
                 sort of error. */
              break;
           }
           strncat(current_key, suffix, minlen);
           _with_prefix_helper(transition->next, prefix+minlen,
                            callback, data, current_key, max_key);
           current_key[keylen] = 0;
           break;
       }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static char* duplicate ( const char *  s) [static]

Definition at line 7 of file trie.c.

                                      {
    /* Don't use strdup, as it's not ANSI C. */
    char* t = malloc((strlen(s)+1)*sizeof(char));
    if (!t) return NULL;
    strcpy(t, s);
    return t;
}

Here is the caller graph for this function:

void Trie_del ( Trie trie)

Definition at line 192 of file trie.c.

                          {
    int i;
    if(!trie)
       return;
    for(i=0; i<trie->num_transitions; i++) {
       Transition* transition = &trie->transitions[i];
       if(transition->suffix)
           free(transition->suffix);
       Trie_del(transition->next);
    }
    free(trie);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Trie* Trie_deserialize ( int(*)(void *wasread, const int length, void *data)  read,
void *(*)(void *data)  read_value,
void *  data 
)

Definition at line 736 of file trie.c.

{
    Trie* trie = Trie_new();
    if(!_deserialize_trie(trie, read, read_value, data)) {
       Trie_del(trie);
       return NULL;
    }
    return trie;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void* Trie_get ( const Trie trie,
const char *  key 
)

Definition at line 205 of file trie.c.

                                                  {
    int first, last, mid;

    if(!key[0]) {
       return trie->value;
    }

    /* The transitions are stored in alphabetical order.  Do a binary
     * search to find the proper one.
     */
    first = 0;
    last = trie->num_transitions-1;
    while(first <= last) {
       Transition* transition;
       char *suffix;
       int c;
       mid = (first+last)/2;
       transition = &trie->transitions[mid];
       suffix = transition->suffix;
       /* If suffix is a substring of key, then get the value from
          the next trie.
       */
       c = strncmp(key, suffix, strlen(suffix));
       if(c < 0)
           last = mid-1;
       else if(c > 0)
           first = mid+1;
       else
           return Trie_get(transition->next, key+strlen(suffix));
    }
    return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void Trie_get_approximate ( const Trie trie,
const char *  key,
const int  k,
void(*)(const char *key, const void *value, const int mismatches, void *data)  callback,
void *  data 
)

Definition at line 385 of file trie.c.

{
    KEY[0] = 0;
    _get_approximate_trie(trie, key, k, callback, data, 0, KEY,MAX_KEY_LENGTH);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int Trie_has_key ( const Trie trie,
const char *  key 
)

Definition at line 412 of file trie.c.

{
    return Trie_get(trie, key) != NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int Trie_has_prefix ( const Trie trie,
const char *  prefix 
)

Definition at line 417 of file trie.c.

{
    int first, last, mid;

    if(!prefix[0]) {
       return 1;
    }

    /* The transitions are stored in alphabetical order.  Do a binary
     * search to find the proper one.
     */
    first = 0;
    last = trie->num_transitions-1;
    while(first <= last) {
       Transition* transition;
       char *suffix;
       int suffixlen, prefixlen, minlen;
       int c;
       mid = (first+last)/2;
       transition = &trie->transitions[mid];
       suffix = transition->suffix;
       suffixlen = strlen(suffix);
       prefixlen = strlen(prefix);
       minlen = (suffixlen < prefixlen) ? suffixlen : prefixlen;
       c = strncmp(prefix, suffix, minlen);
       if(c < 0)
           last = mid-1;
       else if(c > 0)
           first = mid+1;
       else
           return Trie_has_prefix(transition->next, prefix+minlen);
    }
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void Trie_iterate ( const Trie trie,
void(*)(const char *key, const void *value, void *data)  callback,
void *  data 
)

Definition at line 481 of file trie.c.

{
    KEY[0] = 0;
    _iterate_helper(trie, callback, data, KEY, MAX_KEY_LENGTH);
}

Here is the call graph for this function:

Here is the caller graph for this function:

int Trie_len ( const Trie trie)

Definition at line 397 of file trie.c.

{
    int i;
    int length = 0;
    
    if(!trie)
       return 0;
    if(trie->value)
       length += 1;
    for(i=0; i<trie->num_transitions; i++) {
       length += Trie_len(trie->transitions[i].next);
    }
    return length;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Trie* Trie_new ( void  )

Definition at line 45 of file trie.c.

                     {
    Trie* trie;

    if(!(trie = malloc(sizeof(struct Trie))))
       return NULL;
    trie->transitions = NULL;
    trie->num_transitions = 0;
    trie->value = NULL;
    return trie;
}

Here is the caller graph for this function:

int Trie_serialize ( const Trie trie,
int(*)(const void *towrite, const int length, void *data)  write,
int(*)(const void *value, void *data)  write_value,
void *  data 
)

Definition at line 637 of file trie.c.

{
    int success = _serialize_trie(trie, write, write_value, data);
    (*write)(NULL, 0, data);
    return success;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int Trie_set ( Trie trie,
const char *  key,
const void *  value 
)

Definition at line 56 of file trie.c.

                                                             {
    int i;
    Transition* transition=NULL;
    const char *suffix=NULL;
    int retval = 0;
    int first, last, mid;

    if(!key[0]) {
       trie->value = (void *)value;
       return 0;
    }

    /* Insert the key in alphabetical order.  Do a binary search to
       find the proper place. */
    first = 0;
    last = trie->num_transitions-1;
    i = -1;
    while(first <= last) {
       mid = (first+last)/2;
       transition = &trie->transitions[mid];
       suffix = transition->suffix;
       if(key[0] < suffix[0])
           last = mid-1;
       else if(key[0] > suffix[0])
           first = mid+1;
       else {
           i = mid;
           break;
       }
    }

    /* If no place was found for it, then the indexes will be in the
       order last,first.  Place it at index first. */
    if(i == -1)
       i = first;

    /* If nothing matches, then insert a new trie here. */
    if((i >= trie->num_transitions) || (key[0] != suffix[0])) {
       char *new_suffix=NULL;
       Trie* newtrie=NULL;
       Transition* new_transitions=NULL;

       /* Create some variables for the new transition.  I'm going to
          allocate these first so that if I can detect memory errors
          before I mess up the data structure of the transitions.
       */
       if(!(new_suffix = duplicate(key)))
           goto insert_memerror;
       if(!(newtrie = Trie_new()))
           goto insert_memerror;

       /* Create some space for the next transition.  Allocate some
          memory and shift the old transitions over to make room for
          this one.
       */
       if(!(new_transitions = malloc(sizeof(Transition) *
                                  (trie->num_transitions+1))))
           goto insert_memerror;
       memcpy(new_transitions, trie->transitions,
              sizeof(Transition)*i);
       memcpy(&new_transitions[i+1], &trie->transitions[i],
              sizeof(Transition)*(trie->num_transitions-i));
       free(trie->transitions);
       trie->transitions = new_transitions;
       new_transitions = NULL;
       trie->num_transitions += 1;

       /* Initialize the new transition. */
       transition = &trie->transitions[i];
       transition->suffix = new_suffix;
       transition->next = newtrie;
       transition->next->value = (void *)value;

       if(0) {
       insert_memerror:
           if(new_transitions) free(new_transitions);
           if(newtrie) free(newtrie);
           if(new_suffix) free(new_suffix);
           return 1;
       }
    } 
    /* There are three cases where the key and suffix share some
       letters. 
       1.  suffix is proper substring of key.
       2.  key is proper substring of suffix.
       3.  neither is proper substring of other.

       For cases 2 and 3, I need to first split up the transition
       based on the number of characters shared.  Then, I can insert
       the rest of the key into the next trie.
    */
    else {
       /* Count the number of characters shared between key
          and suffix. */
       int chars_shared = 0;
       while(key[chars_shared] && key[chars_shared] == suffix[chars_shared])
           chars_shared++;

       /* Case 2 or 3, split this sucker! */
       if(chars_shared < strlen(suffix)) {
           Trie* newtrie=NULL;
           char *new_suffix1=NULL, *new_suffix2=NULL;

           if(!(new_suffix1 = malloc(chars_shared+1)))
              goto split_memerror;
           strncpy(new_suffix1, key, chars_shared);
           new_suffix1[chars_shared] = 0;
           if(!(new_suffix2 = duplicate(suffix+chars_shared)))
              goto split_memerror;
           if(!(newtrie = Trie_new()))
              goto split_memerror;
           if(!(newtrie->transitions = malloc(sizeof(Transition))))
              goto split_memerror;
           newtrie->num_transitions = 1;
           newtrie->transitions[0].next = transition->next;
           newtrie->transitions[0].suffix = new_suffix2;

           free(transition->suffix);
           transition->suffix = new_suffix1;
           transition->next = newtrie;

           if(0) {
           split_memerror:
              if(newtrie && newtrie->transitions) free(newtrie->transitions);
              if(newtrie) free(newtrie);
              if(new_suffix2) free(new_suffix2);
              if(new_suffix1) free(new_suffix1);
              return 1;
           }
       }
       retval = Trie_set(transition->next, key+chars_shared, value);
    }

    return retval;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void Trie_with_prefix ( const Trie trie,
const char *  prefix,
void(*)(const char *key, const void *value, void *data)  callback,
void *  data 
)

Definition at line 544 of file trie.c.

{
    KEY[0] = 0;
    _with_prefix_helper(trie, prefix, callback, data, KEY, MAX_KEY_LENGTH);
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

char KEY[MAX_KEY_LENGTH] [static]

Definition at line 42 of file trie.c.