Back to index

python-biopython  1.60
triemodule.c
Go to the documentation of this file.
00001 #include <Python.h>
00002 #include <marshal.h>
00003 #include "trie.h"
00004 
00005 #if PY_VERSION_HEX < 0x02050000
00006 #define Py_ssize_t int
00007 #endif
00008 
00009 
00010 
00011 staticforward PyTypeObject Trie_Type;
00012 
00013 typedef struct {
00014     PyObject_HEAD
00015     Trie* trie;
00016 } trieobject;
00017 
00018 static PyObject*
00019 trie_trie(PyObject* self, PyObject* args)
00020 {
00021     trieobject* trieobj;
00022     Trie* trie;
00023 
00024     if (!PyArg_ParseTuple(args,":trie")) 
00025         return NULL;
00026     if(!(trie = Trie_new()))
00027        return PyErr_NoMemory();
00028     if(!(trieobj = PyObject_New(trieobject, &Trie_Type)))
00029        return NULL;
00030     trieobj->trie = trie;
00031     return (PyObject*)trieobj;
00032 }
00033 
00034 static void 
00035 _decref_objects(const char *key, const void *value, void *data) 
00036 {
00037     Py_DECREF((PyObject *)value);
00038 }
00039 
00040 static void
00041 trie_dealloc(PyObject* self)
00042 {
00043     trieobject *mp = (trieobject *)self;
00044     Trie_iterate(mp->trie, _decref_objects, NULL);
00045     Trie_del(mp->trie);
00046     PyObject_Del(self);
00047 }
00048 
00049 static Py_ssize_t
00050 trie_length(trieobject *mp)
00051 {
00052     return Trie_len(mp->trie);
00053 }
00054 
00055 static PyObject *
00056 trie_subscript(trieobject *mp, PyObject *py_key)
00057 {
00058     const char *key;
00059     PyObject *py_value;
00060 
00061     /* Make sure key is a string. */
00062     if(!PyString_Check(py_key)) {
00063        PyErr_SetString(PyExc_TypeError, "key must be a string");
00064        return NULL;
00065     }
00066     key = PyString_AS_STRING(py_key);
00067     py_value = Trie_get(mp->trie, key);
00068     if(py_value == NULL)
00069        PyErr_SetString(PyExc_KeyError, key);
00070     else
00071        Py_INCREF(py_value);
00072     return py_value;
00073 }
00074 
00075 static int
00076 trie_ass_sub(trieobject *mp, PyObject *py_key, PyObject *py_value)
00077 {
00078     const char *key;
00079     PyObject *py_prev;
00080 
00081     /* Make sure key is a string. */
00082     if(!PyString_Check(py_key)) {
00083        PyErr_SetString(PyExc_TypeError, "key must be a string");
00084        return -1;
00085     }
00086     key = PyString_AS_STRING(py_key);
00087     
00088     /* Check to see whether something already exists at that key.  If
00089        there's already an object there, then I will have to remove it.
00090     */
00091     py_prev = Trie_get(mp->trie, key);
00092     if(py_prev) {
00093        Py_DECREF(py_prev);
00094     }
00095 
00096     /* The client wants to delete a key from a dictionary.  The Trie
00097        API doesn't support this, so I will just overwrite it with
00098        NULL. */
00099     if(!py_value) {
00100        /* If the key doesn't exist, raise a KeyError. */
00101        if(!py_prev) {
00102            PyErr_SetString(PyExc_KeyError, key);
00103            return -1;
00104        }
00105        Trie_set(mp->trie, key, NULL);
00106     }
00107     /* The client wants to set a key in the dictionary. */
00108     else {
00109        Py_INCREF(py_value);
00110        if(Trie_set(mp->trie, key, py_value)) {
00111            PyErr_SetString(PyExc_AssertionError, "error setting trie");
00112            return -1;
00113        }
00114     }
00115     return 0;
00116 }
00117 
00118 static char has_key__doc__[] =
00119 "D.has_key(k) -> 1 if D has a key k, else 0";
00120 
00121 static PyObject *
00122 trie_has_key(trieobject *mp, PyObject *py_key)
00123 {
00124     const char *key;
00125     int has_key;
00126 
00127     /* Make sure key is a string. */
00128     if(!PyString_Check(py_key)) {
00129        PyErr_SetString(PyExc_TypeError, "key must be a string");
00130        return NULL;
00131     }
00132     key = PyString_AS_STRING(py_key);
00133     has_key = Trie_has_key(mp->trie, key);
00134     return PyInt_FromLong((long)has_key);
00135 }
00136 
00137 static PyObject *
00138 trie_has_key_onearg(trieobject *mp, PyObject *py_args)
00139 {
00140     PyObject *py_arg;
00141     if(!PyArg_ParseTuple(py_args, "O", &py_arg))
00142        return NULL;
00143     return trie_has_key(mp, py_arg);
00144 }
00145 
00146 
00147 
00148 static char has_prefix__doc__[] =
00149 "D.has_prefix(k) -> 1 if D has a prefix k, else 0";
00150 
00151 static PyObject *
00152 trie_has_prefix(trieobject *mp, PyObject *py_prefix)
00153 {
00154     const char *prefix;
00155     int has_prefix;
00156 
00157     /* Make sure prefix is a string. */
00158     if(!PyString_Check(py_prefix)) {
00159        PyErr_SetString(PyExc_TypeError, "k must be a string");
00160        return NULL;
00161     }
00162     prefix = PyString_AS_STRING(py_prefix);
00163     has_prefix = Trie_has_prefix(mp->trie, prefix);
00164     return PyInt_FromLong((long)has_prefix);
00165 }
00166 
00167 static PyObject *
00168 trie_has_prefix_onearg(trieobject *mp, PyObject *py_args)
00169 {
00170     PyObject *py_arg;
00171     if(!PyArg_ParseTuple(py_args, "O", &py_arg))
00172        return NULL;
00173     return trie_has_prefix(mp, py_arg);
00174 }
00175 
00176 static char with_prefix__doc__[] =
00177 "D.with_prefix(prefix) -> list of D's keys that begins with prefix";
00178 
00179 static void 
00180 _trie_with_prefix_helper(const char *key, const void *value, void *data) 
00181 {
00182     PyObject *py_list = (PyObject *)data;
00183     PyObject *py_key;
00184 
00185     if(PyErr_Occurred())
00186        return;
00187 
00188     if(!(py_key = PyString_FromString(key)))
00189        return;
00190     PyList_Append(py_list, py_key);
00191     Py_DECREF(py_key);
00192 }
00193 
00194 static PyObject *
00195 trie_with_prefix(trieobject *mp, PyObject *py_prefix)
00196 {
00197     const char *prefix;
00198     PyObject *py_list;
00199 
00200     /* Make sure prefix is a string. */
00201     if(!PyString_Check(py_prefix)) {
00202        PyErr_SetString(PyExc_TypeError, "k must be a string");
00203        return NULL;
00204     }
00205     prefix = PyString_AS_STRING(py_prefix);
00206 
00207     if(!(py_list = PyList_New(0)))
00208        return NULL;
00209     Trie_with_prefix(mp->trie, prefix, 
00210                    _trie_with_prefix_helper, (void *)py_list);
00211     if(PyErr_Occurred()) {
00212        Py_DECREF(py_list);
00213        return NULL;
00214     }
00215     return py_list;
00216 }
00217 
00218 static PyObject *
00219 trie_with_prefix_onearg(trieobject *mp, PyObject *py_args)
00220 {
00221     PyObject *py_arg;
00222     if(!PyArg_ParseTuple(py_args, "O", &py_arg))
00223        return NULL;
00224     return trie_with_prefix(mp, py_arg);
00225 }
00226 
00227 
00228 static char keys__doc__[] =
00229 "D.keys() -> list of D's keys";
00230 
00231 static void 
00232 _trie_keys_helper(const char *key, const void *value, void *data) 
00233 {
00234     PyObject *py_list = (PyObject *)data;
00235     PyObject *py_key;
00236 
00237     if(PyErr_Occurred())
00238        return;
00239 
00240     if(!(py_key = PyString_FromString(key)))
00241        return;
00242     PyList_Append(py_list, py_key);
00243     Py_DECREF(py_key);
00244 }
00245 
00246 static PyObject *
00247 trie_keys(trieobject *mp)
00248 {
00249     PyObject *py_list;
00250 
00251     if(!(py_list = PyList_New(0)))
00252        return NULL;
00253     Trie_iterate(mp->trie, _trie_keys_helper, (void *)py_list);
00254     if(PyErr_Occurred()) {
00255        Py_DECREF(py_list);
00256        return NULL;
00257     }
00258     return py_list;
00259 }
00260 
00261 static PyObject *
00262 trie_keys_noargs(trieobject *mp, PyObject *py_args)
00263 {
00264     if(PyTuple_Size(py_args) != 0) {
00265        PyErr_SetString(PyExc_ValueError, "no args expected");
00266        return NULL;
00267     }
00268     return trie_keys(mp);
00269 }
00270 
00271 static char values__doc__[] =
00272 "D.values() -> list of D's values";
00273 
00274 static void 
00275 _trie_values_helper(const char *key, const void *value, void *data) 
00276 {
00277     PyObject *py_list = (PyObject *)data;
00278     if(PyErr_Occurred())
00279        return;
00280     PyList_Append(py_list, (PyObject *)value);
00281 }
00282 
00283 static PyObject *
00284 trie_values(trieobject *mp)
00285 {
00286     PyObject *py_list;
00287 
00288     if(!(py_list = PyList_New(0)))
00289        return NULL;
00290     Trie_iterate(mp->trie, _trie_values_helper, (void *)py_list);
00291     if(PyErr_Occurred()) {
00292        Py_DECREF(py_list);
00293        return NULL;
00294     }
00295     return py_list;
00296 }
00297 
00298 static PyObject *
00299 trie_values_noargs(trieobject *mp, PyObject *py_args)
00300 {
00301     if(PyTuple_Size(py_args) != 0) {
00302        PyErr_SetString(PyExc_ValueError, "no args expected");
00303        return NULL;
00304     }
00305     return trie_values(mp);
00306 }
00307 
00308 static char get__doc__[] =
00309 "D.get(k[,d]) -> D[k] if D.has_key(k), else d.  d defaults to None.";
00310 
00311 static PyObject *
00312 trie_get(trieobject *mp, PyObject *args)
00313 {
00314     const char *key;
00315     PyObject *py_value;
00316     PyObject *py_failobj = Py_None;
00317 
00318     if (!PyArg_ParseTuple(args, "s|O:get", &key, &py_failobj))
00319        return NULL;
00320     py_value = Trie_get(mp->trie, key);
00321     if(!py_value)
00322        py_value = py_failobj;
00323     Py_INCREF(py_value);
00324     return py_value;
00325 }
00326 
00327 static char get_approximate__doc__[] =
00328 "D.get_approximate(key, k) -> List of (key, value, mismatches) in D, allowing up to k mismatches in key.";
00329 
00330 static void 
00331 _trie_get_approximate_helper(const char *key, const void *value, 
00332                           const int mismatches, void *data)
00333 {
00334     /* Append a tuple of (key, value) to data, which is a PyList. */
00335     PyObject *py_list = (PyObject *)data,
00336        *py_value = (PyObject *)value,
00337        *py_key,
00338        *py_tuple,
00339        *py_mismatches;
00340 
00341     if(PyErr_Occurred())
00342        return;
00343 
00344     if(!(py_key = PyString_FromString(key)))
00345        return;
00346     if(!(py_mismatches = PyInt_FromLong(mismatches))) {
00347        Py_DECREF(py_key);
00348        return;
00349     }
00350     Py_INCREF(py_value);
00351 
00352     if(!(py_tuple = PyTuple_New(3))) {
00353        Py_DECREF(py_key);
00354        Py_DECREF(py_mismatches);
00355        Py_DECREF(py_value);
00356        return;
00357     }
00358     PyTuple_SetItem(py_tuple, 0, py_key);
00359     PyTuple_SetItem(py_tuple, 1, py_value);
00360     PyTuple_SetItem(py_tuple, 2, py_mismatches);
00361     PyList_Append(py_list, py_tuple);
00362     Py_DECREF(py_tuple);
00363 }
00364 
00365 static PyObject *
00366 trie_get_approximate(trieobject *mp, PyObject *args)
00367 {
00368     const char *key;
00369     int k;
00370     PyObject *py_list;
00371 
00372     if (!PyArg_ParseTuple(args, "si:get_approximate", &key, &k))
00373        return NULL;
00374 
00375     if(!(py_list = PyList_New(0)))
00376        return NULL;
00377     Trie_get_approximate(mp->trie, key, k, 
00378                       _trie_get_approximate_helper, (void *)py_list);
00379     if(PyErr_Occurred()) {
00380        Py_DECREF(py_list);
00381        return NULL;
00382     }
00383     return py_list;
00384 }
00385 
00386 static long
00387 trie_nohash(PyObject *self)
00388 {
00389     PyErr_SetString(PyExc_TypeError, "trie objects are unhashable");
00390     return -1;
00391 }
00392 
00393 static PyMappingMethods trie_as_mapping = {
00394 /* The first member of PyMappingMethods was redefined in Python 2.5. */
00395 #if PY_VERSION_HEX < 0x02050000
00396     (inquiry)trie_length,        /*mp_length*/
00397 #else
00398     (lenfunc)trie_length,        /*mp_length*/
00399 #endif
00400     (binaryfunc)trie_subscript,  /*mp_subscript*/
00401     (objobjargproc)trie_ass_sub  /*mp_ass_subscript*/
00402 };
00403 
00404 static PyMethodDef trieobj_methods[] = {
00405     /*  METH_O and METH_NOARGS require Python 2.2.
00406     {"has_key", (PyCFunction)trie_has_key,  METH_O,
00407      has_key__doc__},
00408     {"has_prefix", (PyCFunction)trie_has_prefix,  METH_O,
00409      has_prefix__doc__},
00410     {"with_prefix", (PyCFunction)trie_with_prefix,  METH_O,
00411      with_prefix__doc__},
00412     {"keys",    (PyCFunction)trie_keys,     METH_NOARGS,
00413      keys__doc__},
00414     {"values",  (PyCFunction)trie_values,   METH_NOARGS,
00415      values__doc__},
00416     */
00417 
00418     {"has_key", (PyCFunction)trie_has_key_onearg,  METH_VARARGS,
00419      has_key__doc__},
00420     {"has_prefix", (PyCFunction)trie_has_prefix_onearg,  METH_VARARGS,
00421      has_prefix__doc__},
00422     {"with_prefix", (PyCFunction)trie_with_prefix_onearg,  METH_VARARGS,
00423      with_prefix__doc__},
00424     {"keys",    (PyCFunction)trie_keys_noargs,     METH_VARARGS,
00425      keys__doc__},
00426     {"values",  (PyCFunction)trie_values_noargs,   METH_VARARGS,
00427      values__doc__},
00428 
00429     {"get",     (PyCFunction)trie_get,      METH_VARARGS,
00430      get__doc__},
00431     {"get_approximate",  (PyCFunction)trie_get_approximate,  METH_VARARGS,
00432      get_approximate__doc__},
00433     {NULL, NULL}   /* sentinel */
00434 };
00435 
00436 static PyObject *trie_getattr(PyObject *obj, const char *name)
00437 {
00438     return Py_FindMethod(trieobj_methods, obj, name);
00439 
00440 }
00441 
00442 static PyTypeObject Trie_Type = {
00443     PyObject_HEAD_INIT(NULL)
00444     0,
00445     "trie",
00446     sizeof(trieobject),
00447     0,
00448     trie_dealloc,       /*tp_dealloc*/
00449     0,                  /*tp_print*/
00450     (getattrfunc)trie_getattr,                  /*tp_getattr*/
00451     0,                  /*tp_setattr*/
00452     0,                  /*tp_compare*/
00453     0,                  /*tp_repr*/
00454     0,                  /*tp_as_number*/
00455     0,                  /*tp_as_sequence*/
00456     &trie_as_mapping,   /*tp_as_mapping*/
00457     trie_nohash,        /*tp_hash */
00458 };
00459 
00460 static int
00461 _write_to_handle(const void *towrite, const int length, void *handle)
00462 {
00463     PyObject *py_handle = (PyObject *)handle,
00464        *py_retval = NULL;
00465     int success = 0;
00466 
00467     if(!length)
00468        return 1;
00469 
00470     if(!(py_retval = PyObject_CallMethod(py_handle, "write", "s#", 
00471                                     towrite, length)))
00472        goto _write_to_handle_cleanup;
00473     success = 1;
00474 
00475  _write_to_handle_cleanup:
00476     if(py_retval) {
00477        Py_DECREF(py_retval);
00478     }
00479     return success;
00480 }
00481 
00482 static int _write_value_to_handle(const void *value, void *handle)
00483 {
00484     PyObject *py_value = (PyObject *)value,
00485        *py_marshalled = NULL;
00486     char *marshalled;
00487     Py_ssize_t length;
00488     int success = 0;
00489 
00490 #ifdef Py_MARSHAL_VERSION  
00491     if(!(py_marshalled =   
00492         PyMarshal_WriteObjectToString(py_value, Py_MARSHAL_VERSION)))  
00493         goto _write_value_to_handle_cleanup;  
00494 #else  
00495     if(!(py_marshalled = PyMarshal_WriteObjectToString(py_value)))  
00496         goto _write_value_to_handle_cleanup;  
00497 #endif  
00498     if(PyString_AsStringAndSize(py_marshalled, &marshalled, &length) == -1)
00499        goto _write_value_to_handle_cleanup;
00500     if(!_write_to_handle(&length, sizeof(length), handle))
00501        goto _write_value_to_handle_cleanup;
00502     if (length != (int)length)
00503        goto _write_value_to_handle_cleanup;
00504     if(!_write_to_handle(marshalled, (int)length, handle))
00505        goto _write_value_to_handle_cleanup;
00506     success = 1;
00507 
00508  _write_value_to_handle_cleanup:
00509     if(py_marshalled) {
00510        Py_DECREF(py_marshalled);
00511     }
00512 
00513     return success;
00514 }
00515 
00516 static PyObject *
00517 trie_save(PyObject *self, PyObject *args)
00518 {
00519     PyObject *py_handle,
00520        *py_trie;
00521     trieobject *mp;
00522 
00523     if(!PyArg_ParseTuple(args, "OO:save", &py_handle, &py_trie))
00524         return NULL;
00525     mp = (trieobject *)py_trie;
00526     if(!Trie_serialize(mp->trie, _write_to_handle, _write_value_to_handle, 
00527                      (void *)py_handle)) {
00528        if(!PyErr_Occurred())
00529            PyErr_SetString(PyExc_RuntimeError,
00530                          "saving failed for some reason");
00531        return NULL;
00532     }
00533     Py_INCREF(Py_None);
00534     return Py_None;
00535 }
00536 
00537 static int 
00538 _read_from_handle(void *wasread, const int length, void *handle)
00539 {
00540     PyObject *py_handle = (PyObject *)handle,
00541        *py_retval = NULL;
00542     void *retval;
00543     int success = 0;
00544     PyBufferProcs *buffer;
00545     int segment;
00546     int bytes_read, bytes_left;
00547     
00548     if(!length)
00549        return 1;
00550 
00551     if(!(py_retval = PyObject_CallMethod(py_handle, "read", "i", length)))
00552        goto _read_from_handle_cleanup;
00553     if(!py_retval->ob_type->tp_as_buffer) {
00554        PyErr_SetString(PyExc_ValueError, "read method should return buffer");
00555        goto _read_from_handle_cleanup;
00556     }
00557     if(!(py_retval->ob_type->tp_flags & Py_TPFLAGS_DEFAULT)) {
00558        PyErr_SetString(PyExc_ValueError, "no bf_getcharbuffer slot");
00559        goto _read_from_handle_cleanup;
00560     }
00561     buffer = py_retval->ob_type->tp_as_buffer;
00562     if(!buffer->bf_getreadbuffer) {
00563        PyErr_SetString(PyExc_ValueError, "no bf_getreadbuffer");
00564        goto _read_from_handle_cleanup;
00565     }
00566 
00567     bytes_left = length;
00568     segment = 0;
00569     while(bytes_left > 0) {
00570        if((bytes_read = buffer->bf_getreadbuffer(py_retval, 
00571                                             segment, &retval)) == -1)
00572            goto _read_from_handle_cleanup; 
00573        memcpy(wasread, retval, bytes_read);
00574        wasread = (void *)((char *)wasread + bytes_read);
00575        bytes_left -= bytes_read;
00576        segment += 1;
00577     }
00578 
00579     success = 1;
00580     
00581  _read_from_handle_cleanup:
00582     if(py_retval) {
00583        Py_DECREF(py_retval);
00584     }
00585     return success;
00586 }
00587 
00588 #define MAX_KEY_LENGTH 2000
00589 static void *
00590 _read_value_from_handle(void *handle)
00591 {
00592     Py_ssize_t length;
00593     char KEY[MAX_KEY_LENGTH];
00594 
00595     if(!_read_from_handle((void *)&length, sizeof(length), (void *)handle))
00596        return NULL;
00597     if(length < 0 || length >= MAX_KEY_LENGTH)
00598        return NULL;
00599     if(!_read_from_handle((void *)KEY, length, (void *)handle))
00600        return NULL;
00601     return PyMarshal_ReadObjectFromString(KEY, length);
00602 }
00603 
00604 
00605 static PyObject *
00606 trie_load(PyObject *self, PyObject *args)
00607 {
00608     PyObject *py_handle;
00609     Trie* trie;
00610     trieobject *trieobj;
00611 
00612     if(!PyArg_ParseTuple(args, "O:load", &py_handle))
00613        return NULL;
00614 
00615     if(!(trie = Trie_deserialize(_read_from_handle, _read_value_from_handle, 
00616                              py_handle))) {
00617        if(!PyErr_Occurred())
00618            PyErr_SetString(PyExc_RuntimeError, 
00619                          "loading failed for some reason");
00620        return NULL;
00621     }
00622        
00623     if(!(trieobj = PyObject_New(trieobject, &Trie_Type))) {
00624        Trie_del(trie);
00625        return NULL;
00626     }
00627     trieobj->trie = trie;
00628     return (PyObject *)trieobj;
00629 }
00630 
00631 static PyMethodDef trie_methods[] = {
00632     {"trie", trie_trie, METH_VARARGS, 
00633      "trie() -> new Trie object."},
00634     {"load", trie_load, METH_VARARGS, 
00635      "load(handle) -> trie object"},
00636     {"save", trie_save, METH_VARARGS, 
00637      "save(handle, trie), save a trie object to a handle"},
00638     {NULL, NULL, 0, NULL}
00639 };
00640 
00641 static char trie__doc__[] =
00642 "\
00643 This module implements a trie data structure.  This allows an O(M)\n\
00644 lookup of a string in a dictionary, where M is the length of the\n\
00645 string.  It also supports approximate matches.\n\
00646 \n\
00647 Functions:\n\
00648 trie    Create a new trie object.\n\
00649 save    Save a trie to a handle.\n\
00650 load    Load a trie from a handle.\n\
00651 \n\
00652 ";
00653 
00654 DL_EXPORT(void)
00655 inittrie(void) 
00656 {
00657     Trie_Type.ob_type = &PyType_Type;
00658 
00659     (void) Py_InitModule3("trie", trie_methods, trie__doc__);
00660 }