Back to index

python-biopython  1.60
Classes | Defines | Functions | Variables
cpairwise2module.c File Reference
#include "Python.h"

Go to the source code of this file.

Classes

struct  IndexList

Defines

#define _PRECISION   1000
#define rint(x)   (int)((x)*_PRECISION+0.5)

Functions

static double PyNumber_AsDouble (PyObject *py_num)
double calc_affine_penalty (int length, double open, double extend, int penalize_extend_when_opening)
static void IndexList_init (struct IndexList *il)
static void IndexList_free (struct IndexList *il)
static void IndexList_clear (struct IndexList *il)
static int IndexList_contains (struct IndexList *il, const int row, const int col)
static int IndexList__verify_free_index (struct IndexList *il, int num_needed)
static void IndexList_append (struct IndexList *il, const int row, const int col)
static void IndexList_extend (struct IndexList *il1, struct IndexList *il2)
double _get_match_score (PyObject *py_sequenceA, PyObject *py_sequenceB, PyObject *py_match_fn, int i, int j, char *sequenceA, char *sequenceB, int use_sequence_cstring, double match, double mismatch, int use_match_mismatch_scores)
static PyObject * cpairwise2__make_score_matrix_fast (PyObject *self, PyObject *args)
static PyObject * cpairwise2_rint (PyObject *self, PyObject *args, PyObject *keywds)
void initcpairwise2 (void)

Variables

static PyMethodDef cpairwise2Methods []
static char cpairwise2__doc__ []

Class Documentation

struct IndexList

Definition at line 48 of file cpairwise2module.c.

Class Members
int * indexes
int num_allocated
int num_used

Define Documentation

#define _PRECISION   1000

Definition at line 15 of file cpairwise2module.c.

#define rint (   x)    (int)((x)*_PRECISION+0.5)

Definition at line 16 of file cpairwise2module.c.


Function Documentation

double _get_match_score ( PyObject *  py_sequenceA,
PyObject *  py_sequenceB,
PyObject *  py_match_fn,
int  i,
int  j,
char *  sequenceA,
char *  sequenceB,
int  use_sequence_cstring,
double  match,
double  mismatch,
int  use_match_mismatch_scores 
)

Definition at line 157 of file cpairwise2module.c.

{
    PyObject *py_A=NULL, 
       *py_B=NULL;
    PyObject *py_arglist=NULL, *py_result=NULL;
    double score = 0;

    if(use_sequence_cstring && use_match_mismatch_scores) {
       score = (sequenceA[i] == sequenceB[j]) ? match : mismatch;
       return score;
    }
    /* Calculate the match score. */
    if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
       goto _get_match_score_cleanup;
    if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
       goto _get_match_score_cleanup;
    if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
       goto _get_match_score_cleanup;

    if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist)))
       goto _get_match_score_cleanup;
    score = PyNumber_AsDouble(py_result);
 _get_match_score_cleanup:
    if(py_A) {
       Py_DECREF(py_A);
    }
    if(py_B) {
       Py_DECREF(py_B);
    }
    if(py_arglist) {
       Py_DECREF(py_arglist);
    }
    if(py_result) {
       Py_DECREF(py_result);
    }
    return score;
}

Here is the call graph for this function:

Here is the caller graph for this function:

double calc_affine_penalty ( int  length,
double  open,
double  extend,
int  penalize_extend_when_opening 
)

Definition at line 35 of file cpairwise2module.c.

{
    double penalty;

    if(length <= 0)
       return 0.0;
    penalty = open + extend * length;
    if(!penalize_extend_when_opening)
       penalty -= extend;
    return penalty;
}

Here is the caller graph for this function:

static PyObject* cpairwise2__make_score_matrix_fast ( PyObject *  self,
PyObject *  args 
) [static]

Definition at line 222 of file cpairwise2module.c.

{
    int i;
    int row, col;

    PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
#if PY_MAJOR_VERSION >= 3
    PyObject *py_bytesA, *py_bytesB;
#endif
    char *sequenceA=NULL, *sequenceB=NULL;
    int use_sequence_cstring;
    double open_A, extend_A, open_B, extend_B;
    int penalize_extend_when_opening, penalize_end_gaps;
    int align_globally, score_only;

    PyObject *py_match=NULL, *py_mismatch=NULL;
    double first_A_gap, first_B_gap;
    double match, mismatch;
    int use_match_mismatch_scores;
    int lenA, lenB;
    double *score_matrix = (double *)NULL;
    struct IndexList *trace_matrix = (struct IndexList *)NULL;
    PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;

    double *row_cache_score = (double *)NULL, 
       *col_cache_score = (double *)NULL;
    struct IndexList *row_cache_index = (struct IndexList *)NULL, 
       *col_cache_index = (struct IndexList *)NULL;

    PyObject *py_retval = NULL;

    if(!PyArg_ParseTuple(args, "OOOddddiiii", &py_sequenceA, &py_sequenceB,
                      &py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
                      &penalize_extend_when_opening, &penalize_end_gaps,
                      &align_globally, &score_only))
       return NULL;
    if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
       PyErr_SetString(PyExc_TypeError, 
                     "py_sequenceA and py_sequenceB should be sequences.");
       return NULL;
    }

    /* Optimize for the common case.  Check to see if py_sequenceA and
       py_sequenceB are strings.  If they are, use the c string
       representation. */
#if PY_MAJOR_VERSION < 3
    use_sequence_cstring = 0;
    if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
       sequenceA = PyString_AS_STRING(py_sequenceA);
       sequenceB = PyString_AS_STRING(py_sequenceB);
       use_sequence_cstring = 1;
    }
#else
    py_bytesA = _create_bytes_object(py_sequenceA);
    py_bytesB = _create_bytes_object(py_sequenceB);
    if (py_bytesA && py_bytesB) {
        sequenceA = PyBytes_AS_STRING(py_bytesA);
        sequenceB = PyBytes_AS_STRING(py_bytesB);
       use_sequence_cstring = 1;
    }
    else {
        Py_XDECREF(py_bytesA);
        Py_XDECREF(py_bytesB);
        use_sequence_cstring = 0;
    }
#endif

    if(!PyCallable_Check(py_match_fn)) {
       PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
       return NULL;
    }
    /* Optimize for the common case.  Check to see if py_match_fn is
       an identity_match.  If so, pull out the match and mismatch
       member variables and calculate the scores myself. */
    match = mismatch = 0;
    use_match_mismatch_scores = 0;
    if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
        goto cleanup_after_py_match_fn;
    match = PyNumber_AsDouble(py_match);
    if(PyErr_Occurred())
        goto cleanup_after_py_match_fn;
    if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
        goto cleanup_after_py_match_fn;
    mismatch = PyNumber_AsDouble(py_mismatch);
    if(PyErr_Occurred())
        goto cleanup_after_py_match_fn;
    use_match_mismatch_scores = 1;
cleanup_after_py_match_fn:
    if(PyErr_Occurred())
        PyErr_Clear();
    if(py_match) {
        Py_DECREF(py_match);
    }
    if(py_mismatch) {
        Py_DECREF(py_mismatch);
    }

    /* Cache some commonly used gap penalties */
    first_A_gap = calc_affine_penalty(1, open_A, extend_A, 
                                  penalize_extend_when_opening);
    first_B_gap = calc_affine_penalty(1, open_B, extend_B,
                                  penalize_extend_when_opening);

    /* Allocate matrices for storing the results and initalize them. */
    lenA = PySequence_Length(py_sequenceA);
    lenB = PySequence_Length(py_sequenceB);
    score_matrix = malloc(lenA*lenB*sizeof(*score_matrix));
    trace_matrix = malloc(lenA*lenB*sizeof(*trace_matrix));
    if(!score_matrix || !trace_matrix) {
       PyErr_SetString(PyExc_MemoryError, "Out of memory");
       goto _cleanup_make_score_matrix_fast;
    }
    for(i=0; i<lenA*lenB; i++) {
       score_matrix[i] = 0;
       IndexList_init(&trace_matrix[i]);
    }

    /* Initialize the first row and col of the score matrix. */
    for(i=0; i<lenA; i++) {
       double score = _get_match_score(py_sequenceA, py_sequenceB, 
                                   py_match_fn, i, 0,
                                   sequenceA, sequenceB,
                                   use_sequence_cstring,
                                   match, mismatch,
                                   use_match_mismatch_scores);
       if(PyErr_Occurred())
           goto _cleanup_make_score_matrix_fast;
       if(penalize_end_gaps)
           score += calc_affine_penalty(i, open_B, extend_B, 
                                    penalize_extend_when_opening);
       score_matrix[i*lenB] = score;
    }
    for(i=0; i<lenB; i++) {
       double score = _get_match_score(py_sequenceA, py_sequenceB, 
                                   py_match_fn, 0, i,
                                   sequenceA, sequenceB,
                                   use_sequence_cstring,
                                   match, mismatch,
                                   use_match_mismatch_scores);
       if(PyErr_Occurred())
           goto _cleanup_make_score_matrix_fast;
       if(penalize_end_gaps)
           score += calc_affine_penalty(i, open_A, extend_A, 
                                    penalize_extend_when_opening);
       score_matrix[i] = score;
    }

    /* Now initialize the row and col cache. */
    row_cache_score = malloc((lenA-1)*sizeof(*row_cache_score));
    row_cache_index = malloc((lenA-1)* sizeof(*row_cache_index));
    col_cache_score = malloc((lenB-1)*sizeof(*col_cache_score));
    col_cache_index = malloc((lenB-1)* sizeof(*col_cache_index));
    if(!row_cache_score || !row_cache_index || 
       !col_cache_score || !col_cache_index) {
       PyErr_SetString(PyExc_MemoryError, "Out of memory");
       goto _cleanup_make_score_matrix_fast;
    }
    memset((void *)row_cache_score, 0, (lenA-1)*sizeof(*row_cache_score));
    memset((void *)row_cache_index, 0, (lenA-1)*sizeof(*row_cache_index));
    memset((void *)col_cache_score, 0, (lenB-1)*sizeof(*col_cache_score));
    memset((void *)col_cache_index, 0, (lenB-1)*sizeof(*col_cache_index));
    for(i=0; i<lenA-1; i++) {
       row_cache_score[i] = score_matrix[i*lenB] + first_A_gap;
       IndexList_append(&row_cache_index[i], i, 0);
    }
    for(i=0; i<lenB-1; i++) {
       col_cache_score[i] = score_matrix[i] + first_B_gap;
       IndexList_append(&col_cache_index[i], 0, i);
    }

    /* Fill in the score matrix. */
    for(row=1; row<lenA; row++) {
       for(col=1; col<lenB; col++) {
           double nogap_score, row_score, col_score, best_score;
           int best_score_rint;
           struct IndexList *il;

           double score, open_score, extend_score;
           int open_score_rint, extend_score_rint;

           /* Calculate the best score. */
           nogap_score = score_matrix[(row-1)*lenB+col-1];
           if(col > 1) {
              row_score = row_cache_score[row-1];
           } else {
              row_score = nogap_score-1; /* Make sure it's not best score */
           }
           if(row > 1) {
              col_score = col_cache_score[col-1];
           } else {
              col_score = nogap_score-1; /* Make sure it's not best score */
           }

           best_score = (row_score > col_score) ? row_score : col_score;
           if(nogap_score > best_score)
              best_score = nogap_score;
           best_score_rint = rint(best_score);

           /* Set the score and traceback matrices. */
           score = best_score + _get_match_score(py_sequenceA, py_sequenceB, 
                                            py_match_fn, row, col,
                                            sequenceA, sequenceB,
                                            use_sequence_cstring,
                                            match, mismatch,
                                            use_match_mismatch_scores);
           if(PyErr_Occurred())
              goto _cleanup_make_score_matrix_fast;
           if(!align_globally && score < 0)
              score_matrix[row*lenB+col] = 0;
           else
              score_matrix[row*lenB+col] = score;

           il = &trace_matrix[row*lenB+col];
           if(best_score_rint == rint(nogap_score)) {
              IndexList_append(il, row-1, col-1);
           }
           if(best_score_rint == rint(row_score)) {
              IndexList_extend(il, &row_cache_index[row-1]);
           }
           if(best_score_rint == rint(col_score)) {
              IndexList_extend(il, &col_cache_index[col-1]);
           }

           /* Update the cached column scores. */
           open_score = score_matrix[(row-1)*lenB+col-1] + first_B_gap;
           extend_score = col_cache_score[col-1] + extend_B;
           open_score_rint = rint(open_score);
           extend_score_rint = rint(extend_score);
           if(open_score_rint > extend_score_rint) {
              col_cache_score[col-1] = open_score;
              IndexList_clear(&col_cache_index[col-1]);
              IndexList_append(&col_cache_index[col-1], row-1, col-1);
           } else if(extend_score_rint > open_score_rint) {
              col_cache_score[col-1] = extend_score;
           } else {
              col_cache_score[col-1] = open_score;
              if(!IndexList_contains(&col_cache_index[col-1], row-1, col-1))
                  IndexList_append(&col_cache_index[col-1], row-1, col-1);
           }
           
           /* Update the cached row scores. */
           open_score = score_matrix[(row-1)*lenB+col-1] + first_A_gap;
           extend_score = row_cache_score[row-1] + extend_A;
           open_score_rint = rint(open_score);
           extend_score_rint = rint(extend_score);
           if(open_score_rint > extend_score_rint) {
              row_cache_score[row-1] = open_score;
              IndexList_clear(&row_cache_index[row-1]);
              IndexList_append(&row_cache_index[row-1], row-1, col-1);
           } else if(extend_score_rint > open_score_rint) {
              row_cache_score[row-1] = extend_score;
           } else {
              row_cache_score[row-1] = open_score;
              if(!IndexList_contains(&row_cache_index[row-1], row-1, col-1))
                  IndexList_append(&row_cache_index[row-1], row-1, col-1);
           }
       }
    }

    /* Save the score and traceback matrices into real python objects. */
    if(!(py_score_matrix = PyList_New(lenA)))
       goto _cleanup_make_score_matrix_fast;
    if(!(py_trace_matrix = PyList_New(lenA)))
       goto _cleanup_make_score_matrix_fast;
    for(row=0; row<lenA; row++) {
       PyObject *py_score_row, *py_trace_row;
       if(!(py_score_row = PyList_New(lenB)))
           goto _cleanup_make_score_matrix_fast;
       PyList_SET_ITEM(py_score_matrix, row, py_score_row);
       if(!(py_trace_row = PyList_New(lenB)))
           goto _cleanup_make_score_matrix_fast;
       PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);

       for(col=0; col<lenB; col++) {
           int i;
           PyObject *py_score, *py_indexlist;
           int offset = row*lenB + col;
           struct IndexList *il = &trace_matrix[offset];

           /* Set py_score_matrix[row][col] to the score. */
           if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
              goto _cleanup_make_score_matrix_fast;
           PyList_SET_ITEM(py_score_row, col, py_score);

           if(score_only)
              continue;
           /* Set py_trace_matrix[row][col] to a list of indexes.  On
              the edges of the matrix (row or column is 0), the
              matrix should be [None]. */
           if(!row || !col) {
              if(!(py_indexlist = PyList_New(1)))
                  goto _cleanup_make_score_matrix_fast;
              Py_INCREF(Py_None);
              PyList_SET_ITEM(py_indexlist, 0, Py_None);
           }
           else {
              if(!(py_indexlist = PyList_New(il->num_used)))
                  goto _cleanup_make_score_matrix_fast;
              for(i=0; i<il->num_used; i++) {
                  PyObject *py_index=NULL;
                  int row = il->indexes[i*2],
                     col = il->indexes[i*2+1];
                  if(!(py_index = Py_BuildValue("(ii)", row, col)))
                     goto _cleanup_make_score_matrix_fast;
                  PyList_SET_ITEM(py_indexlist, i, py_index);
              }
           }
           PyList_SET_ITEM(py_trace_row, col, py_indexlist);
       }
    }

    py_retval = Py_BuildValue("(OO)", py_score_matrix, py_trace_matrix);


 _cleanup_make_score_matrix_fast:
    if(score_matrix)
       free(score_matrix);
    if(trace_matrix) {
       for(i=0; i<lenA*lenB; i++)
           IndexList_free(&trace_matrix[i]);
       free(trace_matrix);
    }
    if(row_cache_score)
       free(row_cache_score);
    if(col_cache_score)
       free(col_cache_score);
    if(row_cache_index) {
       for(i=0; i<lenA-1; i++)
           IndexList_free(&row_cache_index[i]);
       free(row_cache_index);
    }
    if(col_cache_index) {
       for(i=0; i<lenB-1; i++) {
           IndexList_free(&col_cache_index[i]);
       }
       free(col_cache_index);
    }
    if(py_score_matrix) {
       Py_DECREF(py_score_matrix);
    }
    if(py_trace_matrix) {
       Py_DECREF(py_trace_matrix);
    }
#if PY_MAJOR_VERSION >= 3
    if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
    if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
#endif

    return py_retval;
}

Here is the call graph for this function:

static PyObject* cpairwise2_rint ( PyObject *  self,
PyObject *  args,
PyObject *  keywds 
) [static]

Definition at line 574 of file cpairwise2module.c.

{
    double x;
    int precision = _PRECISION;
    int rint_x;

    static char *kwlist[] = {"x", "precision", NULL};

    if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist,
                                &x, &precision))
       return NULL;
    rint_x = (int)(x * precision + 0.5);
#if PY_MAJOR_VERSION >= 3
    return PyLong_FromLong((long)rint_x);
#else
    return PyInt_FromLong((long)rint_x);
#endif
}
static int IndexList__verify_free_index ( struct IndexList il,
int  num_needed 
) [static]

Definition at line 88 of file cpairwise2module.c.

{
    int *indexes;
    int num_to_allocate;
    int num_bytes;

    if(il->num_allocated >= num_needed)
       return 1;

    /* Nearly all the cases are for list of length 1 or 2.
       Empirically, the code seems to run fastest when I allocate the
       exact amount for these. */
    if(num_needed <= 2)
       num_to_allocate = num_needed;
    else 
       num_to_allocate = num_needed*2;
    num_bytes = num_to_allocate*sizeof(int)*2;
    if(!(indexes = realloc((void *)il->indexes, num_bytes))) {
       PyErr_SetString(PyExc_MemoryError, "Out of memory");
       return 0;
    }
    il->indexes = indexes;
    il->num_allocated = num_to_allocate;
    return 1;
}

Here is the caller graph for this function:

static void IndexList_append ( struct IndexList il,
const int  row,
const int  col 
) [static]

Definition at line 114 of file cpairwise2module.c.

{
    int i;
    if(!IndexList__verify_free_index(il, il->num_used+1))
       return;
    i=il->num_used*2;
    il->indexes[i] = row;
    il->indexes[i+1] = col;
    il->num_used += 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void IndexList_clear ( struct IndexList il) [static]

Definition at line 69 of file cpairwise2module.c.

{
    il->num_used = 0;
}

Here is the caller graph for this function:

static int IndexList_contains ( struct IndexList il,
const int  row,
const int  col 
) [static]

Definition at line 74 of file cpairwise2module.c.

{
    int i;
    int stop;

    stop = il->num_used*2;
    for(i=0; i<stop; i+=2) {
       if((il->indexes[i] == row) && (il->indexes[i+1] == col))
           return 1;
    }
    return 0;
}

Here is the caller graph for this function:

static void IndexList_extend ( struct IndexList il1,
struct IndexList il2 
) [static]

Definition at line 126 of file cpairwise2module.c.

{
    int i1, i2;
    int stop;

    if(!IndexList__verify_free_index(il1, il1->num_used+il2->num_used))
       return;
    stop=il2->num_used * 2;
    for(i1=il1->num_used*2, i2=0; i2<stop; i1+=2, i2+=2) {
       il1->indexes[i1] = il2->indexes[i2];
       il1->indexes[i1+1] = il2->indexes[i2+1];
    }
    il1->num_used += il2->num_used;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void IndexList_free ( struct IndexList il) [static]

Definition at line 62 of file cpairwise2module.c.

{
    if(il->indexes)
       free(il->indexes);
    IndexList_init(il);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void IndexList_init ( struct IndexList il) [static]

Definition at line 54 of file cpairwise2module.c.

{
    /* il->num_used = 0;
    il->num_allocated = 0;
    il->indexes = NULL; */
    memset((void *)il, 0, sizeof(struct IndexList));
}

Here is the caller graph for this function:

void initcpairwise2 ( void  )

Definition at line 628 of file cpairwise2module.c.

{
#if PY_MAJOR_VERSION >= 3
    PyObject* module = PyModule_Create(&moduledef);
    if (module==NULL) return NULL;
    return module;
#else
    (void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__);
#endif
}
static double PyNumber_AsDouble ( PyObject *  py_num) [static]

Definition at line 21 of file cpairwise2module.c.

{
    double val;
    PyObject *floatobj;

    if((floatobj = PyNumber_Float(py_num)) == NULL)
       return(0.0);
    val = PyFloat_AsDouble(floatobj);
    Py_DECREF(floatobj);
    return val;
}

Here is the caller graph for this function:


Variable Documentation

char cpairwise2__doc__[] [static]
Initial value:
"XXX document here\n\
\n\
"

Definition at line 603 of file cpairwise2module.c.

Initial value:
 {
    {"_make_score_matrix_fast", 
     (PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""},
    {"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""},
    {NULL, NULL, 0, NULL}
}

Definition at line 596 of file cpairwise2module.c.