Back to index

python-biopython  1.60
clustermodule.c
Go to the documentation of this file.
00001 #include "Python.h"
00002 #include "numpy/arrayobject.h"
00003 #include <stdio.h>
00004 #include <string.h>
00005 #include <float.h>
00006 #include "cluster.h"
00007 
00008 
00009 /* Must define Py_TYPE for Python 2.5 or older */
00010 #ifndef Py_TYPE
00011 #  define Py_TYPE(o) ((o)->ob_type)
00012 #endif
00013 
00014 /* Must define PyVarObject_HEAD_INIT for Python 2.5 or older */
00015 #ifndef PyVarObject_HEAD_INIT
00016 #define PyVarObject_HEAD_INIT(type, size)       \
00017         PyObject_HEAD_INIT(type) size,
00018 #endif
00019 
00020 /* ========================================================================== */
00021 /* -- Helper routines ------------------------------------------------------- */
00022 /* ========================================================================== */
00023 
00024 static int
00025 distance_converter(PyObject* object, void* pointer)
00026 { char c;
00027   const char* data;
00028   const char known_distances[] = "ebcauxsk";
00029 #if PY_MAJOR_VERSION < 3
00030   if (PyString_Check(object))
00031       data = PyString_AsString(object);
00032   else
00033 #endif
00034   if (PyUnicode_Check(object))
00035       data = PyUnicode_AS_DATA(object);
00036   else
00037   { PyErr_SetString(PyExc_ValueError, "distance should be a string");
00038     return 0;
00039   }
00040   if (strlen(data)!=1)
00041   { PyErr_SetString(PyExc_ValueError, "distance should be a single character");
00042     return 0;
00043   }
00044   c = data[0];
00045   if (!strchr(known_distances, c))
00046   { PyErr_Format(PyExc_ValueError, "unknown distance function specified (should be one of '%s')", known_distances);
00047     return 0;
00048   }
00049   *((char*)pointer) = c;
00050   return 1;
00051 }
00052 
00053 static int
00054 method_treecluster_converter(PyObject* object, void* pointer)
00055 { char c;
00056   const char* data;
00057   const char known_methods[] = "csma";
00058 #if PY_MAJOR_VERSION < 3
00059   if (PyString_Check(object))
00060       data = PyString_AsString(object);
00061   else
00062 #endif
00063   if (PyUnicode_Check(object))
00064       data = PyUnicode_AS_DATA(object);
00065   else
00066   { PyErr_SetString(PyExc_ValueError, "method should be a string");
00067     return 0;
00068   }
00069   if (strlen(data)!=1)
00070   { PyErr_SetString(PyExc_ValueError, "method should be a single character");
00071     return 0;
00072   }
00073   c = data[0];
00074   if (!strchr(known_methods, c))
00075   { PyErr_Format(PyExc_ValueError, "unknown method function specified (should be one of '%s')", known_methods);
00076     return 0;
00077   }
00078   *((char*)pointer) = c;
00079   return 1;
00080 }
00081 
00082 static int
00083 method_kcluster_converter(PyObject* object, void* pointer)
00084 { char c;
00085   const char* data;
00086   const char known_methods[] = "am";
00087 #if PY_MAJOR_VERSION < 3
00088   if (PyString_Check(object))
00089       data = PyString_AsString(object);
00090   else
00091 #endif
00092   if (PyUnicode_Check(object))
00093       data = PyUnicode_AS_DATA(object);
00094   else
00095   { PyErr_SetString(PyExc_ValueError, "method should be a string");
00096     return 0;
00097   }
00098   if (strlen(data)!=1)
00099   { PyErr_SetString(PyExc_ValueError, "method should be a single character");
00100     return 0;
00101   }
00102   c = data[0];
00103   if (!strchr(known_methods, c))
00104   { PyErr_Format(PyExc_ValueError, "unknown method function specified (should be one of '%s')", known_methods);
00105     return 0;
00106   }
00107   *((char*)pointer) = c;
00108   return 1;
00109 }
00110 
00111 static int
00112 method_clusterdistance_converter(PyObject* object, void* pointer)
00113 { char c;
00114   const char* data;
00115   const char known_methods[] = "amsxv";
00116 #if PY_MAJOR_VERSION < 3
00117   if (PyString_Check(object))
00118       data = PyString_AsString(object);
00119   else
00120 #endif
00121   if (PyUnicode_Check(object))
00122       data = PyUnicode_AS_DATA(object);
00123   else
00124   { PyErr_SetString(PyExc_ValueError, "method should be a string");
00125     return 0;
00126   }
00127   if (strlen(data)!=1)
00128   { PyErr_SetString(PyExc_ValueError, "method should be a single character");
00129     return 0;
00130   }
00131   c = data[0];
00132   if (!strchr(known_methods, c))
00133   { PyErr_Format(PyExc_ValueError, "unknown method function specified (should be one of '%s')", known_methods);
00134     return 0;
00135   }
00136   *((char*)pointer) = c;
00137   return 1;
00138 }
00139 /* -- data ------------------------------------------------------------------ */
00140 
00141 static double**
00142 parse_data(PyObject* object, PyArrayObject** array)
00143 /* Takes the Python object from the argument list, and finds the microarray
00144  * data set. In case of an error, the array is DECREF'ed and set to NULL. */
00145 { int i, j;
00146   int nrows, ncols;
00147   double** data = NULL;
00148   if(!PyArray_Check(object)) /* Try to convert object to a 2D double array */
00149   { *array = (PyArrayObject*) PyArray_FromObject(object, NPY_DOUBLE, 2, 2);
00150     if (*array==NULL)
00151     { PyErr_SetString(PyExc_TypeError, "data cannot be converted to needed array.");
00152       return NULL;
00153     }
00154   }
00155   else /* User passed an array */
00156   { *array = (PyArrayObject*) object;
00157     /* Check number of dimensions */
00158     if (PyArray_NDIM(*array) == 2) Py_INCREF(object);
00159     else
00160     { PyErr_Format(PyExc_ValueError,
00161                    "data has incorrect rank (%d expected 2)",
00162                    PyArray_NDIM(*array));
00163       *array = NULL;
00164       return NULL;
00165     }
00166     if (PyArray_TYPE(*array) != NPY_DOUBLE) /* Cast to type double */
00167     { *array = (PyArrayObject*) PyArray_Cast(*array, NPY_DOUBLE);
00168       Py_DECREF(object);
00169       if (!(*array))
00170       { PyErr_SetString(PyExc_ValueError,
00171                         "data cannot be cast to needed type.");
00172         return NULL;
00173       }
00174     }
00175   }
00176   nrows = (int) PyArray_DIM(*array, 0);
00177   ncols = (int) PyArray_DIM(*array, 1);
00178   if (nrows != PyArray_DIM(*array, 0) || ncols != PyArray_DIM(*array, 1))
00179   { PyErr_SetString(PyExc_ValueError, "data matrix is too large");
00180     Py_DECREF((PyObject*) (*array));
00181     *array = NULL;
00182     return NULL;
00183   }
00184   if (nrows < 1 || ncols < 1)
00185   { PyErr_SetString(PyExc_ValueError, "data is an empty matrix");
00186     Py_DECREF((PyObject*) (*array));
00187     *array = NULL;
00188     return NULL;
00189   }
00190   data = malloc(nrows*sizeof(double*));
00191   if (PyArray_STRIDE(*array, 1)==sizeof(double)) /* Each row is contiguous */
00192   { const char* p = PyArray_BYTES(*array);
00193     const npy_intp stride =  PyArray_STRIDE(*array, 0);
00194     for (i=0; i < nrows; i++, p+=stride) data[i] = (double*)p;
00195   }
00196   else /* We need to create contiguous rows */
00197   { const char* p0 = PyArray_BYTES(*array);
00198     const npy_intp rowstride =  PyArray_STRIDE(*array, 0);
00199     const npy_intp colstride =  PyArray_STRIDE(*array, 1);
00200     for (i=0; i < nrows; i++)
00201     { const char* p = p0;
00202       data[i] = malloc(ncols*sizeof(double));
00203       for (j=0; j < ncols; j++, p+=colstride) data[i][j] = *((double*)p);
00204       p0 += rowstride;
00205     }
00206   }
00207   return data;
00208 }
00209 
00210 static void
00211 free_data(PyArrayObject* array, double** data)
00212 { if(data[0]!=PyArray_DATA(array))
00213   { npy_intp i;
00214     npy_intp nrows = PyArray_DIM(array, 0);
00215     for (i=0; i<nrows; i++) free(data[i]);
00216   }
00217   free(data);
00218   Py_DECREF((PyObject*) array);
00219 }
00220 
00221 /* -- mask ------------------------------------------------------------------ */
00222 
00223 static int**
00224 parse_mask(PyObject* object, PyArrayObject** array,
00225            const npy_intp dimensions[2])
00226 { int i, j;
00227   const int nrows = dimensions[0];
00228   const int ncolumns = dimensions[1];
00229   int** mask;
00230   if (object==NULL) /* Return the default mask */
00231   { mask = malloc(nrows*sizeof(int*));
00232     for (i=0; i<nrows; i++)
00233     { mask[i] = malloc(ncolumns*sizeof(int));
00234       for (j=0; j<ncolumns; j++) mask[i][j] = 1;
00235     }
00236     *array = NULL;
00237     return mask;
00238   }
00239   if(!PyArray_Check(object)) /* Try to convert object to a 2D double array */
00240   { *array = (PyArrayObject*) PyArray_FromObject(object, NPY_INT, 2, 2);
00241     if (!(*array))
00242     { PyErr_SetString(PyExc_TypeError, "mask cannot be converted to needed array");
00243       return NULL;
00244     }
00245   }
00246   else /* User passed an array */
00247   { *array = (PyArrayObject*) object;
00248     if(PyArray_NDIM(*array) != 2) /* Checking number of dimensions */
00249     { PyErr_Format(PyExc_ValueError, "mask has incorrect rank (%d expected 2)", PyArray_NDIM(*array));
00250       *array = NULL;
00251       return NULL;
00252     }
00253     if (PyArray_TYPE(*array) == NPY_INT) Py_INCREF(object);
00254     else
00255     { *array = (PyArrayObject*) PyArray_Cast(*array, NPY_INT);
00256       if (!(*array))
00257       { PyErr_SetString(PyExc_ValueError, "mask cannot be cast to needed type.");
00258         return NULL;
00259       }
00260     } 
00261   }
00262   if(PyArray_DIM(*array, 0) != nrows) /* Checking number of rows */
00263   { PyErr_Format(PyExc_ValueError,
00264                  "mask has incorrect number of rows (%" NPY_INTP_FMT " expected %d)", PyArray_DIM(*array, 0), nrows);
00265     Py_DECREF((PyObject*)*array);
00266     *array = NULL;
00267     return NULL;
00268   }
00269   /* no checking on last dimension of expected size 1 */
00270   if (ncolumns != 1 && PyArray_DIM(*array, 1) != ncolumns)
00271   { PyErr_Format(PyExc_ValueError,
00272                  "mask incorrect number of columns (%" NPY_INTP_FMT " expected %d)", PyArray_DIM(*array, 1), ncolumns);
00273     *array = NULL;
00274     return NULL;
00275   }
00276   /* All checks OK */
00277   mask = malloc(nrows*sizeof(int*));
00278   if (PyArray_STRIDE(*array, 1)==sizeof(int)) /* Each row is contiguous */
00279   { const char* p = PyArray_BYTES(*array);
00280     const npy_intp stride = PyArray_STRIDE(*array, 0); /* to go to the next row */
00281     for (i=0; i < nrows; i++, p+=stride) mask[i] = (int*)p;
00282   }
00283   else /* We need to create contiguous rows */
00284   { const char* p0 = PyArray_BYTES(*array);
00285     const npy_intp rowstride =  PyArray_STRIDE(*array, 0);
00286     const npy_intp colstride =  PyArray_STRIDE(*array, 1);
00287     for (i=0; i < nrows; i++)
00288     { const char* p = p0;
00289       mask[i] = malloc(ncolumns*sizeof(int));
00290       for (j=0; j < ncolumns; j++, p+=colstride) mask[i][j] = *((int*)p);
00291       p0 += rowstride;
00292     }
00293   }
00294   return mask;
00295 }
00296 
00297 static void
00298 free_mask(PyArrayObject* array, int** mask, int nrows)
00299 { int i;
00300   if (array)
00301   { if(mask[0]!=PyArray_DATA(array)) for (i=0; i<nrows; i++) free(mask[i]);
00302     Py_DECREF((PyObject*) array);
00303   } else for (i=0; i<nrows; i++) free(mask[i]);
00304   free(mask);
00305   return;
00306 }
00307 
00308 /* -- weight ---------------------------------------------------------------- */
00309 
00310 static double*
00311 parse_weight(PyObject* object, PyArrayObject** array, const int ndata)
00312 { int i;
00313   double* weight = NULL;
00314   if (object==NULL) /* Return the default weights */
00315   { weight = malloc(ndata*sizeof(double));
00316     for (i = 0; i < ndata; i++) weight[i] = 1.0;
00317     *array = NULL;
00318     return weight;
00319   }
00320   if(!PyArray_Check(object)) /* Try to convert object to a 1D double array */
00321   { *array = (PyArrayObject*) PyArray_FromObject(object, NPY_DOUBLE, 1, 1);
00322     if (!(*array))
00323     { PyErr_SetString(PyExc_TypeError,
00324                       "weight cannot be converted to needed array.");
00325       return NULL;
00326     }
00327   }
00328   else
00329   { *array = (PyArrayObject*) object;
00330     if (PyArray_TYPE(*array) == NPY_DOUBLE) Py_INCREF(object);
00331     else
00332     { *array = (PyArrayObject*)PyArray_Cast(*array, NPY_DOUBLE);
00333       if (!(*array))
00334       { PyErr_SetString(PyExc_ValueError,
00335                         "weight cannot be cast to needed type.");
00336         return NULL;
00337       }
00338     }
00339   }
00340   if(PyArray_NDIM(*array) == 1) /* Checking number of dimensions */
00341   { /* no checking on last dimension of expected size 1 */
00342     if (ndata!=1 && ndata!=PyArray_DIM(*array, 0)) 
00343     { PyErr_Format(PyExc_ValueError,
00344               "weight has incorrect extent (%" NPY_INTP_FMT " expected %d)",
00345               PyArray_DIM(*array, 0), ndata);
00346       Py_DECREF(*array);
00347       *array = NULL;
00348       return NULL;
00349     }
00350   }
00351   else
00352   { if (PyArray_NDIM(*array) > 0 || ndata != 1)
00353     { PyErr_Format(PyExc_ValueError,
00354                    "weight has incorrect rank (%d expected 1)",
00355                    PyArray_NDIM(*array));
00356       Py_DECREF(*array);
00357       *array = NULL;
00358       return NULL;
00359     }
00360   }
00361   /* All checks OK */
00362   if (PyArray_ISCONTIGUOUS(*array)) weight = PyArray_DATA(*array);
00363   else
00364   { const char* p = PyArray_BYTES(*array);
00365     const npy_intp stride = PyArray_STRIDE(*array, 0);
00366     weight = malloc(ndata*sizeof(double));
00367     for (i = 0; i < ndata; i++, p += stride) weight[i] = *(double*)p;
00368   }
00369   return weight;
00370 }
00371 
00372 static void
00373 free_weight(PyArrayObject* array, double* weight)
00374 { if (array)
00375   { if (weight!=PyArray_DATA(array)) free(weight);
00376     Py_DECREF((PyObject*) array);
00377   } else free(weight);
00378   return;
00379 }
00380 
00381 /* -- initialid ------------------------------------------------------------- */
00382 
00383 static PyArrayObject*
00384 parse_initialid(PyObject* object, int* nclusters, npy_intp nitems)
00385 /* This function creates the clusterid variable for the kcluster and kmedoids
00386  * routines, and fills it with the initial clustering solution if specified
00387  * by the user in object. */
00388 { npy_intp i;
00389   npy_intp stride;
00390   const char* p;
00391   int* q;
00392   int* number;
00393   PyArrayObject* array;
00394   /* -- First we create the clusterid variable ------------------------ */
00395   PyArrayObject* clusterid =
00396     (PyArrayObject*) PyArray_SimpleNew(1, &nitems, NPY_INT);
00397   if (!clusterid)
00398   { PyErr_SetString(PyExc_MemoryError, "could not create clusterid array");
00399     return NULL;
00400   }
00401   /* -- If the user didn't specify an initial clustering, we're done -- */
00402   if (object==NULL) return clusterid;
00403   /* -- Check if the specified object is an array --------------------- */
00404   if(!PyArray_Check(object))
00405   { array = (PyArrayObject*) PyArray_FromObject(object, NPY_INT,1,1);
00406     if (!array)
00407     { PyErr_SetString(PyExc_TypeError,
00408                       "initialid cannot be converted to needed array.");
00409       Py_DECREF((PyObject*) clusterid);
00410       return NULL;
00411     }
00412   }
00413   else
00414   { array = (PyArrayObject*) object;
00415     /* -- Check if the array contains integers ------------------------ */
00416     if (PyArray_TYPE(array) == NPY_INT) Py_INCREF(object);
00417     else
00418     { array = (PyArrayObject*) PyArray_Cast(array, NPY_INT);
00419       if (!array)
00420       { PyErr_SetString(PyExc_ValueError,
00421                         "initialid cannot be cast to needed type.");
00422         Py_DECREF((PyObject*) clusterid);
00423         return NULL;
00424       }
00425     } 
00426   }
00427   /* -- Check the size of the array ----------------------------------- */
00428   if(PyArray_NDIM(array) == 1)
00429   { /* no checking on last dimension of expected size 1 */
00430     if (nitems!=1 && nitems!=PyArray_DIM(array, 0)) 
00431     { PyErr_Format(PyExc_ValueError,
00432               "initialid has incorrect extent (%" NPY_INTP_FMT
00433               " expected %" NPY_INTP_FMT ")",
00434               PyArray_DIM(array, 0), nitems);
00435       Py_DECREF((PyObject*) array);
00436       Py_DECREF((PyObject*) clusterid);
00437       return NULL;
00438     }
00439   }
00440   else
00441   { if (PyArray_NDIM(array) > 0 || nitems != 1)
00442     { PyErr_Format(PyExc_ValueError,
00443                    "initialid has incorrect rank (%d expected 1)",
00444                    PyArray_NDIM(array));
00445       Py_DECREF((PyObject*) array);
00446       Py_DECREF((PyObject*) clusterid);
00447       return NULL;
00448     }
00449   }
00450   /* -- The array seems to be OK. Count the number of clusters -------- */
00451   *nclusters = -1;
00452   stride = PyArray_STRIDE(array, 0);
00453   p = PyArray_BYTES(array);
00454   for (i = 0; i < nitems; i++, p+=stride)
00455   { const int j = *((int*)p);
00456     if (j > *nclusters) *nclusters = j;
00457     if (j < 0)
00458     { PyErr_SetString(PyExc_ValueError,
00459                       "initialid contains a negative cluster number");
00460       Py_DECREF((PyObject*) array);
00461       Py_DECREF((PyObject*) clusterid);
00462       return NULL;
00463     }
00464   }
00465   (*nclusters)++; /* One more than the highest cluster index */
00466   /* Count the number of items in each cluster */
00467   number = calloc(*nclusters,sizeof(int));
00468   p = PyArray_BYTES(array);
00469   q = PyArray_DATA(clusterid);
00470   for (i = 0; i < nitems; i++, p+=stride, q++)
00471   { *q = *((int*)p);
00472     number[*q]++;
00473   }
00474   /* Check if any clusters are empty */
00475   for (i = 0; i < (*nclusters); i++) if(number[i]==0) break;
00476   free(number);
00477   Py_DECREF((PyObject*) array);
00478   if (i < (*nclusters)) /* Due to the break above */
00479   { PyErr_Format(PyExc_ValueError,
00480                  "argument initialid: Cluster %" NPY_INTP_FMT " is empty", i);
00481     Py_DECREF((PyObject*) clusterid);
00482     return NULL;
00483   }
00484   return clusterid;
00485 }
00486 
00487 /* -- clusterid ------------------------------------------------------------- */
00488 
00489 static int*
00490 parse_clusterid(PyObject* object, PyArrayObject** array, unsigned int nitems,
00491   int* nclusters)
00492 /* This function reads the cluster assignments of all items from object */
00493 { unsigned int i;
00494   int j;
00495   npy_intp stride;
00496   const char* p;
00497   int* number;
00498   int* clusterid;
00499   /* -- Default is to assign all items to the same cluster ------------ */
00500   if (object==NULL)
00501   { clusterid = calloc(nitems, sizeof(int));
00502     *array = NULL;
00503     *nclusters = 1;
00504     return clusterid;
00505   }
00506   /* -- The user specified something. Let's see if it is an array ----- */
00507   if(!PyArray_Check(object))
00508   { *array = (PyArrayObject*) PyArray_FromObject(object, NPY_INT, 1, 1);
00509     if (!(*array))
00510     { PyErr_SetString(PyExc_TypeError,
00511                       "clusterid cannot be converted to needed array.");
00512       return NULL;
00513     }
00514   }
00515   else
00516   { *array = (PyArrayObject*) object;
00517     /* -- Check if the array contains integers ------------------------ */
00518     if (PyArray_TYPE(*array) == NPY_INT) Py_INCREF(object);
00519     else
00520     { *array = (PyArrayObject*) PyArray_Cast(*array, NPY_INT);
00521       if (!(*array))
00522       { PyErr_SetString(PyExc_ValueError,
00523                         "clusterid cannot be cast to needed type.");
00524         return NULL;
00525       }
00526     } 
00527   }
00528   /* -- Check the array size ------------------------------------------ */
00529   if(PyArray_NDIM(*array) == 1)
00530   { /* no checking on last dimension of expected size 1 */
00531     if (nitems!=1 && nitems!=PyArray_DIM(*array, 0)) 
00532     { PyErr_Format(PyExc_ValueError,
00533               "clusterid has incorrect extent (%" NPY_INTP_FMT " expected %d)",
00534               PyArray_DIM(*array, 0), nitems);
00535       Py_DECREF((PyObject*) (*array));
00536       return NULL;
00537     }
00538   }
00539   else if (PyArray_NDIM(*array) > 0 || nitems != 1)
00540   { PyErr_Format(PyExc_ValueError,
00541                  "clusterid has incorrect rank (%d expected 1)",
00542                  PyArray_NDIM(*array));
00543     Py_DECREF((PyObject*) (*array));
00544     return NULL;
00545   }
00546   /* -- The array seems to be OK. Count the number of clusters -------- */
00547   stride = PyArray_STRIDE(*array, 0);
00548   p = PyArray_BYTES(*array);
00549   *nclusters = -1;
00550   for (i = 0; i < nitems; i++, p+=stride)
00551   { j = (*(int*)p);
00552     if (j > *nclusters) *nclusters = j;
00553     if (j < 0)
00554     { PyErr_SetString(PyExc_ValueError,
00555                       "clusterid contains an invalid cluster number");
00556       Py_DECREF((PyObject*) (*array));
00557       return NULL;
00558     }
00559   }
00560   (*nclusters)++;
00561   /* -- Count the number of items in each cluster --------------------- */
00562   number = calloc(*nclusters, sizeof(int));
00563   p = PyArray_BYTES(*array);
00564   for (i = 0; i < nitems; i++, p+=stride)
00565   { j = *((int*)p);
00566     number[j]++;
00567   }
00568   for (j = 0; j < (*nclusters); j++) if(number[j]==0) break;
00569   free(number);
00570   if (j < (*nclusters))
00571   { PyErr_Format(PyExc_ValueError,
00572                  "argument initialid: Cluster %d is empty", j);
00573     Py_DECREF((PyObject*) (*array));
00574     return NULL;
00575   }
00576   /* All checks OK */
00577   if (PyArray_ISCONTIGUOUS(*array)) clusterid = PyArray_DATA(*array);
00578   else
00579   { const char* p = PyArray_BYTES(*array);
00580     stride =  PyArray_STRIDE(*array, 0);
00581     clusterid = malloc(nitems*sizeof(int));
00582     for (i = 0; i < nitems; i++, p += stride) clusterid[i] = *(int*)p;
00583   }
00584   return clusterid;
00585 }
00586 
00587 static void
00588 free_clusterid(PyArrayObject* array, int* clusterid)
00589 { if (array)
00590   { if (clusterid!=PyArray_DATA(array)) free(clusterid);
00591     Py_DECREF((PyObject*) array);
00592   } else free(clusterid);
00593 }
00594 
00595 /* -- distance -------------------------------------------------------------- */
00596 
00597 static void
00598 free_distances(PyObject* object, PyArrayObject* array, double** distance, int n)
00599 { int i;
00600   if (array==NULL) /* User passed a lower-triangular matrix as a list of rows */
00601   { for (i = 1; i < n; i++)
00602     { PyObject* row = PyList_GET_ITEM(object, i);
00603       if (PyArray_Check(row))
00604       { PyArrayObject* a = (PyArrayObject*)row;
00605         if (distance[i] == PyArray_DATA(a))
00606         { Py_DECREF(row);
00607           continue;
00608         }
00609       }
00610       free(distance[i]);
00611     }
00612   }
00613   else
00614   { if (PyArray_NDIM(array) == 1)
00615     { const npy_intp stride = PyArray_STRIDE(array, 0);
00616       if (stride!=sizeof(double))
00617         for (i = 1; i < n; i++) free(distance[i]);
00618     }
00619     else
00620     { const npy_intp stride = PyArray_STRIDE(array, 1);
00621       if (stride!=sizeof(double))
00622         for (i = 1; i < n; i++) free(distance[i]);
00623     }
00624     Py_DECREF((PyObject*) array);
00625   }
00626   free(distance);
00627 }
00628 
00629 static double**
00630 parse_distance(PyObject* object, PyArrayObject** array, int* n)
00631 /* Takes the Python object from the argument list, and finds the distance
00632  * matrix. In case of an error, the array is DECREF'ed and set to NULL. */
00633 { int i, j;
00634   double** distance = NULL;
00635   if(!PyArray_Check(object))
00636   { /* Convert object to a 1D or 2D array of type double */
00637     *array = (PyArrayObject*) PyArray_FromObject(object, NPY_DOUBLE, 1, 2);
00638     if (*array==NULL)
00639     { /* This is not necessarily an error; the user may have passed the
00640        * the lower-triangular matrix as a list of rows. Clear the error
00641        * indicator set by PyArrayFromObject first. */
00642       PyErr_Clear();
00643       if (!PyList_Check(object))
00644       { PyErr_SetString(PyExc_TypeError,
00645                         "distance cannot be converted to needed array.");
00646         *n = 0;
00647         return NULL;
00648       }
00649       *n = PyList_GET_SIZE(object);
00650       distance = malloc((*n)*sizeof(double*));
00651       if (!distance)
00652       { PyErr_SetString(PyExc_MemoryError, "failed to store distance matrix.");
00653         *n = 0;
00654         return NULL;
00655       }
00656       for (i = 0; i < *n; i++)
00657       { PyObject* row = PyList_GET_ITEM(object, i);
00658         if (PyArray_Check(row))
00659         { PyArrayObject* a = (PyArrayObject*)row;
00660           if (PyArray_NDIM(a) != 1)
00661           { PyErr_Format(PyExc_ValueError,
00662                          "Row %d in the distance matrix is not one-dimensional.",
00663                          i);
00664             break;
00665           }
00666           if (PyArray_DIM(a, 0) != i)
00667           { PyErr_Format(PyExc_ValueError,
00668                          "Row %d in the distance matrix has incorrect size (%" NPY_INTP_FMT ", should be %d).", i, PyArray_DIM(a, 0), i);
00669             break;
00670           }
00671           if (i==0) continue;
00672           if (PyArray_TYPE(a) == NPY_DOUBLE)
00673           { const npy_intp stride = PyArray_STRIDE(a, 0);
00674             if (stride==sizeof(double)) /* Row is contiguous */
00675             { Py_INCREF(row);
00676               distance[i] = PyArray_DATA(a);
00677             }
00678             else
00679             { const char* p = PyArray_BYTES(a);
00680               distance[i] = malloc(i*sizeof(double));
00681               if(!distance[i])
00682               { Py_DECREF((PyObject*)a);
00683                 PyErr_Format(PyExc_MemoryError,
00684                              "failed to store row %d in the distance matrix.",
00685                              i);
00686                 break;
00687               }
00688               for (j=0; j < i; j++, p+=stride) distance[i][j] = *((double*)p);
00689             }
00690           }
00691           else
00692           { row = PyArray_ContiguousFromObject(row, NPY_DOUBLE, 1, 1);
00693             if (!row)
00694             { PyErr_Format(PyExc_MemoryError,
00695                            "Failed to cast row %d in the distance matrix to double precision.", i);
00696               break;
00697             }
00698             else
00699             { const double* p;
00700               a = (PyArrayObject*)row;
00701               p = PyArray_DATA(a);
00702               distance[i] = malloc(i*sizeof(double));
00703               if(!distance[i])
00704               { Py_DECREF(row);
00705                 PyErr_Format(PyExc_MemoryError,
00706                              "failed to store row %d in the distance matrix.", i);
00707                 break;
00708               }
00709               for (j=0; j < i; j++, p++) distance[i][j] = *p;
00710               Py_DECREF(row);
00711             }
00712           }
00713         }
00714         else
00715         { /* Convert row */
00716           const double* p;
00717           PyArrayObject* a = (PyArrayObject*)PyArray_ContiguousFromObject(row, NPY_DOUBLE, 1, 1);
00718           if(!a)
00719           { PyErr_Format(PyExc_TypeError, "Failed to convert row %d in the distance matrix.", i);
00720             break;
00721           }
00722           if (PyArray_DIM(a, 0) != i)
00723           { PyErr_Format(PyExc_ValueError, "Row %d in the distance matrix has incorrect size (%" NPY_INTP_FMT ", should be %d).", i, PyArray_DIM(a, 0), i);
00724             Py_DECREF((PyObject*)a);
00725             break;
00726           }
00727           if (i > 0)
00728           { distance[i] = malloc(i*sizeof(double));
00729             if(!distance[i])
00730             { Py_DECREF((PyObject*)a);
00731               PyErr_Format(PyExc_MemoryError, "failed to store row %d in the distance matrix.", i);
00732               break;
00733             }
00734             p = PyArray_DATA(a);
00735             for (j=0; j < i; j++) distance[i][j] = p[j];
00736           }
00737           Py_DECREF((PyObject*)a);
00738         }
00739       }
00740       if (i < *n) /* break encountered */
00741       { free_distances(object, NULL, distance, i);
00742         *n = 0;
00743         return NULL;
00744       }
00745       return distance;
00746     }
00747   }
00748   else
00749   { /* User passed an array */
00750     *array = (PyArrayObject*) object;
00751     if (PyArray_TYPE(*array) == NPY_DOUBLE) Py_INCREF(object);
00752     else
00753     { *array = (PyArrayObject*) PyArray_Cast((*array), NPY_DOUBLE);
00754       if (!(*array))
00755       { PyErr_SetString(PyExc_ValueError, "distance cannot be cast to needed type.");
00756         *n = 0;
00757         return NULL;
00758       }
00759     }
00760   }
00761   if (PyArray_NDIM(*array) == 1)
00762   { const npy_intp stride = PyArray_STRIDE(*array, 0);
00763     const char* p = PyArray_BYTES(*array);
00764     const int m = (const int) PyArray_DIM(*array, 0);
00765     if (m != PyArray_DIM(*array, 0))
00766     { PyErr_SetString(PyExc_ValueError, "Array size of distance is too large");
00767       Py_DECREF((PyObject*) (*array));
00768       *array = NULL;
00769       *n = 0;
00770       return NULL;
00771     }
00772     *n = (int) ((1+sqrt(1+8*m))/2);
00773     if ((*n)*(*n)-(*n) != 2 * m)
00774     { PyErr_SetString(PyExc_ValueError,
00775        "Array size of distance is incompatible with a lower triangular matrix");
00776       Py_DECREF((PyObject*) (*array));
00777       *array = NULL;
00778       *n = 0;
00779       return NULL;
00780     }
00781     distance = malloc((*n)*sizeof(double*));
00782     distance[0] = NULL;
00783     if (stride==sizeof(double)) /* Data are contiguous */
00784       for (i=1; i < *n; p+=(i*stride), i++) distance[i] = (double*)p;
00785     else /* We need to create contiguous rows */
00786     { for (i=1; i < *n; i++)
00787       { distance[i] = malloc(i*sizeof(double));
00788         for (j=0; j < i; j++, p+=stride) distance[i][j] = *((double*)p);
00789       }
00790     }
00791   }
00792   else if (PyArray_NDIM(*array) == 2)
00793   { const char* p = PyArray_BYTES(*array);
00794     *n = (int) PyArray_DIM(*array, 0);
00795     if ((*n) != PyArray_DIM(*array, 0))
00796     { PyErr_SetString(PyExc_ValueError, "The distance matrix is too large");
00797       Py_DECREF((PyObject*) (*array));
00798       *array = NULL;
00799       *n = 0;
00800       return NULL;
00801     }
00802     if ((*n) != PyArray_DIM(*array, 1))
00803     { PyErr_SetString(PyExc_ValueError, "The distance matrix should be square");
00804       Py_DECREF((PyObject*) (*array));
00805       *array = NULL;
00806       *n = 0;
00807       return NULL;
00808     }
00809     distance = malloc((*n)*sizeof(double*));
00810     distance[0] = NULL;
00811     if (PyArray_STRIDE(*array, 1)==sizeof(double)) /* Each row is contiguous */
00812     { const npy_intp stride = PyArray_STRIDE(*array, 0);
00813       for (i=0; i < *n; i++, p+=stride) distance[i] = (double*)p;
00814     }
00815     else /* We need to create contiguous rows */
00816     { const npy_intp stride = PyArray_STRIDE(*array, 1);
00817       for (i=0; i < *n; i++)
00818       { distance[i] = malloc(i*sizeof(double));
00819         for (j=0; j < i; j++, p+=stride) distance[i][j] = *((double*)p);
00820       }
00821     }
00822   }
00823   else
00824   { PyErr_Format(PyExc_ValueError,
00825                  "distance has an incorrect rank (%d expected 1 or 2)",
00826                  PyArray_NDIM(*array));
00827     Py_DECREF((PyObject*) (*array));
00828     *array = NULL;
00829     *n = 0;
00830     return NULL;
00831   }
00832   return distance;
00833 }
00834 
00835 /* -- celldata -------------------------------------------------------------- */
00836 
00837 static double***
00838 create_celldata(int nxgrid, int nygrid, int ndata, PyArrayObject** array)
00839 { int i;
00840   npy_intp shape[3];
00841   double* p;
00842   double** pp;
00843   double*** ppp;
00844   shape[0] = (npy_intp) nxgrid;
00845   shape[1] = (npy_intp) nygrid;
00846   shape[2] = (npy_intp) ndata;
00847   if (shape[0]!=nxgrid || shape[1]!=nygrid || shape[2]!=ndata)
00848   { PyErr_SetString(PyExc_RuntimeError, "celldata array too large");
00849     return NULL;
00850   }
00851   *array = (PyArrayObject*) PyArray_SimpleNew(3, shape, NPY_DOUBLE);
00852   pp = malloc(nxgrid*nygrid*sizeof(double*));
00853   ppp = malloc(nxgrid*sizeof(double**));
00854   if (!(*array) || !pp || !ppp)
00855   { Py_XDECREF((PyObject*)(*array));
00856     *array = NULL;
00857     if(pp) free(pp);
00858     if(ppp) free(ppp);
00859     PyErr_SetString(PyExc_MemoryError, "Could not create celldata array -- too big?");
00860     return NULL;
00861   }
00862   p = PyArray_DATA(*array);
00863   for (i=0; i<nxgrid*nygrid; i++, p+=ndata) pp[i]=p;
00864   for (i=0; i<nxgrid; i++, pp+=nygrid) ppp[i]=pp;
00865   return ppp;
00866 }
00867 
00868 static void
00869 free_celldata(double*** celldata)
00870 { double** pp = celldata[0];
00871   free(pp);
00872   free(celldata);
00873 }
00874 
00875 /* -- index ----------------------------------------------------------------- */
00876 
00877 static int*
00878 parse_index(PyObject* object, PyArrayObject** array, int* n)
00879 { int* index;
00880   /* Check if the user specified a single item as an integer */
00881   if(!object || PyLong_Check(object))
00882   { *array = NULL;
00883     index = malloc(sizeof(int));
00884     if (!object) index[0] = 0;
00885 #if PY_MAJOR_VERSION >= 3
00886     else index[0] = (int) PyLong_AS_LONG(object);
00887 #else
00888     else index[0] = (int) PyInt_AS_LONG(object);
00889 #endif
00890     *n = 1;
00891     return index;
00892   }
00893   /* Check if the user specified an array */
00894   if(!PyArray_Check(object)) /* Try to convert to an array of type int */
00895   { *array = (PyArrayObject*)
00896       PyArray_ContiguousFromObject(object, NPY_INT, 1, 1);
00897     if (!(*array))
00898     { PyErr_SetString(PyExc_TypeError, "index argument cannot be converted to needed type.");
00899       *n = 0;
00900       return NULL;
00901     }
00902   }
00903   else
00904   { *array = (PyArrayObject*) object;
00905     /* -- Check if the array contains integers ------------------------ */
00906     if (PyArray_TYPE(*array) == NPY_INT) Py_INCREF(object);
00907     else
00908     { object = PyArray_Cast(*array, NPY_INT);
00909       if (!object)
00910       { PyErr_SetString(PyExc_ValueError,
00911                         "index argument cannot be cast to needed type.");
00912         *n = 0;
00913         return NULL;
00914       }
00915       *array = (PyArrayObject*) object;
00916     } 
00917   }
00918   /* We have an array */
00919   *n = (int) PyArray_DIM(*array, 0);
00920   if(PyArray_DIM(*array, 0) != *n)
00921   { PyErr_SetString(PyExc_ValueError, "data array is too large");
00922     Py_DECREF(object); /* can only happen if *array==(PyArrayObject*)object */
00923     *array = NULL;
00924     *n = 0;
00925     return NULL;
00926   }
00927   if(PyArray_NDIM(*array) != 1 && (PyArray_NDIM(*array) > 0 || *n != 1))
00928   { PyErr_Format(PyExc_ValueError,
00929                  "index argument has incorrect rank (%d expected 1)",
00930                  PyArray_NDIM(*array));
00931     Py_DECREF(object); /* can only happen if *array==(PyArrayObject*)object */
00932     *array = NULL;
00933     *n = 0;
00934     return NULL;
00935   }
00936   if (!PyArray_ISCONTIGUOUS(*array))
00937   { *array = (PyArrayObject*) PyArray_ContiguousFromObject(object, NPY_INT, 1, 1);
00938     Py_DECREF(object);
00939     if(!(*array))
00940     { PyErr_SetString(PyExc_ValueError,
00941                       "Failed making argument index contiguous.");
00942       *array = NULL;
00943       *n = 0;
00944       return NULL;
00945     }
00946   }
00947   index = PyArray_DATA(*array);
00948   return index;
00949 }
00950 
00951 static void
00952 free_index(PyArrayObject* array, int* index)
00953 { if (array) Py_DECREF((PyObject*) array);
00954   else free(index);
00955 }
00956 
00957 /* ========================================================================== */
00958 /* -- Classes --------------------------------------------------------------- */
00959 /* ========================================================================== */
00960 
00961 typedef struct {
00962     PyObject_HEAD
00963     Node node;
00964 } PyNode;
00965 
00966 static int
00967 PyNode_init(PyNode *self, PyObject *args, PyObject *kwds)
00968 {
00969     int left, right;
00970     double distance = 0.0;
00971     static char *kwlist[] = {"left", "right", "distance", NULL};
00972 
00973     if (!PyArg_ParseTupleAndKeywords(args, kwds, "ii|d", kwlist, 
00974                                       &left, &right, &distance))
00975         return -1; 
00976     self->node.left = left;
00977     self->node.right = right;
00978     self->node.distance = distance;
00979 
00980     return 0;
00981 }
00982 
00983 static PyObject*
00984 PyNode_repr(PyNode* self)
00985 { char string[64];
00986   sprintf(string, "(%d, %d): %g",
00987                   self->node.left, self->node.right, self->node.distance);
00988 #if PY_MAJOR_VERSION >= 3
00989   return PyUnicode_FromString(string);
00990 #else
00991   return PyString_FromString(string);
00992 #endif
00993 }
00994 
00995 static char PyNode_left__doc__[] =
00996 "integer representing the first member of this node";
00997 
00998 static PyObject*
00999 PyNode_getleft(PyNode* self, void* closure)
01000 { int left = self->node.left;
01001 #if PY_MAJOR_VERSION >= 3
01002     return PyLong_FromLong((long)left);
01003 #else
01004     return PyInt_FromLong((long)left);
01005 #endif
01006 }
01007 
01008 static int
01009 PyNode_setleft(PyNode* self, PyObject* value, void* closure)
01010 { long left = PyLong_AsLong(value);
01011   if (PyErr_Occurred()) return -1;
01012   self->node.left = (int) left;
01013   return 0;
01014 }
01015 
01016 static char PyNode_right__doc__[] =
01017 "integer representing the second member of this node";
01018 
01019 static PyObject*
01020 PyNode_getright(PyNode* self, void* closure)
01021 { int right = self->node.right;
01022 #if PY_MAJOR_VERSION >= 3
01023     return PyLong_FromLong((long)right);
01024 #else
01025     return PyInt_FromLong((long)right);
01026 #endif
01027 }
01028 
01029 static int
01030 PyNode_setright(PyNode* self, PyObject* value, void* closure)
01031 { long right = PyLong_AsLong(value);
01032   if (PyErr_Occurred()) return -1;
01033   self->node.right = (int) right;
01034   return 0;
01035 }
01036 
01037 static PyObject*
01038 PyNode_getdistance(PyNode* self, void* closure)
01039 { return PyFloat_FromDouble(self->node.distance);
01040 }
01041 
01042 static int
01043 PyNode_setdistance(PyNode* self, PyObject* value, void* closure)
01044 { const double distance = PyFloat_AsDouble(value);
01045   if (PyErr_Occurred()) return -1;
01046   self->node.distance = distance;
01047   return 0;
01048 }
01049 
01050 static char PyNode_distance__doc__[] =
01051 "the distance between the two members of this node\n";
01052 
01053 static PyGetSetDef PyNode_getset[] = {
01054     {"left", (getter)PyNode_getleft, (setter)PyNode_setleft, PyNode_left__doc__, NULL},
01055     {"right", (getter)PyNode_getright, (setter)PyNode_setright, PyNode_right__doc__, NULL},
01056     {"distance", (getter)PyNode_getdistance, (setter)PyNode_setdistance, PyNode_distance__doc__, NULL},
01057     {NULL}  /* Sentinel */
01058 };
01059 
01060 static char PyNode_doc[] =
01061 "A Node object describes a single node in a hierarchical clustering tree.\n"
01062 "The integer attributes 'left' and 'right' represent the two members that\n"
01063 "make up this node; the floating point attribute 'distance' contains the\n"
01064 "distance between the two members of this node.\n";
01065 
01066 static PyTypeObject PyNodeType = {
01067     PyVarObject_HEAD_INIT(NULL, 0)
01068     "cluster.Node",            /* tp_name */
01069     sizeof(PyNode),            /* tp_basicsize */
01070     0,                         /* tp_itemsize */
01071     0,                         /* tp_dealloc */
01072     0,                         /* tp_print */
01073     0,                         /* tp_getattr */
01074     0,                         /* tp_setattr */
01075     0,                         /* tp_compare */
01076     (reprfunc)PyNode_repr,     /* tp_repr */
01077     0,                         /* tp_as_number */
01078     0,                         /* tp_as_sequence */
01079     0,                         /* tp_as_mapping */
01080     0,                         /* tp_hash */
01081     0,                         /* tp_call */
01082     0,                         /* tp_str */
01083     0,                         /* tp_getattro */
01084     0,                         /* tp_setattro */
01085     0,                         /* tp_as_buffer */
01086     Py_TPFLAGS_DEFAULT,        /* tp_flags */
01087     PyNode_doc,                /* tp_doc */
01088     0,                       /* tp_traverse */
01089     0,                       /* tp_clear */
01090     0,                       /* tp_richcompare */
01091     0,                       /* tp_weaklistoffset */
01092     0,                       /* tp_iter */
01093     0,                       /* tp_iternext */
01094     0,                         /* tp_methods */
01095     0,                         /* tp_members */
01096     PyNode_getset,             /* tp_getset */
01097     0,                         /* tp_base */
01098     0,                         /* tp_dict */
01099     0,                         /* tp_descr_get */
01100     0,                         /* tp_descr_set */
01101     0,                         /* tp_dictoffset */
01102     (initproc)PyNode_init,     /* tp_init */
01103 };
01104 
01105 typedef struct {
01106     PyObject_HEAD
01107     Node* nodes;
01108     int n;
01109 } PyTree;
01110 
01111 static void
01112 PyTree_dealloc(PyTree* self)
01113 { if (self->n) free(self->nodes);
01114   Py_TYPE(self)->tp_free((PyObject*)self);
01115 }
01116 
01117 static int
01118 PyTree_init(PyTree* self, PyObject* args, PyObject* kwds)
01119 { int i;
01120   int n;
01121   Node* nodes;
01122   PyObject* arg;
01123   int* flag;
01124 
01125   if (!PyArg_ParseTuple(args, "O", &arg)) return -1;
01126 
01127   if (!PyList_Check(arg))
01128   { PyErr_SetString(PyExc_TypeError, "Argument should be a list of Node objects");
01129     return -1;
01130   }
01131 
01132   n = PyList_GET_SIZE(arg);
01133   if (n < 1)
01134   { PyErr_SetString(PyExc_ValueError, "List is empty");
01135     return -1;
01136   }
01137   nodes = malloc(n*sizeof(Node));
01138   for (i = 0; i < n; i++)
01139   { PyNode* p;
01140     PyObject* row = PyList_GET_ITEM(arg, i);
01141     if (row->ob_type != &PyNodeType)
01142     { free(nodes);
01143       PyErr_Format(PyExc_TypeError, "Row %d in list is not a Node object", i);
01144       return -1;
01145     }
01146     p = (PyNode*)row;
01147     nodes[i] = p->node;
01148   }
01149   /* --- Check if this is a bona fide tree ------------------------------- */
01150   flag = malloc((2*n+1)*sizeof(int));
01151   if(flag) /* Otherwise, we're in enough trouble already */
01152   { int j;
01153     for (i = 0; i < 2*n+1; i++) flag[i] = 0; 
01154     for (i = 0; i < n; i++)
01155     { j = nodes[i].left;
01156       if (j < 0)
01157       { j = -j-1;
01158         if (j>=i) break;
01159       }
01160       else j+=n;
01161       if (flag[j]) break;
01162       flag[j] = 1;
01163       j = nodes[i].right;
01164       if (j < 0)
01165       { j = -j-1;
01166         if (j>=i) break;
01167       }
01168       else j+=n;
01169       if (flag[j]) break;
01170       flag[j] = 1;
01171     }
01172     free(flag);
01173   }
01174   if (!flag || i < n) /* break encountered */
01175   { free(nodes);
01176     PyErr_SetString(PyExc_ValueError, "Inconsistent tree");
01177     return -1;
01178   }
01179   /* --------------------------------------------------------------------- */
01180   self->n = n;
01181   self->nodes = nodes;
01182   return 0;
01183 }
01184 
01185 static PyObject*
01186 PyTree_str(PyTree* self)
01187 { int i;
01188   const int n = self->n;
01189   char string[128];
01190   Node node;
01191   PyObject* line;
01192   PyObject* output;
01193 #if PY_MAJOR_VERSION >= 3
01194   PyObject* temp;
01195   output = PyUnicode_FromString("");
01196 #else
01197   output = PyString_FromString("");
01198 #endif
01199   for (i = 0; i < n; i++)
01200   { node = self->nodes[i];
01201     sprintf(string, "(%d, %d): %g", node.left, node.right, node.distance);
01202     if (i < n-1) strcat(string, "\n");
01203 #if PY_MAJOR_VERSION >= 3
01204     line = PyUnicode_FromString(string);
01205 #else
01206     line = PyString_FromString(string);
01207 #endif
01208     if(!line)
01209     { Py_DECREF(output);
01210       return NULL;
01211     }
01212 #if PY_MAJOR_VERSION >= 3
01213     temp = PyUnicode_Concat(output, line);
01214     if (!temp)
01215     { Py_DECREF(output);
01216       Py_DECREF(line);
01217       return NULL;
01218     }
01219     output = temp;
01220 #else
01221     PyString_ConcatAndDel(&output, line);
01222     if(!output)
01223     {   Py_DECREF(line);
01224         return NULL;
01225     }
01226 #endif
01227   }
01228   return output;
01229 }
01230 
01231 static int
01232 PyTree_length(PyTree *self)
01233 {
01234   return self->n;
01235 }
01236 
01237 static PyObject*
01238 PyTree_item(PyTree* self, int i)
01239 { PyNode* result;
01240   if (i < 0 || i >= self->n)
01241   { PyErr_SetString(PyExc_IndexError, "tree index out of range");
01242     return NULL;
01243   }
01244   result = (PyNode*) PyNodeType.tp_alloc(&PyNodeType, 0);
01245   if(!result)
01246   { PyErr_SetString(PyExc_MemoryError,
01247       "could not create node for return value");
01248     return NULL;
01249   }
01250   result->node = self->nodes[i];
01251   return (PyObject*) result;
01252 }
01253 
01254 static PyObject*
01255 PyTree_slice(PyTree* self, int i, int j)
01256 { int row;
01257   const int n = self->n;
01258   PyObject* item;
01259   PyObject* result;
01260   if (i < 0) i = 0;
01261   if (j < 0) j = 0; /* Avoid signed/unsigned bug in next line */
01262   if (j > n) j = n;
01263   if (j < i) j = i;
01264   result = PyList_New(j-i);
01265   if(!result)
01266   { PyErr_SetString(PyExc_MemoryError,
01267       "could not create list for return value");
01268     return NULL;
01269   }
01270   for (row = 0; i < j; i++, row++)
01271   { item = PyTree_item(self, i);
01272     if(!item)
01273     { Py_DECREF(result);
01274       PyErr_SetString(PyExc_MemoryError,
01275         "could not create node for return value");
01276       return NULL;
01277     }
01278     PyList_SET_ITEM(result, row, item);
01279   }
01280   return result;
01281 }
01282 
01283 #if (PY_MAJOR_VERSION <= 2) & (PY_MINOR_VERSION <= 4)
01284 #define lenfunc inquiry
01285 #define ssizeargfunc intargfunc
01286 #define ssizessizeargfunc intintargfunc
01287 #endif
01288 
01289 static PySequenceMethods PyTree_sequence = {
01290         (lenfunc)PyTree_length, /*sq_length*/
01291         NULL,                 /*sq_concat*/
01292         NULL,                 /*sq_repeat*/
01293         (ssizeargfunc)PyTree_item, /*sq_item*/
01294         (ssizessizeargfunc)PyTree_slice, /*sq_slice*/
01295         NULL,                 /*sq_ass_item*/
01296         NULL,                 /*sq_ass_slice*/
01297         NULL                  /*sq_contains*/
01298 };
01299 
01300 static char PyTree_scale__doc__[] =
01301 "mytree.scale()\n"
01302 "This method scales the node distances in the tree such that they are\n"
01303 "all between one and zero.\n";
01304 
01305 static PyObject*
01306 PyTree_scale(PyTree* self)
01307 { int i;
01308   const int n = self->n;
01309   Node* nodes = self->nodes;
01310   double maximum = DBL_MIN;
01311   /* --------------------------------------------------------------------- */
01312   for (i = 0; i < n; i++)
01313   { double distance = nodes[i].distance;
01314     if (distance > maximum) maximum = distance;
01315   }
01316   if (maximum!=0.0)
01317     for (i = 0; i < n; i++) nodes[i].distance /= maximum;
01318   /* --------------------------------------------------------------------- */
01319   Py_INCREF(Py_None);
01320   return Py_None;
01321 }
01322 
01323 static char PyTree_cut__doc__[] =
01324 "clusterid = mytree.cut(nclusters=2)\n"
01325 "Given a hierarchical clustering result mytree, cut() divides the\n"
01326 "elements in the tree into clusters. The number of clusters is given\n"
01327 "by nclusters.\n";
01328 
01329 static PyObject*
01330 PyTree_cut(PyTree* self, PyObject* args)
01331 { int nclusters = 2;
01332   npy_intp n = (npy_intp) (self->n + 1);
01333   PyArrayObject* aCLUSTERID = (PyArrayObject*) NULL;
01334   int* clusterid = NULL;
01335   /* -- Check to make sure the tree isn't too large ---------------------- */
01336   if (n != (int)n)
01337   { PyErr_SetString(PyExc_RuntimeError, "cut: tree is too large");
01338     return NULL;
01339   }
01340   /* -- Read the input variables ----------------------------------------- */
01341   if(!PyArg_ParseTuple(args, "|i", &nclusters)) return NULL;
01342   /* -- Check the nclusters variable ------------------------------------- */
01343   if (nclusters < 1)
01344   { PyErr_SetString(PyExc_ValueError,
01345       "cut: Requested number of clusters should be positive");
01346     return NULL;
01347   }
01348   if (nclusters > n)
01349   { PyErr_SetString(PyExc_ValueError,
01350       "cut: More clusters requested than items available");
01351     return NULL;
01352   }
01353   /* -- Create the clusterid output variable ----------------------------- */
01354   aCLUSTERID = (PyArrayObject*) PyArray_SimpleNew(1, &n, NPY_INT);
01355   if (!aCLUSTERID)
01356   { PyErr_SetString(PyExc_MemoryError,
01357       "cut: Could not create array for return value");
01358     return NULL;
01359   }
01360   clusterid = PyArray_DATA(aCLUSTERID);
01361   /* --------------------------------------------------------------------- */
01362   cuttree((int) n, self->nodes, nclusters, clusterid);
01363   /* -- Check for errors flagged by the C routine ------------------------ */
01364   if (clusterid[0]==-1)
01365   { PyErr_SetString(PyExc_MemoryError, "cut: Error in the cuttree routine");
01366     Py_DECREF((PyObject*) aCLUSTERID);
01367     return NULL;
01368   }
01369   /* --------------------------------------------------------------------- */
01370   return PyArray_Return(aCLUSTERID);
01371 }
01372 
01373 static PyMethodDef PyTree_methods[] = {
01374     {"scale", (PyCFunction)PyTree_scale, METH_NOARGS, PyTree_scale__doc__},
01375     {"cut", (PyCFunction)PyTree_cut, METH_VARARGS, PyTree_cut__doc__},
01376     {NULL}  /* Sentinel */
01377 };
01378 
01379 static char PyTree_doc[] =
01380 "Tree objects store a hierarchical clustering solution.\n"
01381 "Individual nodes in the tree can be accessed with tree[i], where i is\n"
01382 "an integer. Whereas the tree itself is a read-only object, tree[:]\n"
01383 "returns a list of all the nodes, which can then be modified. To create\n"
01384 "a new Tree from this list, use Tree(list).\n"
01385 "See the description of the Node class for more information.";
01386 
01387 static PyTypeObject PyTreeType = {
01388     PyVarObject_HEAD_INIT(NULL, 0)
01389     "cluster.Tree",              /*tp_name*/
01390     sizeof(PyTree),              /*tp_basicsize*/
01391     0,                           /*tp_itemsize*/
01392     (destructor)PyTree_dealloc,  /*tp_dealloc*/
01393     0,                           /*tp_print*/
01394     0,                           /*tp_getattr*/
01395     0,                           /*tp_setattr*/
01396     0,                           /*tp_compare*/
01397     0,                           /*tp_repr*/
01398     0,                           /*tp_as_number*/
01399     &PyTree_sequence,            /*tp_as_sequence*/
01400     0,                           /*tp_as_mapping*/
01401     0,                           /*tp_hash */
01402     0,                           /*tp_call*/
01403     (reprfunc)PyTree_str,        /*tp_str*/
01404     0,                           /*tp_getattro*/
01405     0,                           /*tp_setattro*/
01406     0,                           /*tp_as_buffer*/
01407     Py_TPFLAGS_DEFAULT,          /*tp_flags*/
01408     PyTree_doc,                  /* tp_doc */
01409     0,                         /* tp_traverse */
01410     0,                         /* tp_clear */
01411     0,                         /* tp_richcompare */
01412     0,                         /* tp_weaklistoffset */
01413     0,                         /* tp_iter */
01414     0,                         /* tp_iternext */
01415     PyTree_methods,              /* tp_methods */
01416     NULL,                        /* tp_members */
01417     0,                           /* tp_getset */
01418     0,                           /* tp_base */
01419     0,                           /* tp_dict */
01420     0,                           /* tp_descr_get */
01421     0,                           /* tp_descr_set */
01422     0,                           /* tp_dictoffset */
01423     (initproc)PyTree_init,       /* tp_init */
01424 };
01425 
01426 /* ========================================================================== */
01427 /* -- Methods --------------------------------------------------------------- */
01428 /* ========================================================================== */
01429 
01430 /* version */
01431 static char version__doc__[] =
01432 "version()\n"
01433 "\n"
01434 "This function returns the version number of the C Clustering Library\n"
01435 "as a string.\n";
01436 
01437 static PyObject*
01438 py_version(PyObject* self)
01439 {
01440 #if PY_MAJOR_VERSION >= 3
01441   return PyUnicode_FromString( CLUSTERVERSION );
01442 #else
01443   return PyString_FromString( CLUSTERVERSION );
01444 #endif
01445 } 
01446 
01447 /* kcluster */
01448 static char kcluster__doc__[] =
01449 "kcluster(data, nclusters=2, mask=None, weight=None,\n"
01450 "         transpose=0, npass=1, method='a', dist='e',\n"
01451 "         initialid=None) -> clusterid, error, nfound\n"
01452 "\n"
01453 "This function implements k-means clustering.\n"
01454 "data     : nrows x ncolumns array containing the expression data\n"
01455 "nclusters: number of clusters (the 'k' in k-means)\n"
01456 "mask     : nrows x ncolumns array of integers, showing which data are\n"
01457 "           missing. If mask[i][j]==0, then data[i][j] is missing.\n"
01458 "weight   : the weights to be used when calculating distances\n"
01459 "transpose: if equal to 0, genes (rows) are clustered;\n"
01460 "           if equal to 1, microarrays (columns) are clustered.\n"
01461 "npass    : number of times the k-means clustering algorithm is\n"
01462 "           performed, each time with a different (random) initial\n"
01463 "           condition.\n"
01464 "method   : specifies how the center of a cluster is found:\n"
01465 "           method=='a': arithmetic mean\n"
01466 "           method=='m': median\n"
01467 "dist     : specifies the distance function to be used:\n"
01468 "           dist=='e': Euclidean distance\n"
01469 "           dist=='b': City Block distance\n"
01470 "           dist=='c': Pearson correlation\n"
01471 "           dist=='a': absolute value of the correlation\n"
01472 "           dist=='u': uncentered correlation\n"
01473 "           dist=='x': absolute uncentered correlation\n"
01474 "           dist=='s': Spearman's rank correlation\n"
01475 "           dist=='k': Kendall's tau\n"
01476 "initialid: the initial clustering from which the algorithm should start.\n"
01477 "           If initialid is None, the routine carries out npass\n"
01478 "           repetitions of the EM algorithm, each time starting from a\n"
01479 "           different random initial clustering. If initialid is given,\n"
01480 "           the routine carries out the EM algorithm only once, starting\n"
01481 "           from the given initial clustering and without randomizing the\n"
01482 "           order in which items are assigned to clusters (i.e., using\n"
01483 "           the same order as in the data matrix). In that case, the\n"
01484 "           k-means algorithm is fully deterministic.\n"
01485 "\n"
01486 "Return values:\n"
01487 "clusterid: array containing the number of the cluster to which each\n"
01488 "           gene/microarray was assigned in the best k-means clustering\n"
01489 "           solution that was found in the npass runs;\n"
01490 "error:     the within-cluster sum of distances for the returned k-means\n"
01491 "           clustering solution;\n"
01492 "nfound:    the number of times this solution was found.\n";
01493 
01494 static PyObject*
01495 py_kcluster(PyObject* self, PyObject* args, PyObject* keywords)
01496 { int NCLUSTERS = 2;
01497   int nrows, ncolumns;
01498   int nitems;
01499   int ndata;
01500   PyObject* DATA = NULL;
01501   PyArrayObject* aDATA = NULL;
01502   double** data = NULL;
01503   PyObject* MASK = NULL;
01504   PyArrayObject* aMASK = NULL;
01505   int** mask = NULL;
01506   PyObject* WEIGHT = NULL;
01507   PyArrayObject* aWEIGHT = NULL;
01508   double* weight = NULL;
01509   int TRANSPOSE = 0;
01510   int NPASS = 1;
01511   char METHOD = 'a';
01512   char DIST = 'e';
01513   PyObject* INITIALID = NULL;
01514   PyArrayObject* aCLUSTERID = NULL;
01515   double ERROR;
01516   int IFOUND;
01517 
01518   /* -- Read the input variables ----------------------------------------- */
01519   static char* kwlist[] = { "data",
01520                             "nclusters",
01521                             "mask",
01522                             "weight",
01523                             "transpose",
01524                             "npass",
01525                             "method",
01526                             "dist",
01527                             "initialid",
01528                              NULL };
01529   if(!PyArg_ParseTupleAndKeywords(args, keywords, "O|iOOiiO&O&O", kwlist,
01530                                   &DATA,
01531                                   &NCLUSTERS,
01532                                   &MASK,
01533                                   &WEIGHT,
01534                                   &TRANSPOSE,
01535                                   &NPASS,
01536                                   method_kcluster_converter, &METHOD,
01537                                   distance_converter, &DIST,
01538                                   &INITIALID)) return NULL;
01539   /* -- Reset None variables to NULL ------------------------------------- */
01540   if(MASK==Py_None) MASK = NULL;
01541   if(WEIGHT==Py_None) WEIGHT = NULL;
01542   if(INITIALID==Py_None) INITIALID = NULL;
01543   /* -- Check the transpose variable ------------------------------------- */
01544   if (TRANSPOSE) TRANSPOSE = 1;
01545   /* -- Check the npass variable ----------------------------------------- */
01546   if (INITIALID) NPASS = 0;
01547   else if (NPASS <= 0)
01548   { PyErr_SetString(PyExc_ValueError, "npass should be a positive integer");
01549     return NULL;
01550   }
01551   /* -- Check the data input array --------------------------------------- */
01552   data = parse_data(DATA, &aDATA);
01553   if (!data) return NULL;
01554   nrows = (int) PyArray_DIM(aDATA, 0);
01555   ncolumns = (int) PyArray_DIM(aDATA, 1);
01556   if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
01557   { PyErr_Format(PyExc_ValueError,
01558             "received too many data (%" NPY_INTP_FMT " x %" NPY_INTP_FMT
01559             "data matrix received)",
01560             PyArray_DIM(aDATA, 0), PyArray_DIM(aDATA, 1));
01561     free_data(aDATA, data);
01562     return NULL;
01563   }
01564   /* -- Check the mask input --------------------------------------------- */
01565   mask = parse_mask(MASK, &aMASK, PyArray_DIMS(aDATA));
01566   if (!mask)
01567   { free_data(aDATA, data);
01568     return NULL;
01569   }
01570   /* -- Create the clusterid output variable ----------------------------- */
01571   ndata = TRANSPOSE ? nrows : ncolumns;
01572   nitems = TRANSPOSE ? ncolumns : nrows;
01573   aCLUSTERID = parse_initialid(INITIALID, &NCLUSTERS, (npy_intp) nitems);
01574   if (!aCLUSTERID)
01575   { free_data(aDATA, data);
01576     free_mask(aMASK, mask, nrows);
01577     return NULL;
01578   }
01579   /* -- Check the number of clusters ------------------------------------- */
01580   if (NCLUSTERS < 1)
01581   { PyErr_SetString(PyExc_ValueError, "nclusters should be positive");
01582     free_data(aDATA, data);
01583     free_mask(aMASK, mask, nrows);
01584     Py_DECREF((PyObject*) aCLUSTERID);
01585     return NULL;
01586   }
01587   if (nitems < NCLUSTERS)
01588   { PyErr_SetString(PyExc_ValueError, "More clusters than items to be clustered");
01589     free_data(aDATA, data);
01590     free_mask(aMASK, mask, nrows);
01591     Py_DECREF((PyObject*) aCLUSTERID);
01592     return NULL;
01593   }
01594   /* -- Check the weight input ------------------------------------------- */
01595   weight = parse_weight(WEIGHT, &aWEIGHT, ndata);
01596   if (!weight)
01597   { free_data(aDATA, data);
01598     free_mask(aMASK, mask, nrows);
01599     Py_DECREF((PyObject*) aCLUSTERID);
01600     return NULL;
01601   }
01602   /* --------------------------------------------------------------------- */
01603   kcluster(NCLUSTERS, 
01604       nrows, 
01605       ncolumns, 
01606       data, 
01607       mask, 
01608       weight,
01609       TRANSPOSE, 
01610       NPASS, 
01611       METHOD, 
01612       DIST, 
01613       PyArray_DATA(aCLUSTERID), 
01614       &ERROR, 
01615       &IFOUND);
01616   /* --------------------------------------------------------------------- */
01617   free_data(aDATA, data);
01618   free_mask(aMASK, mask, nrows);
01619   free_weight(aWEIGHT, weight);
01620   /* --------------------------------------------------------------------- */
01621 
01622   return Py_BuildValue("Ndi", aCLUSTERID, ERROR, IFOUND);
01623 } 
01624 /* end of wrapper for kcluster */
01625 
01626 /* kmedoids */
01627 static char kmedoids__doc__[] =
01628 "kmedoids(distance, nclusters=2, npass=1,\n"
01629 "         initialid=None) -> clusterid, error, nfound.\n"
01630 "\n"
01631 "This function implements k-medoids clustering.\n"
01632 "distance:  The distance matrix between the elements. There are three\n"
01633 "           ways in which you can pass a distance matrix:\n"
01634 "           #1: a 2D Numerical Python array (in which only the left-lower\n"
01635 "               part of the array will be accessed);\n"
01636 "           #2: a 1D Numerical Python array containing the distances\n"
01637 "               consecutively;\n" 
01638 "           #3: a list of rows containing the lower-triangular part of\n"
01639 "               the distance matrix.\n"
01640 "           Examples are:\n"
01641 "           >>> distance = array([[0.0, 1.1, 2.3],\n"
01642 "                                 [1.1, 0.0, 4.5],\n"
01643 "                                 [2.3, 4.5, 0.0]])\n"
01644 "           (option #1)\n"
01645 "           >>> distance = array([1.1, 2.3, 4.5])\n"
01646 "           (option #2)\n"
01647 "           >>> distance = [array([]),\n"
01648 "                           array([1.1]),\n"
01649 "                           array([2.3, 4.5])\n"
01650 "                          ]\n"
01651 "           (option #3)\n"
01652 "           These three correspond to the same distance matrix.\n"
01653 "nclusters: number of clusters (the 'k' in k-medoids)\n"
01654 "npass    : the number of times the k-medoids clustering algorithm is\n"
01655 "           performed, each time with a different (random) initial\n"
01656 "           condition.\n"
01657 "initialid: the initial clustering from which the algorithm should start.\n"
01658 "           If initialid is not given, the routine carries out npass\n"
01659 "           repetitions of the EM algorithm, each time starting from a\n"
01660 "           different random initial clustering. If initialid is given,\n"
01661 "           the routine carries out the EM algorithm only once, starting\n"
01662 "           from the initial clustering specified by initialid and\n"
01663 "           without randomizing the order in which items are assigned to\n"
01664 "           clusters (i.e., using the same order as in the data matrix).\n"
01665 "           In that case, the k-means algorithm is fully deterministic.\n"
01666 "\n"
01667 "Return values:\n"
01668 "clusterid: array containing the number of the cluster to which each\n"
01669 "           gene/microarray was assigned in the best k-means clustering\n"
01670 "           solution that was found in the npass runs;\n"
01671 "error:     the within-cluster sum of distances for the returned k-means\n"
01672 "           clustering solution;\n"
01673 "nfound:    the number of times this solution was found.\n";
01674 
01675 static PyObject*
01676 py_kmedoids(PyObject* self, PyObject* args, PyObject* keywords)
01677 { int NCLUSTERS = 2;
01678   int nitems;
01679   PyObject* DISTANCES = NULL;
01680   PyArrayObject* aDISTANCES = NULL;
01681   double** distances = NULL;
01682   PyObject* INITIALID = NULL;
01683   PyArrayObject* aCLUSTERID = NULL;
01684   int NPASS = 1;
01685   double ERROR;
01686   int IFOUND;
01687 
01688   /* -- Read the input variables ----------------------------------------- */
01689   static char* kwlist[] = { "distance",
01690                             "nclusters",
01691                             "npass",
01692                             "initialid",
01693                              NULL };
01694   if(!PyArg_ParseTupleAndKeywords(args, keywords, "O|iiO", kwlist,
01695                                   &DISTANCES,
01696                                   &NCLUSTERS,
01697                                   &NPASS,
01698                                   &INITIALID)) return NULL;
01699   /* -- Reset None variables to NULL ------------------------------------- */
01700   if (INITIALID==Py_None) INITIALID = NULL;
01701   /* -- Check the npass variable ----------------------------------------- */
01702   if (INITIALID) NPASS = 0;
01703   else if (NPASS < 0)
01704   { PyErr_SetString(PyExc_ValueError, "npass should be a positive integer");
01705     return NULL;
01706   }
01707   /* -- Check the distance matrix ---------------------------------------- */
01708   distances = parse_distance(DISTANCES, &aDISTANCES, &nitems);
01709   if (!distances) return NULL;
01710   /* -- Create the clusterid output variable ----------------------------- */
01711   aCLUSTERID = parse_initialid(INITIALID, &NCLUSTERS, (npy_intp) nitems);
01712   if (!aCLUSTERID)
01713   { free_distances(DISTANCES, aDISTANCES, distances, nitems);
01714     return NULL;
01715   }
01716   /* -- Check the nclusters variable ------------------------------------- */
01717   if (NCLUSTERS <= 0)
01718   { PyErr_SetString(PyExc_ValueError, "nclusters should be a positive integer");
01719     free_distances(DISTANCES, aDISTANCES, distances, nitems);
01720     Py_DECREF((PyObject*) aCLUSTERID);
01721     return NULL;
01722   }
01723   if (nitems < NCLUSTERS)
01724   { PyErr_SetString(PyExc_ValueError,
01725                     "More clusters requested than items to be clustered");
01726     free_distances(DISTANCES, aDISTANCES, distances, nitems);
01727     Py_DECREF((PyObject*) aCLUSTERID);
01728     return NULL;
01729   }
01730   /* --------------------------------------------------------------------- */
01731   kmedoids(NCLUSTERS, 
01732       nitems, 
01733       distances, 
01734       NPASS, 
01735       PyArray_DATA(aCLUSTERID), 
01736       &ERROR, 
01737       &IFOUND);
01738   /* --------------------------------------------------------------------- */
01739   free_distances(DISTANCES, aDISTANCES, distances, nitems);
01740   /* --------------------------------------------------------------------- */
01741   if(IFOUND==0) /* should not occur */
01742   { Py_DECREF((PyObject*) aCLUSTERID);
01743     PyErr_SetString(PyExc_RuntimeError, "Error in kmedoids input arguments");
01744     return NULL;
01745   }
01746   if(IFOUND==-1)
01747   { Py_DECREF((PyObject*) aCLUSTERID);
01748     PyErr_SetString(PyExc_MemoryError, "Memory allocation error in kmedoids");
01749     return NULL;
01750   }
01751   return Py_BuildValue("Ndi",aCLUSTERID, ERROR, IFOUND);
01752 } 
01753 /* end of wrapper for kmedoids */
01754 
01755 /* treecluster */
01756 static char treecluster__doc__[] =
01757 "treecluster(data=None, mask=None, weight=None, transpose=0, dist='e',\n"
01758 "            method='m', distancematrix=None) -> Tree object\n"
01759 "\n"
01760 "This function implements the pairwise single, complete, centroid, and\n"
01761 "average linkage hierarchical clustering methods.\n"
01762 "data     : nrows x ncolumns array containing the gene expression data.\n"
01763 "mask     : nrows x ncolumns array of integers, showing which data are\n"
01764 "           missing. If mask[i][j]==0, then data[i][j] is missing.\n"
01765 "weight   : the weights to be used when calculating distances.\n"
01766 "transpose: if equal to 0, genes (rows) are clustered;\n"
01767 "           if equal to 1, microarrays (columns) are clustered.\n"
01768 "dist     : specifies the distance function to be used:\n"
01769 "           dist=='e': Euclidean distance\n"
01770 "           dist=='b': City Block distance\n"
01771 "           dist=='c': Pearson correlation\n"
01772 "           dist=='a': absolute value of the correlation\n"
01773 "           dist=='u': uncentered correlation\n"
01774 "           dist=='x': absolute uncentered correlation\n"
01775 "           dist=='s': Spearman's rank correlation\n"
01776 "           dist=='k': Kendall's tau\n"
01777 "method   : specifies which linkage method is used:\n"
01778 "           method=='s': Single pairwise linkage\n"
01779 "           method=='m': Complete (maximum) pairwise linkage (default)\n"
01780 "           method=='c': Centroid linkage\n"
01781 "           method=='a': Average pairwise linkage\n"
01782 "distancematrix:  The distance matrix between the elements. There are\n"
01783 "           three ways in which you can pass a distance matrix:\n"
01784 "           #1: a 2D Numerical Python array (in which only the left-lower\n"
01785 "               part of the array will be accessed);\n"
01786 "           #2: a 1D Numerical Python array containing the distances\n"
01787 "               consecutively;\n" 
01788 "           #3: a list of rows containing the lower-triangular part of\n"
01789 "               the distance matrix.\n"
01790 "           Examples are:\n"
01791 "           >>> distance = array([[0.0, 1.1, 2.3],\n"
01792 "                                 [1.1, 0.0, 4.5],\n"
01793 "                                 [2.3, 4.5, 0.0]])\n"
01794 "           (option #1)\n"
01795 "           >>> distance = array([1.1, 2.3, 4.5])\n"
01796 "           (option #2)\n"
01797 "           >>> distance = [array([]),\n"
01798 "                           array([1.1]),\n"
01799 "                           array([2.3, 4.5])\n"
01800 "                          ]\n"
01801 "           (option #3)\n"
01802 "           These three correspond to the same distance matrix.\n"
01803 "           PLEASE NOTE:\n"
01804 "           As the treecluster routine may shuffle the values in the\n"
01805 "           distance matrix as part of the clustering algorithm, be sure\n"
01806 "           to save this array in a different variable before calling\n"
01807 "           treecluster if you need it later.\n"
01808 "\n"
01809 "Either data or distancematrix should be None. If distancematrix==None,\n"
01810 "the hierarchical clustering solution is calculated from the gene\n"
01811 "expression data stored in the argument data. If data==None, the\n"
01812 "hierarchical clustering solution is calculated from the distance matrix\n"
01813 "instead. Pairwise centroid-linkage clustering can be calculated only\n"
01814 "from the gene expression data and not from the distance matrix. Pairwise\n"
01815 "single-, maximum-, and average-linkage clustering can be calculated from\n"
01816 "either the gene expression data or from the distance matrix.\n"
01817 "\n"
01818 "Return value:\n"
01819 "treecluster returns a Tree object describing the hierarchical clustering\n"
01820 "result. See the description of the Tree class for more information.\n";
01821 
01822 static PyObject*
01823 py_treecluster(PyObject* self, PyObject* args, PyObject* keywords)
01824 { PyObject *DATA = NULL;
01825   PyObject *MASK = NULL;
01826   PyObject *WEIGHT = NULL;
01827   int TRANSPOSE = 0;
01828   char DIST = 'e';
01829   char METHOD = 'm';
01830   PyObject *DISTANCEMATRIX = NULL;
01831   PyTree* tree;
01832   Node* nodes;
01833   int nitems;
01834 
01835   /* -- Read the input variables ----------------------------------------- */
01836   static char* kwlist[] = { "data",
01837                             "mask",
01838                             "weight",
01839                             "transpose",
01840                             "method",
01841                             "dist",
01842                             "distancematrix",
01843                              NULL };
01844   if(!PyArg_ParseTupleAndKeywords(args, keywords, "|OOOiO&O&O", kwlist,
01845                                   &DATA,
01846                                   &MASK,
01847                                   &WEIGHT,
01848                                   &TRANSPOSE,
01849                                   method_treecluster_converter, &METHOD,
01850                                   distance_converter, &DIST,
01851                                   &DISTANCEMATRIX)) return NULL;
01852   /* -- Reset None variables to NULL ------------------------------------- */
01853   if(DATA==Py_None) DATA = NULL;
01854   if(MASK==Py_None) MASK = NULL;
01855   if(WEIGHT==Py_None) WEIGHT = NULL;
01856   if(DISTANCEMATRIX==Py_None) DISTANCEMATRIX = NULL;
01857 
01858   /* -- Check if we are using the data matrix or the distance matrix ----- */
01859   if (DATA!=NULL && DISTANCEMATRIX!=NULL)
01860   { PyErr_SetString(PyExc_ValueError,
01861                     "Use either data or distancematrix, do not use both");
01862     return NULL;
01863   }
01864   if (DATA==NULL && DISTANCEMATRIX==NULL)
01865   { PyErr_SetString(PyExc_ValueError,
01866                     "Neither data nor distancematrix was given");
01867     return NULL;
01868   }
01869 
01870   if (DISTANCEMATRIX==NULL) /* DATA contains gene expression data */
01871   { int nrows;
01872     int ncolumns;
01873     int ndata;
01874     PyArrayObject* aDATA = NULL;
01875     PyArrayObject* aMASK = NULL;
01876     PyArrayObject* aWEIGHT = NULL;
01877     double** data = NULL;
01878     int** mask = NULL;
01879     double* weight = NULL;
01880 
01881     /* -- Check the data input array --------------------------------------- */
01882     data = parse_data(DATA, &aDATA);
01883     if (!data) return NULL;
01884     nrows = (int) PyArray_DIM(aDATA, 0);
01885     ncolumns = (int) PyArray_DIM(aDATA, 1);
01886     ndata = TRANSPOSE ? nrows : ncolumns;
01887     nitems = TRANSPOSE ? ncolumns : nrows;
01888     if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
01889     { free_data(aDATA, data);
01890       PyErr_SetString(PyExc_ValueError, "data array is too large");
01891       return NULL;
01892     }
01893     /* -- Check the mask input --------------------------------------------- */
01894     mask = parse_mask(MASK, &aMASK, PyArray_DIMS(aDATA));
01895     if (!mask)
01896     { free_data(aDATA, data);
01897       return NULL;
01898     }
01899     /* -- Check the weight input ------------------------------------------- */
01900     weight = parse_weight(WEIGHT, &aWEIGHT, ndata);
01901     if (!weight)
01902     { free_data(aDATA, data);
01903       free_mask(aMASK, mask, nrows);
01904       return NULL;
01905     }
01906     /* -- Call treecluster to perform hierarchical clustering -------------- */
01907     nodes = treecluster(nrows,
01908                         ncolumns,
01909                         data,
01910                         mask,
01911                         weight,
01912                         TRANSPOSE,
01913                         DIST,
01914                         METHOD,
01915                         NULL);
01916     /* --------------------------------------------------------------------- */
01917     free_data(aDATA, data);
01918     free_mask(aMASK, mask, nrows);
01919     free_weight(aWEIGHT, weight);
01920   }
01921   else
01922   { double** distances = NULL;
01923     PyArrayObject* aDISTANCEMATRIX = NULL;
01924     if (!strchr("sma", METHOD))
01925     { PyErr_SetString(PyExc_ValueError, "argument method should be 's', 'm', or 'a' when specifying the distance matrix");
01926       return NULL;
01927     }
01928     /* -- Check the distance matrix ---------------------------------------- */
01929     distances = parse_distance(DISTANCEMATRIX, &aDISTANCEMATRIX, &nitems);
01930     if (!distances) return NULL;
01931     /* --------------------------------------------------------------------- */
01932     nodes = treecluster(nitems,
01933                         nitems,
01934                         0,
01935                         0,
01936                         0,
01937                         TRANSPOSE,
01938                         DIST,
01939                         METHOD,
01940                         distances);
01941     /* --------------------------------------------------------------------- */
01942     free_distances(DISTANCEMATRIX, aDISTANCEMATRIX, distances, nitems);
01943   }
01944 
01945   /* -- Check if a memory allocation error occurred ---------------------- */
01946   if(!nodes)
01947   { PyErr_SetString(PyExc_MemoryError, "error occurred in treecluster");
01948     return NULL;
01949   }
01950   tree = (PyTree*) PyTreeType.tp_alloc(&PyTreeType, 0);
01951   if(!tree)
01952   { PyErr_SetString(PyExc_MemoryError, "error occurred in treecluster");
01953     free(nodes);
01954     return NULL;
01955   }
01956   tree->nodes = nodes;
01957   tree->n = nitems-1;
01958   return (PyObject*) tree;
01959 } 
01960 /* end of wrapper for treecluster */
01961 
01962 /* somcluster */
01963 static char somcluster__doc__[] =
01964 "somcluster(data, mask=None, weight=None, transpose=0,\n"
01965 "           nxgrid=2, nygrid=1, inittau=0.02, niter=1,\n"
01966 "           dist='e') -> clusterid, celldata\n"
01967 "\n"
01968 "This function implements a self-organizing map on a rectangular grid.\n"
01969 "data     : nrows x ncolumns array containing the gene expression data\n"
01970 "mask     : nrows x ncolumns array of integers, showing which data are\n"
01971 "           missing. If mask[i][j]==0, then data[i][j] is missing.\n"
01972 "weight   : the weights to be used when calculating distances\n"
01973 "transpose: if equal to 0, genes (rows) are clustered;\n"
01974 "           if equal to 1, microarrays (columns) are clustered.\n"
01975 "nxgrid   : the horizontal dimension of the rectangular SOM map\n"
01976 "nygrid   : the vertical dimension of the rectangular SOM map\n"
01977 "inittau  : the initial value of tau (the neighborbood function)\n"
01978 "niter    : the number of iterations\n"
01979 "dist     : specifies the distance function to be used:\n"
01980 "           dist=='e': Euclidean distance\n"
01981 "           dist=='b': City Block distance\n"
01982 "           dist=='c': Pearson correlation\n"
01983 "           dist=='a': absolute value of the correlation\n"
01984 "           dist=='u': uncentered correlation\n"
01985 "           dist=='x': absolute uncentered correlation\n"
01986 "           dist=='s': Spearman's rank correlation\n"
01987 "           dist=='k': Kendall's tau\n"
01988 "\n"
01989 "Return values:\n"
01990 "clusterid: array with two columns, while the number of rows is equal to\n"
01991 "           the number of genes or the number of microarrays depending on\n"
01992 "           whether genes or microarrays are being clustered. Each row in\n"
01993 "           the array contains the x and y coordinates of the cell in the\n"
01994 "           rectangular SOM grid to which the gene or microarray was\n"
01995 "           assigned.\n"
01996 "celldata:  an array with dimensions (nxgrid, nygrid, number of\n"
01997 "           microarrays) if genes are being clustered, or (nxgrid,\n"
01998 "           nygrid, number of genes) if microarrays are being clustered.\n"
01999 "           Each element [ix][iy] of this array is a 1D vector containing\n"
02000 "           the gene expression data for the centroid of the cluster in\n"
02001 "           the SOM grid cell with coordinates (ix, iy).\n";
02002 
02003 static PyObject*
02004 py_somcluster(PyObject* self, PyObject* args, PyObject* keywords)
02005 { int nrows;
02006   int ncolumns;
02007   int nitems;
02008   int ndata;
02009   PyObject* DATA = NULL;
02010   PyArrayObject* aDATA = NULL;
02011   double** data = NULL;
02012   PyObject* MASK = NULL;
02013   PyArrayObject* aMASK = NULL;
02014   int** mask = NULL;
02015   PyObject* WEIGHT = NULL;
02016   PyArrayObject* aWEIGHT = NULL;
02017   double* weight = NULL;
02018   int TRANSPOSE = 0;
02019   int NXGRID = 2;
02020   int NYGRID = 1;
02021   double INITTAU = 0.02;
02022   int NITER = 1;
02023   char DIST = 'e';
02024   PyArrayObject* aCELLDATA = NULL;
02025   double*** celldata = NULL;
02026   PyArrayObject* aCLUSTERID = NULL;
02027   npy_intp shape[2];
02028 
02029   /* -- Read the input variables ----------------------------------------- */
02030   static char* kwlist[] = { "data",
02031                             "mask",
02032                             "weight",
02033                             "transpose",
02034                             "nxgrid",
02035                             "nygrid",
02036                             "inittau",
02037                             "niter",
02038                             "dist",
02039                              NULL };
02040   if(!PyArg_ParseTupleAndKeywords(args, keywords, "O|OOiiidiO&", kwlist,
02041                                   &DATA,
02042                                   &MASK,
02043                                   &WEIGHT,
02044                                   &TRANSPOSE,
02045                                   &NXGRID,
02046                                   &NYGRID,
02047                                   &INITTAU,
02048                                   &NITER,
02049                                   distance_converter, &DIST)) return NULL;
02050   /* -- Reset None variables to NULL ------------------------------------- */
02051   if(WEIGHT==Py_None) WEIGHT = NULL;
02052   if(MASK==Py_None) MASK = NULL;
02053   /* -- Check the nxgrid variable ---------------------------------------- */
02054   if (NXGRID < 1)
02055   { PyErr_SetString(PyExc_ValueError,
02056                    "nxgrid should be a positive integer (default is 2)");
02057     return NULL;
02058   }
02059   /* -- Check the nygrid variable ---------------------------------------- */
02060   if (NYGRID < 1)
02061   { PyErr_SetString(PyExc_ValueError,
02062                     "nygrid should be a positive integer (default is 1)");
02063     return NULL;
02064   }
02065   /* -- Check the niter variable ----------------------------------------- */
02066   if (NITER < 1)
02067   { PyErr_SetString(PyExc_ValueError,
02068                     "number of iterations (niter) should be positive");
02069     return NULL;
02070   }
02071   /* -- Check the transpose variable ------------------------------------- */
02072   if (TRANSPOSE) TRANSPOSE = 1;
02073   /* -- Check the data input array --------------------------------------- */
02074   data = parse_data(DATA, &aDATA);
02075   if (!data) return NULL;
02076   nrows = (int) PyArray_DIM(aDATA, 0);
02077   ncolumns = (int) PyArray_DIM(aDATA, 1);
02078   nitems = TRANSPOSE ? ncolumns : nrows;
02079   ndata = TRANSPOSE ? nrows : ncolumns;
02080   if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
02081   { PyErr_SetString(PyExc_RuntimeError, "data array too large");
02082     free_data(aDATA, data);
02083     return NULL;
02084   }
02085   /* -- Check the mask input --------------------------------------------- */
02086   mask = parse_mask(MASK, &aMASK, PyArray_DIMS(aDATA));
02087   if (!mask)
02088   { free_data(aDATA, data);
02089     return NULL;
02090   }
02091   /* -- Check the weight input ------------------------------------------- */
02092   weight = parse_weight(WEIGHT, &aWEIGHT, ndata);
02093   if (!weight)
02094   { free_data(aDATA, data);
02095     free_mask(aMASK, mask, nrows);
02096     return NULL;
02097   }
02098   /* --------------------------------------------------------------------- */
02099   shape[0] = nitems;
02100   shape[1] = 2;
02101   aCLUSTERID = (PyArrayObject*) PyArray_SimpleNew(2, shape, NPY_INT);
02102   if (!aCLUSTERID)
02103   { PyErr_SetString(PyExc_MemoryError,
02104                     "somcluster: Could not create clusterid array");
02105     free_data(aDATA, data);
02106     free_mask(aMASK, mask, nrows);
02107     free_weight(aWEIGHT, weight);
02108     return NULL;
02109   }
02110   /* --------------------------------------------------------------------- */
02111   celldata = create_celldata(NXGRID, NYGRID, ndata, &aCELLDATA);
02112   if (!celldata)
02113   { free_data(aDATA, data);
02114     free_mask(aMASK, mask, nrows);
02115     free_weight(aWEIGHT, weight);
02116     Py_DECREF((PyObject*) aCLUSTERID);
02117   }
02118   /* --------------------------------------------------------------------- */
02119   somcluster(nrows,
02120       ncolumns,
02121       data,
02122       mask,
02123       weight,
02124       TRANSPOSE,
02125       NXGRID,
02126       NYGRID,
02127       INITTAU,
02128       NITER,
02129       DIST,
02130       celldata,
02131       PyArray_DATA(aCLUSTERID));
02132   /* --------------------------------------------------------------------- */
02133   free_data(aDATA, data);
02134   free_mask(aMASK, mask, nrows);
02135   free_weight(aWEIGHT, weight);
02136   free_celldata(celldata);
02137   /* --------------------------------------------------------------------- */
02138   return Py_BuildValue("NN",
02139                        PyArray_Return(aCLUSTERID),
02140                        PyArray_Return(aCELLDATA));
02141 } 
02142 /* end of wrapper for somcluster */
02143 
02144 /* median */
02145 static char median__doc__[] =
02146 "median(data) -> median value of the 1D array data\n"
02147 "Note: data will be partially ordered upon return.\n";
02148 
02149 static PyObject*
02150 py_median(PyObject* unused, PyObject* args)
02151 { double result;
02152   PyObject* DATA = NULL;
02153   PyArrayObject* aDATA = NULL;
02154 
02155   /* -- Read the input variables ----------------------------------------- */
02156   if(!PyArg_ParseTuple(args, "O", &DATA)) return NULL;
02157 
02158   /* -- Check the input variable ----------------------------------------- */
02159   if (PyFloat_Check(DATA) || PyLong_Check(DATA))
02160   { Py_INCREF(DATA);
02161     return DATA;
02162   }
02163   if(!PyArray_Check(DATA))
02164   { aDATA = (PyArrayObject *) PyArray_ContiguousFromObject(DATA, PyArray_NOTYPE, 0, 0);
02165     if (!aDATA)
02166     { PyErr_SetString(PyExc_TypeError,
02167                      "Argument cannot be converted to needed array.");
02168       return NULL;
02169     }
02170   }
02171   else
02172   { aDATA = (PyArrayObject*) DATA;
02173     Py_INCREF(DATA);
02174   }
02175   if (PyArray_TYPE(aDATA) != NPY_DOUBLE)
02176   { PyObject* av = PyArray_Cast(aDATA, NPY_DOUBLE);
02177     Py_DECREF((PyObject*) aDATA);
02178     aDATA = (PyArrayObject*) av;
02179     if (!aDATA)
02180     { PyErr_SetString(PyExc_ValueError,
02181                      "Argument cannot be cast to needed type.");
02182       return NULL;
02183     }
02184   } 
02185   if (PyArray_NDIM(aDATA) != 1 && (PyArray_NDIM(aDATA) > 0 || PyArray_DIM(aDATA, 0) != 1))
02186   { PyErr_Format(PyExc_ValueError,
02187                  "median: Argument has incorrect rank (%d expected 1).",
02188                  PyArray_NDIM(aDATA));
02189     Py_DECREF((PyObject*) aDATA);
02190     return NULL;
02191   }
02192   if (!PyArray_ISCONTIGUOUS(aDATA))
02193   { PyObject* av =
02194       PyArray_ContiguousFromObject((PyObject*) aDATA, PyArray_TYPE(aDATA), 0, 0);
02195     Py_DECREF((PyObject*)aDATA);
02196     if(!av)
02197     { PyErr_SetString(PyExc_ValueError, "Failed making argument contiguous.");
02198       return NULL;
02199     }
02200     aDATA = (PyArrayObject*) av;
02201   }
02202   /* --------------------------------------------------------------------- */
02203   result = median(PyArray_DIM(aDATA, 0), PyArray_DATA(aDATA));
02204   /* --------------------------------------------------------------------- */
02205   Py_DECREF((PyObject*) aDATA);
02206   /* --------------------------------------------------------------------- */
02207   return PyFloat_FromDouble(result);
02208 } 
02209 /* end of wrapper for median */
02210 
02211 /* mean */
02212 static char mean__doc__[] =
02213 "mean(data) -> arithmetic mean of the 1D array data.\n";
02214 
02215 static PyObject*
02216 py_mean(PyObject* unused, PyObject* args)
02217 { double result;
02218   PyObject* DATA = NULL;
02219   PyArrayObject* aDATA = NULL;
02220 
02221   /* -- Read the input variables ----------------------------------------- */
02222   if(!PyArg_ParseTuple(args, "O", &DATA)) return NULL;
02223 
02224   /* -- Check the input variable ----------------------------------------- */
02225   if (PyFloat_Check(DATA) || PyLong_Check(DATA))
02226   { Py_INCREF(DATA);
02227     return DATA;
02228   }
02229   if(!PyArray_Check(DATA))
02230   { aDATA = (PyArrayObject *) PyArray_ContiguousFromObject(DATA, PyArray_NOTYPE, 0, 0);
02231     if (!aDATA)
02232     { PyErr_SetString(PyExc_TypeError,
02233                       "Argument cannot be converted to needed array.");
02234       return NULL;
02235     }
02236   }
02237   else
02238   { aDATA = (PyArrayObject*) DATA;
02239     Py_INCREF(DATA);
02240   }
02241   if (PyArray_TYPE(aDATA) != NPY_DOUBLE)
02242   { PyObject* av = PyArray_Cast(aDATA, NPY_DOUBLE);
02243     Py_DECREF((PyObject*) aDATA);
02244     aDATA = (PyArrayObject*) av;
02245     if (!aDATA)
02246     { PyErr_SetString(PyExc_ValueError,
02247                       "Argument cannot be cast to needed type.");
02248       return NULL;
02249     }
02250   } 
02251   if (PyArray_NDIM(aDATA) != 1 && (PyArray_NDIM(aDATA) > 0 || PyArray_DIM(aDATA, 0) != 1))
02252   { PyErr_Format(PyExc_ValueError,
02253                  "Argument has incorrect rank (%d expected 1).",
02254                  PyArray_NDIM(aDATA));
02255     Py_DECREF((PyObject*) aDATA);
02256     return NULL;
02257   }
02258   if (!PyArray_ISCONTIGUOUS(aDATA))
02259   { PyObject* av =
02260       PyArray_ContiguousFromObject((PyObject*) aDATA, PyArray_TYPE(aDATA), 0, 0);
02261     Py_DECREF((PyObject*)aDATA);
02262     if(!av)
02263     { PyErr_SetString(PyExc_ValueError,
02264                       "mean: Failed making argument contiguous.");
02265       return NULL;
02266     }
02267     aDATA = (PyArrayObject*) av;
02268   }
02269   /* --------------------------------------------------------------------- */
02270   result = mean(PyArray_DIM(aDATA, 0), PyArray_DATA(aDATA));
02271   /* --------------------------------------------------------------------- */
02272   Py_DECREF((PyObject*) aDATA);
02273   /* --------------------------------------------------------------------- */
02274   return PyFloat_FromDouble(result);
02275 } 
02276 /* end of wrapper for mean */
02277 
02278 /* clusterdistance */
02279 static char clusterdistance__doc__[] =
02280 "clusterdistance(data, mask=None, weight=None, index1, index2, dist='e',\n"
02281 "                method='a', transpose=0) -> the distance between the\n"
02282 "                                            two clusters\n"
02283 "\n"
02284 "data     : nrows x ncolumns array containing the expression data\n"
02285 "mask     : nrows x ncolumns array of integers, showing which data are\n"
02286 "           missing. If mask[i][j]==0, then data[i][j] is missing.\n"
02287 "weight   : the weights to be used when calculating distances\n"
02288 "index1   : 1D array identifying which genes/microarrays belong to the\n"
02289 "           first cluster. If the cluster contains only one gene, then\n"
02290 "           index1 can also be written as a single integer.\n"
02291 "index2   : 1D array identifying which genes/microarrays belong to the\n"
02292 "           second cluster. If the cluster contains only one gene, then\n"
02293 "           index2 can also be written as a single integer.\n"
02294 "transpose: if equal to 0, genes (rows) are clustered;\n"
02295 "           if equal to 1, microarrays (columns) are clustered.\n"
02296 "dist     : specifies the distance function to be used:\n"
02297 "           dist=='e': Euclidean distance\n"
02298 "           dist=='b': City Block distance\n"
02299 "           dist=='c': Pearson correlation\n"
02300 "           dist=='a': absolute value of the correlation\n"
02301 "           dist=='u': uncentered correlation\n"
02302 "           dist=='x': absolute uncentered correlation\n"
02303 "           dist=='s': Spearman's rank correlation\n"
02304 "           dist=='k': Kendall's tau\n"
02305 "method   : specifies how the distance between two clusters is defined:\n"
02306 "           method=='a': the distance between the arithmetic means of the\n"
02307 "                        two clusters\n"
02308 "           method=='m': the distance between the medians of the two\n"
02309 "                        clusters\n"
02310 "           method=='s': the smallest pairwise distance between members\n"
02311 "                        of the two clusters\n"
02312 "           method=='x': the largest pairwise distance between members of\n"
02313 "                        the two clusters\n"
02314 "           method=='v': average of the pairwise distances between\n"
02315 "                        members of the clusters\n"
02316 "transpose: if equal to 0: clusters of genes (rows) are considered;\n"
02317 "           if equal to 1: clusters of microarrays (columns) are\n"
02318 "                          considered.\n";
02319 
02320 static PyObject*
02321 py_clusterdistance(PyObject* self, PyObject* args, PyObject* keywords)
02322 { double result;
02323   int nrows;
02324   int ncolumns;
02325   int ndata;
02326   PyObject* DATA = NULL;
02327   PyArrayObject* aDATA = NULL;
02328   double** data;
02329   PyObject* MASK = NULL;
02330   PyArrayObject* aMASK = NULL;
02331   int** mask;
02332   PyObject* WEIGHT = NULL;
02333   PyArrayObject* aWEIGHT = NULL;
02334   double* weight;
02335   char DIST = 'e';
02336   char METHOD = 'a';
02337   int TRANSPOSE = 0;
02338   int N1;
02339   int N2;
02340   PyObject* INDEX1 = NULL;
02341   PyArrayObject* aINDEX1 = NULL;
02342   int* index1;
02343   PyObject* INDEX2 = NULL;
02344   PyArrayObject* aINDEX2 = NULL;
02345   int* index2;
02346 
02347   /* -- Read the input variables ----------------------------------------- */
02348   static char* kwlist[] = { "data",
02349                             "mask",
02350                             "weight",
02351                             "index1",
02352                             "index2",
02353                             "method",
02354                             "dist",
02355                             "transpose",
02356                              NULL };
02357   if(!PyArg_ParseTupleAndKeywords(args, keywords, "O|OOOOO&O&i", kwlist,
02358                                   &DATA,
02359                                   &MASK,
02360                                   &WEIGHT,
02361                                   &INDEX1,
02362                                   &INDEX2,
02363                                   method_clusterdistance_converter, &METHOD,
02364                                   distance_converter, &DIST,
02365                                   &TRANSPOSE)) return NULL;
02366   /* -- Reset None variables to NULL ------------------------------------- */
02367   if (MASK==Py_None) MASK = NULL;
02368   if (WEIGHT==Py_None) WEIGHT = NULL;
02369   if (INDEX1==Py_None) INDEX1 = NULL;
02370   if (INDEX2==Py_None) INDEX2 = NULL;
02371   /* -- Check the transpose variable ------------------------------------- */
02372   if (TRANSPOSE) TRANSPOSE = 1;
02373   /* -- Check the data input array --------------------------------------- */
02374   data = parse_data(DATA, &aDATA);
02375   if (!data) return NULL;
02376   nrows = (int) PyArray_DIM(aDATA, 0);
02377   ncolumns = (int) PyArray_DIM(aDATA, 1);
02378   ndata = TRANSPOSE ? nrows : ncolumns;
02379   if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
02380   { free_data(aDATA, data);
02381     PyErr_SetString(PyExc_ValueError, "data array is too large");
02382     return NULL;
02383   }
02384   /* -- Check the mask input --------------------------------------------- */
02385   mask = parse_mask(MASK, &aMASK, PyArray_DIMS(aDATA));
02386   if (!mask)
02387   { free_data(aDATA, data);
02388     return NULL;
02389   }
02390   /* -- Check the weight input ------------------------------------------- */
02391   weight = parse_weight(WEIGHT, &aWEIGHT, ndata);
02392   if (!weight)
02393   { free_data(aDATA, data);
02394     free_mask(aMASK, mask, nrows);
02395     return NULL;
02396   }
02397   /* --------------------------------------------------------------------- */
02398   index1 = parse_index(INDEX1, &aINDEX1, &N1);
02399   if (index1==NULL)
02400   { free_data(aDATA, data);
02401     free_mask(aMASK, mask, nrows);
02402     free_weight(aWEIGHT, weight);
02403     return NULL;
02404   }
02405   index2 = parse_index(INDEX2, &aINDEX2, &N2);
02406   if (index2==NULL)
02407   { free_data(aDATA, data);
02408     free_mask(aMASK, mask, nrows);
02409     free_weight(aWEIGHT, weight);
02410     free_index(aINDEX1, index1);
02411     return NULL;
02412   }
02413   /* --------------------------------------------------------------------- */
02414   result = clusterdistance(nrows,
02415       ncolumns,
02416       data,
02417       mask,
02418       weight,
02419       N1,
02420       N2,
02421       index1,
02422       index2,
02423       DIST,
02424       METHOD,
02425       TRANSPOSE);
02426   /* --------------------------------------------------------------------- */
02427   free_data(aDATA, data);
02428   free_mask(aMASK, mask, nrows);
02429   free_weight(aWEIGHT, weight);
02430   free_index(aINDEX1, index1);
02431   free_index(aINDEX2, index2);
02432   /* --------------------------------------------------------------------- */
02433   if (result < -0.5) /* Actually -1.0; avoiding roundoff errors */
02434   { PyErr_SetString(PyExc_IndexError, "index out of range");
02435     return NULL;
02436   }
02437   return PyFloat_FromDouble(result);
02438 } 
02439 /* end of wrapper for clusterdistance */
02440 
02441 /* clustercentroids */
02442 static char clustercentroids__doc__[] =
02443 "clustercentroids(data, mask=None, transport=0, clusterid,\n"
02444 "                 method='a') -> cdata, cmask\n"
02445 "\n"
02446 "The clustercentroids routine calculates the cluster centroids, given to\n"
02447 "which cluster each element belongs. The centroid is defined as either\n"
02448 "the mean or the median over all elements for each dimension.\n"
02449 
02450 "data     : nrows x ncolumns array containing the expression data\n"
02451 "mask     : nrows x ncolumns array of integers, showing which data are\n"
02452 "           missing. If mask[i][j]==0, then data[i][j] is missing.\n"
02453 "transpose: if equal to 0, gene (row) clusters are considered;\n"
02454 "           if equal to 1, microarray (column) clusters are considered.\n"
02455 "clusterid: array containing the cluster number for each gene or\n"
02456 "           microarray. The cluster number should be non-negative.\n"
02457 "method   : specifies whether the centroid is calculated from the\n"
02458 "           arithmetic mean (method=='a', default) or the median\n"
02459 "           (method=='m') over each dimension.\n"
02460 "\n"
02461 "Return values:\n"
02462 "cdata    : 2D array containing the cluster centroids. If transpose==0,\n"
02463 "           then the dimensions of cdata are nclusters x ncolumns. If\n"
02464 "           transpose==1, then the dimensions of cdata are\n"
02465 "           nrows x nclusters.\n"
02466 "cmask    : 2D array of integers describing which elements in cdata,\n"
02467 "           if any, are missing.\n";
02468 
02469 static PyObject*
02470 py_clustercentroids(PyObject* self, PyObject* args, PyObject* keywords)
02471 { int nrows;
02472   int ncolumns;
02473   unsigned int nitems;
02474   int nclusters;
02475   PyObject* DATA = NULL;
02476   PyArrayObject* aDATA = NULL;
02477   double** data;
02478   PyObject* MASK = NULL;
02479   PyArrayObject* aMASK = NULL;
02480   int** mask;
02481   PyObject* CLUSTERID = NULL;
02482   PyArrayObject* aCLUSTERID = NULL;
02483   int* clusterid;
02484   char METHOD = 'a';
02485   npy_intp shape[2];
02486   PyArrayObject* aCDATA = NULL;
02487   double** cdata;
02488   PyArrayObject* aCMASK = NULL;
02489   int** cmask;
02490   int TRANSPOSE = 0;
02491   int i;
02492   int ok;
02493 
02494   /* -- Read the input variables ----------------------------------------- */
02495   static char* kwlist[] = { "data",
02496                             "mask",
02497                             "clusterid",
02498                             "method",
02499                             "transpose",
02500                              NULL };
02501   if(!PyArg_ParseTupleAndKeywords(args, keywords, "O|OOci", kwlist,
02502                                   &DATA,
02503                                   &MASK,
02504                                   &CLUSTERID,
02505                                   &METHOD,
02506                                   &TRANSPOSE)) return NULL;
02507   /* -- Reset None variables to NULL ------------------------------------- */
02508   if (MASK==Py_None) MASK = NULL;
02509   if (CLUSTERID==Py_None) CLUSTERID = NULL;
02510   /* -- Check the data input array --------------------------------------- */
02511   data = parse_data(DATA, &aDATA);
02512   if (!data) return NULL;
02513   nrows = (int) PyArray_DIM(aDATA, 0);
02514   ncolumns = (int) PyArray_DIM(aDATA, 1);
02515   nitems = TRANSPOSE ? ncolumns : nrows;
02516   if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
02517   { PyErr_SetString(PyExc_RuntimeError, "data array is too large");
02518     free_data(aDATA, data);
02519     return NULL;
02520   }
02521   /* -- Check the mask input --------------------------------------------- */
02522   mask = parse_mask(MASK, &aMASK, PyArray_DIMS(aDATA));
02523   if (!mask)
02524   { free_data(aDATA, data);
02525     return NULL;
02526   }
02527   /* -- Check the cluster assignments ------------------------------------ */
02528   clusterid = parse_clusterid(CLUSTERID, &aCLUSTERID, nitems, &nclusters);
02529   if (!clusterid)
02530   { free_data(aDATA, data);
02531     free_mask(aMASK, mask, nrows);
02532     return NULL;
02533   }
02534   /* -- Create the centroid data output variable ------------------------- */
02535   shape[0] = TRANSPOSE ? nrows : nclusters;
02536   shape[1] = TRANSPOSE ? nclusters : ncolumns;
02537   aCDATA = (PyArrayObject*) PyArray_SimpleNew(2, shape, NPY_DOUBLE);
02538   if (!aCDATA)
02539   { PyErr_SetString(PyExc_MemoryError, "could not create centroids array");
02540     free_data(aDATA, data);
02541     free_mask(aMASK, mask, nrows);
02542     free_clusterid(aCLUSTERID, clusterid);
02543     return NULL;
02544   }
02545   cdata = malloc(shape[0]*sizeof(double*));
02546   for (i=0; i<shape[0]; i++)
02547     cdata[i] = ((double*) PyArray_DATA(aCDATA)) + i*shape[1];
02548   /* -- Create the centroid mask output variable ------------------------- */
02549   aCMASK = (PyArrayObject*) PyArray_SimpleNew(2, shape, NPY_INT);
02550   if (!aCMASK)
02551   { PyErr_SetString(PyExc_MemoryError, "could not create centroids array");
02552     free_data(aDATA, data);
02553     free_mask(aMASK, mask, nrows);
02554     free_clusterid(aCLUSTERID, clusterid);
02555     Py_DECREF((PyObject*) aCDATA);
02556     free(cdata);
02557     return NULL;
02558   }
02559   cmask = malloc(shape[0]*sizeof(int*));
02560   for (i=0; i<shape[0]; i++)
02561     cmask[i] = ((int*) PyArray_DATA(aCMASK)) + i*shape[1];
02562   /* --------------------------------------------------------------------- */
02563   ok = getclustercentroids(nclusters,
02564                            nrows,
02565                            ncolumns,
02566                            data,
02567                            mask,
02568                            clusterid,
02569                            cdata,
02570                            cmask,
02571                            TRANSPOSE,
02572                            METHOD);
02573   /* --------------------------------------------------------------------- */
02574   free_data(aDATA, data);
02575   free_mask(aMASK, mask, nrows);
02576   free(cdata);
02577   free(cmask);
02578   free_clusterid(aCLUSTERID, clusterid);
02579   /* --------------------------------------------------------------------- */
02580   if (!ok)
02581   { PyErr_SetString(PyExc_MemoryError, "allocation error in clustercentroids");
02582     return NULL;
02583   }
02584   return Py_BuildValue("NN", PyArray_Return(aCDATA), PyArray_Return(aCMASK));
02585 } 
02586 /* end of wrapper for clustercentroids */
02587 
02588 /* distancematrix */
02589 static char distancematrix__doc__[] =
02590 "distancematrix(data, mask=None, weight=None, transpose=0,\n"
02591 "               dist='e') -> distance matrix as a list of arrays\n"
02592 "\n"
02593 "This function returns the distance matrix between gene expression data.\n"
02594 "data     : nrows x ncolumns array containing the expression data\n"
02595 "mask     : nrows x ncolumns array of integers, showing which data are\n"
02596 "           missing. If mask[i][j]==0, then data[i][j] is missing.\n"
02597 "weight   : the weights to be used when calculating distances.\n"
02598 "transpose: if equal to 0: the distances between genes (rows) are\n"
02599 "                          calculated;\n"
02600 "           if equal to 1, the distances beteeen microarrays (columns)\n"
02601 "                          are calculated.\n"
02602 "dist     : specifies the distance function to be used:\n"
02603 "           dist=='e': Euclidean distance\n"
02604 "           dist=='b': City Block distance\n"
02605 "           dist=='c': Pearson correlation\n"
02606 "           dist=='a': absolute value of the correlation\n"
02607 "           dist=='u': uncentered correlation\n"
02608 "           dist=='x': absolute uncentered correlation\n"
02609 "           dist=='s': Spearman's rank correlation\n"
02610 "           dist=='k': Kendall's tau\n"
02611 "\n"
02612 "Return value:\n"
02613 "The distance matrix is returned as a list of 1D arrays containing the\n"
02614 "distance matrix between the gene expression data. The number of columns\n"
02615 "in each row is equal to the row number. Hence, the first row has zero\n"
02616 "elements. An example of the return value is\n"
02617 "matrix = [[],\n"
02618 "          array([1.]),\n"
02619 "          array([7., 3.]),\n"
02620 "          array([4., 2., 6.])]\n"
02621 "This corresponds to the distance matrix\n"
02622 " [0., 1., 7., 4.]\n"
02623 " [1., 0., 3., 2.]\n"
02624 " [7., 3., 0., 6.]\n"
02625 " [4., 2., 6., 0.]\n";
02626 
02627 static PyObject*
02628 py_distancematrix(PyObject* self, PyObject* args, PyObject* keywords)
02629 { PyObject* result = NULL;
02630   PyObject* DATA = NULL;
02631   PyArrayObject* aDATA = NULL;
02632   double** data = NULL;
02633   PyObject* MASK = NULL;
02634   PyArrayObject* aMASK = NULL;
02635   int** mask = (int**) NULL;
02636   PyObject* WEIGHT = NULL;
02637   PyArrayObject* aWEIGHT = NULL;
02638   double* weight = NULL;
02639   int TRANSPOSE = 0;
02640   char DIST = 'e';
02641   double** distances = NULL;
02642   int nrows, ncolumns, nelements, ndata;
02643  
02644   /* -- Read the input variables ----------------------------------------- */
02645   static char* kwlist[] = { "data",
02646                             "mask",
02647                             "weight",
02648                             "transpose",
02649                             "dist",
02650                              NULL };
02651   if(!PyArg_ParseTupleAndKeywords(args, keywords, "O|OOiO&", kwlist,
02652                                   &DATA,
02653                                   &MASK,
02654                                   &WEIGHT,
02655                                   &TRANSPOSE,
02656                                   distance_converter, &DIST)) return NULL;
02657   /* -- Reset None variables to NULL ------------------------------------- */
02658   if (MASK==Py_None) MASK = NULL;
02659   if (WEIGHT==Py_None) WEIGHT = NULL;
02660   /* -- Check the transpose variable ------------------------------------- */
02661   if (TRANSPOSE) TRANSPOSE = 1;
02662   /* -- Check the data input array --------------------------------------- */
02663   data = parse_data(DATA, &aDATA);
02664   if (!data) return NULL;
02665   nrows = (int) PyArray_DIM(aDATA, 0);
02666   ncolumns = (int) PyArray_DIM(aDATA, 1);
02667   if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
02668   { PyErr_SetString(PyExc_RuntimeError, "data array is too large");
02669     return NULL;
02670   }
02671   ndata = (TRANSPOSE==0) ? ncolumns : nrows;
02672   nelements = (TRANSPOSE==0) ? nrows : ncolumns;
02673   /* -- Check the mask input --------------------------------------------- */
02674   mask = parse_mask(MASK, &aMASK, PyArray_DIMS(aDATA));
02675   if (!mask)
02676   { free_data(aDATA, data);
02677     return NULL;
02678   }
02679   /* -- Check the weight input ------------------------------------------- */
02680   weight = parse_weight(WEIGHT, &aWEIGHT, ndata);
02681   if (!weight)
02682   { free_data(aDATA, data);
02683     free_mask(aMASK, mask, nrows);
02684     return NULL;
02685   }
02686   /* -- Create the matrix output variable -------------------------------- */
02687   result = PyList_New(nelements);
02688   if (result)
02689   { npy_intp i, j;
02690     /* ------------------------------------------------------------------- */
02691     distances = distancematrix(nrows,
02692                                ncolumns,
02693                                data,
02694                                mask,
02695                                weight,
02696                                DIST,
02697                                TRANSPOSE);
02698     /* ------------------------------------------------------------------- */
02699     if (distances)
02700     { for (i = 0; i < nelements; i++)
02701       { double* rowdata = NULL;
02702         PyObject* row = PyArray_SimpleNew(1, &i, NPY_DOUBLE);
02703         if (!row)
02704         { PyErr_SetString(PyExc_MemoryError, "could not create distance matrix");
02705           break;
02706         }
02707         rowdata = PyArray_DATA((PyArrayObject*)row);
02708         for (j = 0; j < i; j++) rowdata[j] = distances[i][j];
02709         if (i!=0) /* distances[0]==NULL */
02710           free(distances[i]);
02711         PyList_SET_ITEM(result, i, row);
02712       }
02713       if (i < nelements)
02714       { for (j = 0; j < i; j++)
02715         { PyObject* row =  PyList_GET_ITEM(result, i);
02716           Py_DECREF(row);
02717         }
02718         if (i==0) i = 1; /* distances[0]==NULL */
02719         for (j = i; j < nelements; j++) free(distances[j]);
02720         Py_DECREF(result);
02721         result = NULL;
02722       }
02723       free(distances);
02724     }
02725     else
02726     { Py_DECREF(result);
02727       result = NULL;
02728     }
02729   }
02730   /* --------------------------------------------------------------------- */
02731   free_data(aDATA, data);
02732   free_mask(aMASK, mask, nrows);
02733   free_weight(aWEIGHT, weight);
02734   /* --------------------------------------------------------------------- */
02735   if(result==NULL)
02736     PyErr_SetString(PyExc_MemoryError, "Could not create distance matrix");
02737   return result;
02738 }
02739 /* end of wrapper for distancematrix */
02740 
02741 
02742 /* pca */
02743 static char pca__doc__[] =
02744 "pca(data) -> (columnmean, coordinates, pc, eigenvalues)\n"
02745 "\n"
02746 "This function returns the principal component decomposition of the gene\n"
02747 "expression data.\n"
02748 "data     : nrows x ncolumns array containing the expression data\n"
02749 "\n"
02750 "Return value:\n"
02751 "This function returns an array containing the mean of each column, the\n"
02752 "principal components as an nmin x ncolumns array, as well as the\n"
02753 "coordinates (an nrows x nmin array) of the data along the principal\n"
02754 "components, and the associated eigenvalues. The principal components, the\n"
02755 "coordinates, and the eigenvalues are sorted by the magnitude of the\n"
02756 "eigenvalue, with the largest eigenvalues appearing first. Here, nmin is\n"
02757 "the smaller of nrows and ncolumns.\n"
02758 "Adding the column means to the dot product of the coordinates and the\n"
02759 "principal components,\n"
02760 ">>> columnmean + dot(coordinates, pc)\n"
02761 "recreates the data matrix.\n";
02762 
02763 static PyObject*
02764 py_pca(PyObject* self, PyObject* args)
02765 { PyArrayObject* aMEAN = NULL;
02766   PyArrayObject* aPC = NULL;
02767   PyArrayObject* aCOORDINATES = NULL;
02768   PyArrayObject* aEIGENVALUES = NULL;
02769   double** u;
02770   double** v;
02771   double* w;
02772   PyObject* DATA = NULL;
02773   PyArrayObject* aDATA = NULL;
02774   double** data = NULL;
02775   int nrows, ncolumns;
02776   npy_intp shape[2];
02777   npy_intp nmin;
02778   int error;
02779   double* p;
02780   double* q;
02781   int i, j;
02782  
02783   /* -- Read the input variables ----------------------------------------- */
02784   if(!PyArg_ParseTuple(args, "O", &DATA)) return NULL;
02785   /* -- Check the data input array --------------------------------------- */
02786   data = parse_data(DATA, &aDATA);
02787   if (!data) return NULL;
02788   nrows = (int) PyArray_DIM(aDATA, 0);
02789   ncolumns = (int) PyArray_DIM(aDATA, 1);
02790   if (nrows!=PyArray_DIM(aDATA, 0) || ncolumns!=PyArray_DIM(aDATA, 1))
02791   { PyErr_SetString(PyExc_RuntimeError, "data array is too large");
02792     return NULL;
02793   }
02794   nmin = nrows < ncolumns ? nrows : ncolumns;
02795   /* -- Create the output variables -------------------------------------- */
02796   u = malloc(nrows*sizeof(double*));
02797   v = malloc(nmin*sizeof(double*));
02798   aEIGENVALUES = (PyArrayObject*) PyArray_SimpleNew(1, &nmin, NPY_DOUBLE);
02799   shape[0] = nmin;
02800   shape[1] = ncolumns;
02801   aPC = (PyArrayObject*) PyArray_SimpleNew(2, shape, NPY_DOUBLE);
02802   aMEAN = (PyArrayObject*) PyArray_SimpleNew(1, &shape[1], NPY_DOUBLE);
02803   shape[0] = nrows;
02804   shape[1] = nmin;
02805   aCOORDINATES = (PyArrayObject*) PyArray_SimpleNew(2, shape, NPY_DOUBLE);
02806   if (!u || !v || !aPC || !aEIGENVALUES || !aCOORDINATES || !aMEAN)
02807   {   error = -2;
02808       goto exit;
02809   }
02810   if (nrows >= ncolumns)
02811   { p = PyArray_DATA(aCOORDINATES);
02812     q = PyArray_DATA(aPC);
02813   }
02814   else /* nrows < ncolums */
02815   { p = PyArray_DATA(aPC);
02816     q = PyArray_DATA(aCOORDINATES);
02817   }
02818   for (i=0; i<nrows; i++, p+=ncolumns) u[i]=p;
02819   for (i=0; i<nmin; i++, q+=nmin) v[i]=q;
02820   w = (double*) PyArray_DATA(aEIGENVALUES);
02821   /* -- Calculate the mean of each column ------------------------------ */
02822   p = PyArray_DATA(aMEAN);
02823   for (j = 0; j < ncolumns; j++)
02824   { p[j] = 0.0;
02825     for (i = 0; i < nrows; i++) p[j] += data[i][j];
02826     p[j] /= nrows;
02827   }
02828   /* -- Subtract the mean of each column --------------------------------- */
02829   for (i = 0; i < nrows; i++)
02830       for (j = 0; j < ncolumns; j++)
02831           u[i][j] = data[i][j] - p[j];
02832   /* -- Perform the principal component analysis ----------------------- */
02833   error = pca(nrows, ncolumns, u, v, w);
02834   /* --------------------------------------------------------------------- */
02835 exit:
02836   free_data(aDATA, data);
02837   if (u) free(u);
02838   if (v) free(v);
02839   if (error==0)
02840       return Py_BuildValue("NNNN",
02841                            PyArray_Return(aMEAN),
02842                            PyArray_Return(aCOORDINATES),
02843                            PyArray_Return(aPC),
02844                            PyArray_Return(aEIGENVALUES));
02845   else if (error==-2)
02846           PyErr_SetString(PyExc_MemoryError,
02847               "Insufficient memory for to store the output variables of principal components analysis");
02848   else if (error==-1)
02849       PyErr_SetString(PyExc_MemoryError,
02850           "Insufficient memory for principal components analysis");
02851   else if (error > 0)
02852       PyErr_SetString(PyExc_RuntimeError,
02853           "Singular value decomposition failed to converge");
02854   else
02855       PyErr_SetString(PyExc_RuntimeError, "Unknown error");
02856   Py_XDECREF(aMEAN);
02857   Py_XDECREF(aPC);
02858   Py_XDECREF(aCOORDINATES);
02859   Py_XDECREF(aEIGENVALUES);
02860   return NULL;
02861 }
02862 /* end of wrapper for pca */
02863 
02864 /* ========================================================================== */
02865 /* -- The methods table ----------------------------------------------------- */
02866 /* ========================================================================== */
02867 
02868 
02869 static struct PyMethodDef cluster_methods[] = {
02870    {"version", (PyCFunction) py_version, METH_NOARGS, version__doc__},
02871    {"kcluster", (PyCFunction) py_kcluster, METH_VARARGS | METH_KEYWORDS, kcluster__doc__},
02872    {"kmedoids", (PyCFunction) py_kmedoids, METH_VARARGS | METH_KEYWORDS, kmedoids__doc__},
02873    {"treecluster", (PyCFunction) py_treecluster, METH_VARARGS | METH_KEYWORDS, treecluster__doc__},
02874    {"somcluster", (PyCFunction) py_somcluster, METH_VARARGS | METH_KEYWORDS, somcluster__doc__},
02875    {"median", (PyCFunction) py_median, METH_VARARGS, median__doc__},
02876    {"mean", (PyCFunction) py_mean, METH_VARARGS, mean__doc__},
02877    {"clusterdistance", (PyCFunction) py_clusterdistance, METH_VARARGS | METH_KEYWORDS, clusterdistance__doc__},
02878    {"clustercentroids", (PyCFunction) py_clustercentroids, METH_VARARGS | METH_KEYWORDS, clustercentroids__doc__},
02879    {"distancematrix", (PyCFunction) py_distancematrix, METH_VARARGS | METH_KEYWORDS, distancematrix__doc__},
02880    {"pca", (PyCFunction) py_pca, METH_VARARGS | METH_KEYWORDS, pca__doc__},
02881    {NULL,          NULL, 0, NULL}/* sentinel */
02882 };
02883 
02884 /* ========================================================================== */
02885 /* -- Initialization -------------------------------------------------------- */
02886 /* ========================================================================== */
02887 
02888 #if PY_MAJOR_VERSION >= 3
02889 
02890 static struct PyModuleDef moduledef = {
02891         PyModuleDef_HEAD_INIT,
02892         "cluster",
02893         "C Clustering Library",
02894         -1,
02895         cluster_methods,
02896         NULL,
02897         NULL,
02898         NULL,
02899         NULL
02900 };
02901 
02902 PyObject *
02903 PyInit_cluster(void)
02904 
02905 #else
02906 
02907 void
02908 initcluster(void)
02909 #endif
02910 {
02911   PyObject *module;
02912 
02913   import_array();
02914 
02915   PyNodeType.tp_new = PyType_GenericNew;
02916   PyTreeType.tp_new = PyType_GenericNew;
02917   if (PyType_Ready(&PyNodeType) < 0)
02918 #if PY_MAJOR_VERSION >= 3
02919       return NULL;
02920 #else
02921       return;
02922 #endif
02923   if (PyType_Ready(&PyTreeType) < 0)
02924 #if PY_MAJOR_VERSION >= 3
02925       return NULL;
02926 #else
02927       return;
02928 #endif
02929 
02930 #if PY_MAJOR_VERSION >= 3
02931   module = PyModule_Create(&moduledef);
02932   if (module==NULL) return NULL;
02933 #else
02934   module = Py_InitModule4("cluster",
02935                           cluster_methods,
02936                           "C Clustering Library",
02937                           NULL,
02938                           PYTHON_API_VERSION);
02939   if (module==NULL) return;
02940 #endif
02941 
02942   Py_INCREF(&PyTreeType);
02943   Py_INCREF(&PyNodeType);
02944   PyModule_AddObject(module, "Tree", (PyObject*) &PyTreeType);
02945   PyModule_AddObject(module, "Node", (PyObject*) &PyNodeType);
02946 
02947 #if PY_MAJOR_VERSION >= 3
02948     return module;
02949 #endif
02950 }