Back to index

python-biopython  1.60
cluster.h
Go to the documentation of this file.
00001 /******************************************************************************/
00002 /* The C Clustering Library.
00003  * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
00004  *
00005  * This library was written at the Laboratory of DNA Information Analysis,
00006  * Human Genome Center, Institute of Medical Science, University of Tokyo,
00007  * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
00008  * Contact: mdehoon 'AT' gsc.riken.jp
00009  * 
00010  * Permission to use, copy, modify, and distribute this software and its
00011  * documentation with or without modifications and for any purpose and
00012  * without fee is hereby granted, provided that any copyright notices
00013  * appear in all copies and that both those copyright notices and this
00014  * permission notice appear in supporting documentation, and that the
00015  * names of the contributors or copyright holders not be used in
00016  * advertising or publicity pertaining to distribution of the software
00017  * without specific prior permission.
00018  * 
00019  * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
00020  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
00021  * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
00022  * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
00023  * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
00024  * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
00025  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
00026  * OR PERFORMANCE OF THIS SOFTWARE.
00027  * 
00028  */
00029 
/*
 * Fallback min/max macros. Each is defined only when no macro of that name
 * already exists, so a definition supplied earlier by another header wins.
 * Both arguments and the whole expansion are parenthesized, but each
 * argument appears twice in the expansion: an argument with side effects
 * (e.g. i++) may be evaluated twice.
 */
00030 #ifndef min
00031 #define min(x, y)    ((x) < (y) ? (x) : (y))
00032 #endif
00033 #ifndef max
00034 #define       max(x, y)     ((x) > (y) ? (x) : (y))
00035 #endif
00036 
00037 #ifdef WINDOWS
00038 #  include <windows.h>
00039 #endif
00040 
/* Version string exposed as a string literal.
 * NOTE(review): presumably the version of the C Clustering Library itself,
 * not of the package that bundles it -- confirm. */
00041 #define CLUSTERVERSION "1.50"
00042 
00043 /* Chapter 2 */
/*
 * clusterdistance -- declaration only; the definition is not in this header.
 * Takes a data matrix with a parallel mask matrix, a weight array, two index
 * arrays with their counts, and returns a double.
 * NOTE(review): by name and per the library manual this is the distance
 * between two clusters of rows (columns when transpose is nonzero); n1/n2
 * are presumably the lengths of index1[]/index2[], and dist/method are
 * single-character selector codes -- confirm against cluster.c.
 */
00044 double clusterdistance (int nrows, int ncolumns, double** data, int** mask,
00045   double weight[], int n1, int n2, int index1[], int index2[], char dist,
00046   char method, int transpose);
/*
 * distancematrix -- declaration only. Returns a double** built from the
 * data/mask matrices, the weights and a distance selector character.
 * NOTE(review): the returned matrix is presumably allocated by the callee
 * and owned by the caller afterwards; the allocation layout and the
 * failure return value are not visible here -- confirm against cluster.c.
 */
00047 double** distancematrix (int ngenes, int ndata, double** data,
00048   int** mask, double* weight, char dist, int transpose);
00049 
00050 /* Chapter 3 */
/*
 * getclustercentroids -- declaration only. Receives the data/mask matrices,
 * a per-element cluster assignment (clusterid[]) and a second matrix pair
 * (cdata/cmask); returns an int.
 * NOTE(review): cdata/cmask are presumably caller-allocated outputs that
 * receive the centroids, and the int return presumably reports
 * success/failure -- confirm the exact convention against cluster.c.
 */
00051 int getclustercentroids(int nclusters, int nrows, int ncolumns,
00052   double** data, int** mask, int clusterid[], double** cdata, int** cmask,
00053   int transpose, char method);
/*
 * getclustermedoids -- declaration only. Works from a distance matrix and a
 * cluster assignment rather than from raw data; returns nothing.
 * NOTE(review): centroids[] and errors[] are presumably output arrays with
 * one entry per cluster -- confirm sizes against cluster.c.
 */
00054 void getclustermedoids(int nclusters, int nelements, double** distance,
00055   int clusterid[], int centroids[], double errors[]);
/*
 * kcluster -- declaration only. Returns nothing directly; clusterid[], error
 * and ifound are passed by pointer/array, so results can be written through
 * them.
 * NOTE(review): presumably k-means/k-medians style clustering repeated npass
 * times, with *error the best within-cluster distance sum and *ifound the
 * number of times that solution was found -- confirm, including how failure
 * is signalled, against cluster.c.
 */
00056 void kcluster (int nclusters, int ngenes, int ndata, double** data,
00057   int** mask, double weight[], int transpose, int npass, char method, char dist,
00058   int clusterid[], double* error, int* ifound);
/*
 * kmedoids -- declaration only. Same output parameters as kcluster
 * (clusterid[], error, ifound) but takes a distance matrix instead of the
 * data/mask/weight inputs.
 * NOTE(review): the expected layout of distance (full vs. triangular) is
 * not visible here -- confirm against cluster.c.
 */
00059 void kmedoids (int nclusters, int nelements, double** distance,
00060   int npass, int clusterid[], double* error, int* ifound);
00061 
00062 /* Chapter 4 */
/* One join step of a hierarchical clustering tree. Following the numbering
 * described below, a non-negative left/right value names an original
 * element and a negative value names a node. */
00063 typedef struct {int left; int right; double distance;} Node;
00064 /*
00065  * A Node struct describes a single node in a tree created by hierarchical
00066  * clustering. The tree can be represented by an array of n Node structs,
00067  * where n is the number of elements minus one. The integers left and right
00068  * in each Node struct refer to the two elements or subnodes that are joined
00069  * in this node. The original elements are numbered 0..nelements-1, and the
00070  * nodes -1..-(nelements-1). For each node, distance contains the distance
00071  * between the two subnodes that were joined.
00072  */
00073 
/*
 * treecluster -- declaration only. Returns a pointer to Node.
 * NOTE(review): presumably the returned pointer is a callee-allocated array
 * of Node structs as described in the Node comment, to be freed by the
 * caller; whether distmatrix may be NULL and what is returned on failure
 * are not visible here -- confirm against cluster.c.
 */
00074 Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
00075   double weight[], int transpose, char dist, char method, double** distmatrix);
/* cuttree -- declaration only. Takes a Node tree and a requested cluster
 * count. NOTE(review): clusterid[] is presumably an output of length
 * nelements; confirm how an invalid nclusters is reported. */
00076 void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);
00077 
00078 /* Chapter 5 */
/*
 * somcluster -- declaration only. Unlike the other prototypes in this
 * header, weight is const-qualified here. clusterid is an array of int
 * pairs (int[2] per entry) and celldata is a double***.
 * NOTE(review): presumably a self-organizing map on an nxnodes-by-nynodes
 * grid, with each clusterid pair holding grid coordinates and celldata
 * receiving per-cell vectors -- confirm against cluster.c.
 */
00079 void somcluster (int nrows, int ncolumns, double** data, int** mask,
00080   const double weight[], int transpose, int nxnodes, int nynodes,
00081   double inittau, int niter, char dist, double*** celldata,
00082   int clusterid[][2]);
00083 
00084 /* Chapter 6 */
/* pca -- declaration only. Takes two double** matrices (u, v), one double*
 * vector (w) and two int dimensions; returns an int.
 * NOTE(review): which arguments are inputs vs. outputs, their required
 * sizes, and the meaning of the return code are not visible here --
 * confirm against cluster.c. */
00085 int pca(int m, int n, double** u, double** v, double* w);
00086 
00087 /* Utility routines, currently undocumented */
/* sort -- declaration only. data[] is const, so only index[] can be
 * written. NOTE(review): index[] presumably receives the ordering of data;
 * the very generic external name may collide with other libraries. */
00088 void sort(int n, const double data[], int index[]);
/* mean -- declaration only. Returns a double computed from n values in x.
 * NOTE(review): behavior for n == 0 is not visible here -- confirm. */
00089 double mean(int n, double x[]);
/* median -- declaration only. x[] is not const-qualified, so the callee is
 * permitted to modify it. NOTE(review): check whether x is reordered in
 * place before relying on its contents after the call. */
00090 double median (int n, double x[]);
00091 
/*
 * calculate_weights -- declaration only. Returns a double*.
 * NOTE(review): the result is presumably a callee-allocated array the
 * caller must free; its length and the roles of cutoff/exponent are not
 * visible here -- confirm against cluster.c.
 */
00092 double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
00093   double weights[], int transpose, char dist, double cutoff, double exponent);