Back to index

python-biopython  1.60
KDTree.py
Go to the documentation of this file.
00001 # Copyright 2004 by Thomas Hamelryck. 
00002 # All rights reserved. 
00003 # This code is part of the Biopython distribution and governed by its 
00004 # license.  Please see the LICENSE file that should have been included 
00005 # as part of this package. 
00006 """
00007 KD tree data structure for searching N-dimensional vectors.
00008 
00009 The KD tree data structure can be used for all kinds of searches that
00010 involve N-dimensional vectors, e.g.  neighbor searches (find all points
00011 within a radius of a given point) or finding all point pairs in a set
00012 that are within a certain radius of each other. See "Computational Geometry: 
00013 Algorithms and Applications" (Mark de Berg, Marc van Kreveld, Mark Overmars, 
00014 Otfried Schwarzkopf). Author: Thomas Hamelryck.
00015 """
00016 
00017 from numpy import sum, sqrt, dtype, array
00018 from numpy.random import random
00019 
00020 from Bio.KDTree import _CKDTree 
00021 
00022 def _dist(p, q):
00023     diff=p-q
00024     return sqrt(sum(diff*diff))
00025 
00026 def _neighbor_test(nr_points, dim, bucket_size, radius):
00027     """ Test all fixed radius neighbor search.
00028 
00029     Test all fixed radius neighbor search using the 
00030     KD tree C module.
00031 
00032     o nr_points - number of points used in test
00033     o dim - dimension of coords
00034     o bucket_size - nr of points per tree node
00035     o radius - radius of search (typically 0.05 or so) 
00036     """
00037     # KD tree search
00038     kdt=_CKDTree.KDTree(dim, bucket_size)
00039     coords=random((nr_points, dim))
00040     kdt.set_data(coords)
00041     neighbors = kdt.neighbor_search(radius)
00042     r = [neighbor.radius for neighbor in neighbors]
00043     if r is None:
00044         l1=0
00045     else:
00046         l1=len(r)
00047     # now do a slow search to compare results
00048     neighbors = kdt.neighbor_simple_search(radius)
00049     r = [neighbor.radius for neighbor in neighbors]
00050     if r is None:
00051         l2=0
00052     else:
00053         l2=len(r)
00054     if l1==l2:
00055         print "Passed."
00056     else:
00057         print "Not passed: %i != %i." % (l1, l2)
00058 
00059 def _test(nr_points, dim, bucket_size, radius):
00060     """Test neighbor search.
00061 
00062     Test neighbor search using the KD tree C module.
00063 
00064     o nr_points - number of points used in test
00065     o dim - dimension of coords
00066     o bucket_size - nr of points per tree node
00067     o radius - radius of search (typically 0.05 or so) 
00068     """
00069     # kd tree search
00070     kdt=_CKDTree.KDTree(dim, bucket_size)
00071     coords=random((nr_points, dim))
00072     center=coords[0]
00073     kdt.set_data(coords)
00074     kdt.search_center_radius(center, radius)
00075     r=kdt.get_indices()
00076     if r is None:
00077         l1=0
00078     else:
00079         l1=len(r)
00080     l2=0
00081     # now do a manual search to compare results
00082     for i in range(0, nr_points):
00083         p=coords[i]
00084         if _dist(p, center)<=radius:
00085             l2=l2+1
00086     if l1==l2:
00087         print "Passed."
00088     else:
00089         print "Not passed: %i != %i." % (l1, l2)
00090 
00091 class KDTree(object):
00092     """
00093     KD tree implementation (C++, SWIG python wrapper)
00094 
00095     The KD tree data structure can be used for all kinds of searches that
00096     involve N-dimensional vectors, e.g.  neighbor searches (find all points
00097     within a radius of a given point) or finding all point pairs in a set
00098     that are within a certain radius of each other.
00099 
00100     Reference:
00101 
00102     Computational Geometry: Algorithms and Applications
00103     Second Edition
00104     Mark de Berg, Marc van Kreveld, Mark Overmars, Otfried Schwarzkopf
00105     published by Springer-Verlag
00106     2nd rev. ed. 2000. 
00107     ISBN: 3-540-65620-0
00108 
00109     The KD tree data structure is described in chapter 5, pg. 99. 
00110 
00111     The following article made clear to me that the nodes should 
00112     contain more than one point (this leads to dramatic speed 
00113     improvements for the "all fixed radius neighbor search", see
00114     below):
00115 
00116     JL Bentley, "Kd trees for semidynamic point sets," in Sixth Annual ACM
00117     Symposium on Computational Geometry, vol. 91. San Francisco, 1990
00118 
00119     This KD implementation also performs a "all fixed radius neighbor search",
00120     i.e. it can find all point pairs in a set that are within a certain radius
00121     of each other. As far as I know the algorithm has not been published.
00122     """
00123 
00124     def __init__(self, dim, bucket_size=1):
00125         self.dim=dim
00126         self.kdt=_CKDTree.KDTree(dim, bucket_size)
00127         self.built=0
00128 
00129     # Set data
00130 
00131     def set_coords(self, coords):
00132         """Add the coordinates of the points.
00133 
00134         o coords - two dimensional NumPy array. E.g. if the points
00135         have dimensionality D and there are N points, the coords 
00136         array should be NxD dimensional. 
00137         """
00138         if coords.min()<=-1e6 or coords.max()>=1e6:
00139                 raise Exception("Points should lie between -1e6 and 1e6")
00140         if len(coords.shape)!=2 or coords.shape[1]!=self.dim:
00141                 raise Exception("Expected a Nx%i NumPy array" % self.dim)
00142         self.kdt.set_data(coords)
00143         self.built=1
00144 
00145     # Fixed radius search for a point
00146 
00147     def search(self, center, radius):
00148         """Search all points within radius of center.
00149 
00150         o center - one dimensional NumPy array. E.g. if the points have
00151         dimensionality D, the center array should be D dimensional. 
00152         o radius - float>0
00153         """
00154         if not self.built:
00155                 raise Exception("No point set specified")
00156         if center.shape!=(self.dim,):
00157                 raise Exception("Expected a %i-dimensional NumPy array" \
00158                                 % self.dim)
00159         self.kdt.search_center_radius(center, radius)
00160 
00161     def get_radii(self):
00162         """Return radii.
00163 
00164         Return the list of distances from center after
00165         a neighbor search.
00166         """
00167         a=self.kdt.get_radii()
00168         if a is None:
00169             return []
00170         return a
00171     
00172     def get_indices(self):
00173         """Return the list of indices.
00174 
00175         Return the list of indices after a neighbor search.
00176         The indices refer to the original coords NumPy array. The
00177         coordinates with these indices were within radius of center.
00178 
00179         For an index pair, the first index<second index. 
00180         """
00181         a=self.kdt.get_indices()
00182         if a is None:
00183             return []
00184         return a
00185 
00186     # Fixed radius search for all points
00187 
00188 
00189     def all_search(self, radius):
00190         """All fixed neighbor search.
00191 
00192         Search all point pairs that are within radius.
00193 
00194         o radius - float (>0)
00195         """
00196         if not self.built:
00197                 raise Exception("No point set specified")
00198         self.neighbors = self.kdt.neighbor_search(radius)
00199 
00200     def all_get_indices(self):
00201         """Return All Fixed Neighbor Search results.
00202 
00203         Return a Nx2 dim NumPy array containing
00204         the indices of the point pairs, where N
00205         is the number of neighbor pairs.
00206         """
00207         a = array([[neighbor.index1, neighbor.index2] for neighbor in self.neighbors])
00208         return a
00209 
00210     def all_get_radii(self):
00211         """Return All Fixed Neighbor Search results.
00212 
00213         Return an N-dim array containing the distances
00214         of all the point pairs, where N is the number 
00215         of neighbor pairs..
00216         """
00217         return [neighbor.radius for neighbor in self.neighbors]
00218 
00219 if __name__=="__main__":
00220 
00221     from numpy.random import random
00222 
00223     nr_points=100000
00224     dim=3
00225     bucket_size=10
00226     query_radius=10
00227 
00228     coords=(200*random((nr_points, dim)))
00229 
00230     kdtree=KDTree(dim, bucket_size)
00231 
00232     # enter coords
00233     kdtree.set_coords(coords)
00234 
00235     # Find all point pairs within radius
00236 
00237     kdtree.all_search(query_radius)
00238 
00239     # get indices & radii of points
00240 
00241     # indices is a list of tuples. Each tuple contains the 
00242     # two indices of a point pair within query_radius of 
00243     # each other.
00244     indices=kdtree.all_get_indices() 
00245     radii=kdtree.all_get_radii()
00246 
00247     print "Found %i point pairs within radius %f." % (len(indices), query_radius)
00248 
00249     # Do 10 individual queries
00250 
00251     for i in range(0, 10):
00252         # pick a random center
00253         center=random(dim)
00254         
00255         # search neighbors
00256         kdtree.search(center, query_radius)
00257 
00258         # get indices & radii of points
00259         indices=kdtree.get_indices()
00260         radii=kdtree.get_radii()
00261 
00262         x, y, z=center
00263         print "Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z)
00264