Back to index

python-biopython  1.60
Classes | Functions | Variables
Bio.Nexus.Trees Namespace Reference

Classes

class  TreeError
class  NodeData
class  Tree

Functions

def consensus

Variables

int PRECISION_BRANCHLENGTH = 6
int PRECISION_SUPPORT = 6
string NODECOMMENT_START = '[&'
string NODECOMMENT_END = ']'

Class Documentation

class Bio::Nexus::Trees::TreeError

Definition at line 23 of file Trees.py.


Function Documentation

def Bio.Nexus.Trees.consensus (   trees,
  threshold = 0.5,
  outgroup = None 
)
Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.

Definition at line 783 of file Trees.py.

00783 
def consensus(trees, threshold=0.5,outgroup=None):
    """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.

    Arguments:
     - trees     - sequence of tree objects; each must provide ``dataclass``,
                   ``weight``, ``root``, ``get_taxa()``, ``_walk()`` and
                   ``root_with_outgroup()``.
     - threshold - minimum relative frequency (sum of tree weights divided
                   by the number of trees) a clade needs to be kept.
     - outgroup  - passed unchanged to ``root_with_outgroup()`` of every
                   input tree before its clades are counted.

    Returns None if ``trees`` is empty, otherwise a new Tree named
    ``consensus_<threshold>`` whose nodes carry the clade frequency in
    ``data.support``.

    Raises TreeError if the input trees do not all contain the same taxa,
    or if the consensus tree cannot be assembled consistently.

    NOTE(review): ``root_with_outgroup()`` is called on the input trees
    themselves, so they are presumably re-rooted in place - confirm before
    relying on the inputs being untouched.
    """
    total = len(trees)
    if total == 0:
        return None
    # NOTE(review): nothing checks that this is NodeData or a subclass.
    dataclass = trees[0].dataclass
    alltaxa = set(trees[0].get_taxa())

    # Calculate clade frequencies. A clade is identified by the sorted tuple
    # of its taxa: tuples are hashable, so they can be used as dict keys
    # directly and converted back to a set later without eval().
    clades = {}
    for t in trees:
        if alltaxa != set(t.get_taxa()):
            raise TreeError('Trees for consensus must contain the same taxa')
        t.root_with_outgroup(outgroup=outgroup)
        for st_node in t._walk(t.root):
            clade = tuple(sorted(t.get_taxa(st_node)))
            clades[clade] = clades.get(clade, 0.0) + float(t.weight) / total

    # Weed out clades below threshold; rounding absorbs float summation error.
    rare = [clade for clade, freq in clades.items() if round(freq, 3) < threshold]
    for clade in rare:
        del clades[clade]

    # Create a tree with a root node.
    consensus_tree = Tree(name='consensus_%2.1f' % float(threshold), data=dataclass)
    # Each clade needs a node in the new tree; add them as isolated nodes.
    # While the tree is being assembled, data.taxon temporarily holds the
    # full set of taxa below the node.
    for clade, freq in clades.items():
        node = Nodes.Node(data=dataclass())
        node.data.support = freq
        node.data.taxon = set(clade)
        consensus_tree.add(node)
    # Set root node data.
    root_data = consensus_tree.node(consensus_tree.root).data
    root_data.support = None
    root_data.taxon = alltaxa

    # Sort the nodes by number of taxa in the clade, so root will be the last.
    consensus_ids = consensus_tree.all_ids()
    consensus_ids.sort(key=lambda node_id: len(consensus_tree.node(node_id).data.taxon))
    # Hook each node to the next smallest node that includes all of its taxa.
    for i, current in enumerate(consensus_ids[:-1]):  # skip the last one which is the root
        current_data = consensus_tree.node(current).data
        # Search the remaining (equal-sized or larger) nodes for a parent.
        for parent in consensus_ids[i + 1:]:
            if consensus_tree.node(parent).data.taxon.issuperset(current_data.taxon):
                break
        else:
            # Library code must not terminate the interpreter; report the
            # problem the same way as the final sanity check below.
            raise TreeError('corrupt tree structure?')
        if len(current_data.taxon) == 1:
            # Terminal node: keep the single taxon name itself.
            current_data.taxon = current_data.taxon.pop()
        else:
            # Internal nodes don't have taxa.
            current_data.taxon = None
        consensus_tree.link(parent, current)

    # Eliminate root taxon name.
    consensus_tree.node(consensus_ids[-1]).data.taxon = None
    if alltaxa != set(consensus_tree.get_taxa()):
        raise TreeError('FATAL ERROR: consensus tree is corrupt')
    return consensus_tree
00859 

Here is the caller graph for this function:


Variable Documentation

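string Bio.Nexus.Trees.NODECOMMENT_END = ']'

Definition at line 21 of file Trees.py.

string Bio.Nexus.Trees.NODECOMMENT_START = '[&'

Definition at line 20 of file Trees.py.

int Bio.Nexus.Trees.PRECISION_BRANCHLENGTH = 6

Definition at line 18 of file Trees.py.

int Bio.Nexus.Trees.PRECISION_SUPPORT = 6

Definition at line 19 of file Trees.py.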