Back to index

python-biopython  1.60
Public Member Functions | Public Attributes | Private Member Functions | Static Private Attributes
Bio.Nexus.Nexus.Nexus Class Reference

List of all members.

Public Member Functions

def __init__
def get_original_taxon_order
def set_original_taxon_order
def read
def write_nexus_data_partitions
def write_nexus_data
def append_sets
def export_fasta
def export_phylip
def constant
def cstatus
def weighted_stepmatrix
def crop_matrix
def bootstrap
def add_sequence
def insert_gap
def invert
def gaponly
def terminal_gap_to_missing

Public Attributes

 ntax
 nchar
 unaltered_taxlabels
 taxlabels
 charlabels
 statelabels
 datatype
 respectcase
 missing
 gap
 symbols
 equate
 matchchar
 labels
 transpose
 interleave
 tokens
 eliminate
 matrix
 unknown_blocks
 taxsets
 charsets
 charpartitions
 taxpartitions
 trees
 translate
 structured
 set
 options
 codonposset
 filename
 alphabet
 ambiguous_values
 unambiguous_letters
 valid_characters
 rev_ambiguous_values

Private Member Functions

def _get_nexus_block
def _unknown_nexus_block
def _parse_nexus_block
def _title
def _dimensions
def _format
def _set
def _options
def _eliminate
def _taxlabels
def _check_taxlabels
def _charlabels
def _charstatelabels
def _statelabels
def _matrix
def _translate
def _utree
def _tree
def _apply_block_structure
def _taxset
def _charset
def _taxpartition
def _codonposset
def _codeset
def _charpartition
def _get_indices
def _name_n_vector
def _parse_list
def _resolve
def _stateset
def _changeset
def _treeset
def _treepartition
def _adjust_charlabels

Static Private Attributes

list __slots__ = ['original_taxon_order','__dict__']

Detailed Description

Definition at line 512 of file Nexus.py.


Constructor & Destructor Documentation

def Bio.Nexus.Nexus.Nexus.__init__ (   self,
  input = None 
)

Definition at line 516 of file Nexus.py.

00516 
00517     def __init__(self, input=None):
00518         self.ntax=0                     # number of taxa
00519         self.nchar=0                    # number of characters
00520         self.unaltered_taxlabels=[]          # taxlabels as the appear in the input file (incl. duplicates, etc.)
00521         self.taxlabels=[]               # labels for taxa, ordered by their id
00522         self.charlabels=None            # ... and for characters
00523         self.statelabels=None           # ... and for states
00524         self.datatype='dna'             # (standard), dna, rna, nucleotide, protein
00525         self.respectcase=False          # case sensitivity
00526         self.missing='?'                # symbol for missing characters
00527         self.gap='-'                    # symbol for gap
00528         self.symbols=None               # set of symbols
00529         self.equate=None                # set of symbol synonyms
00530         self.matchchar=None             # matching char for matrix representation
00531         self.labels=None                # left, right, no
00532         self.transpose=False            # whether matrix is transposed
00533         self.interleave=False           # whether matrix is interleaved
00534         self.tokens=False               # unsupported          
00535         self.eliminate=None             # unsupported 
00536         self.matrix=None                # ...
00537         self.unknown_blocks=[]          # blocks we don't care about
00538         self.taxsets={}
00539         self.charsets={}
00540         self.charpartitions={}
00541         self.taxpartitions={}
00542         self.trees=[]                   # list of Trees (instances of Tree class)
00543         self.translate=None             # Dict to translate taxon <-> taxon numbers
00544         self.structured=[]              # structured input representation
00545         self.set={}                     # dict of the set command to set various options 
00546         self.options={}                 # dict of the options command in the data block
00547         self.codonposset=None           # name of the charpartition that defines codon positions
00548 
00549         # some defaults
00550         self.options['gapmode']='missing'
00551         
00552         if input:
00553             self.read(input)
00554         else:
00555             self.read(DEFAULTNEXUS)

Here is the caller graph for this function:


Member Function Documentation

def Bio.Nexus.Nexus.Nexus._adjust_charlabels (   self,
  exclude = None,
  insert = None 
) [private]
Return adjusted indices of self.charlabels if characters are excluded or inserted.

Definition at line 1648 of file Nexus.py.

01648 
01649     def _adjust_charlabels(self,exclude=None,insert=None):
01650         """Return adjusted indices of self.charlabels if characters are excluded or inserted."""
01651         if exclude and insert:
01652             raise NexusError('Can\'t exclude and insert at the same time')
01653         if not self.charlabels:
01654             return None
01655         labels=sorted(self.charlabels)
01656         newcharlabels={}
01657         if exclude:
01658             exclude.sort()
01659             exclude.append(sys.maxint)
01660             excount=0
01661             for c in labels:
01662                 if not c in exclude:
01663                     while c>exclude[excount]:
01664                         excount+=1
01665                     newcharlabels[c-excount]=self.charlabels[c]
01666         elif insert:
01667             insert.sort()
01668             insert.append(sys.maxint)
01669             icount=0
01670             for c in labels:
01671                 while c>=insert[icount]:
01672                     icount+=1
01673                 newcharlabels[c+icount]=self.charlabels[c]
01674         else:
01675             return self.charlabels
01676         return newcharlabels

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._apply_block_structure (   self,
  title,
  lines 
) [private]

Definition at line 952 of file Nexus.py.

00952 
def _apply_block_structure(self,title,lines):
    """Append a structured representation of one block to self.structured.

    A Block is created, given the block title, and filled with one
    Commandline per entry of lines (each built from the line and the title).
    """
    structured_block=Block('')
    structured_block.title=title
    for raw_line in lines:
        command=Commandline(raw_line, title)
        structured_block.commandlines.append(command)
    self.structured.append(structured_block)
       

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._changeset (   self,
  options 
) [private]

Definition at line 1176 of file Nexus.py.

01176 
01177     def _changeset(self, options):
01178         #Not implemented
01179         pass

def Bio.Nexus.Nexus.Nexus._charlabels (   self,
  options 
) [private]

Definition at line 771 of file Nexus.py.

00771 
def _charlabels(self,options):
    """Parse a charlabels command into self.charlabels.

    options -- the command text: "<character> <label>" pairs separated by
               commas.

    self.charlabels is reset to an empty dict and then filled with
    {self._resolve(<character>, CHARSET): <label with quotes stripped>}.

    Raises NexusError if a pair is followed by anything other than ',' or
    the end of the command, or if any other error occurs while parsing.
    """
    self.charlabels={}
    opts=CharBuffer(options)
    while True:
        try:
            # get id and state
            w=opts.next_word()
            if w is None: # MacClade saves and reads charlabel-lists with terminal comma?!
                break
            identifier=self._resolve(w,set_type=CHARSET)
            state=quotestrip(opts.next_word())
            self.charlabels[identifier]=state
            # check for comma or end of command
            c=opts.next_nonwhitespace()
            if c is None:
                break
            elif c!=',':
                raise NexusError('Missing \',\' in line %s.' % options)
        except NexusError:
            raise
        except Exception:
            # only ordinary errors are format errors; KeyboardInterrupt and
            # SystemExit must not be turned into a NexusError
            raise NexusError('Format error in line %s.' % options)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._charpartition (   self,
  options 
) [private]

Definition at line 1011 of file Nexus.py.

01011 
def _charpartition(self, options):
    """Parse a charpartition command and store it in self.charpartitions.

    The partition name is read with self._name_n_vector; the rest is split
    into subpartitions at commas that are not inside single quotes. Each
    subpartition is parsed by self._get_indices (separator ':') and stored
    as {subpartition name: unique indices}.

    Raises NexusError if no partition name can be extracted.
    """
    buf=CharBuffer(options)
    partition_name=self._name_n_vector(buf)
    if not partition_name:
        raise NexusError('Formatting error in charpartition: %s ' % options)
    subpartitions={}
    inside_quotes=False
    pieces=[]       # characters of the subpartition currently being collected
    while True:
        ch=buf.next()
        at_end=ch is None
        if at_end or (ch==',' and not inside_quotes):
            # a subpartition is complete: parse it and start the next one
            subname,subindices=self._get_indices(''.join(pieces),set_type=CHARSET,separator=':')
            subpartitions[subname]=_make_unique(subindices)
            pieces=[]
            if at_end:
                break
            continue
        if ch=="'":
            # commas may be part of a quoted identifier, so track quoting
            inside_quotes=not inside_quotes
        pieces.append(ch)
    self.charpartitions[partition_name]=subpartitions

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._charset (   self,
  options 
) [private]

Definition at line 963 of file Nexus.py.

00963 
def _charset(self, options):
    """Parse a charset command and store its unique indices in self.charsets."""
    charset_name,positions=self._get_indices(options,set_type=CHARSET)
    unique_positions=_make_unique(positions)
    self.charsets[charset_name]=unique_positions
        

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._charstatelabels (   self,
  options 
) [private]

Definition at line 794 of file Nexus.py.

00794 
00795     def _charstatelabels(self,options):
00796         # warning: charstatelabels supports only charlabels-syntax!
00797         self._charlabels(options)

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._check_taxlabels (   self,
  taxon 
) [private]
Check for presence of taxon in self.taxlabels.

Definition at line 763 of file Nexus.py.

00763 
00764     def _check_taxlabels(self,taxon): 
00765         """Check for presence of taxon in self.taxlabels."""
00766         # According to NEXUS standard, underscores shall be treated as spaces...,
00767         # so checking for identity is more difficult
00768         nextaxa=dict([(t.replace(' ','_'),t) for t in self.taxlabels])
00769         nexid=taxon.replace(' ','_')
00770         return nextaxa.get(nexid)

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._codeset (   self,
  options 
) [private]

Definition at line 1008 of file Nexus.py.

01008 
01009     def _codeset(self,options):
01010         pass

def Bio.Nexus.Nexus.Nexus._codonposset (   self,
  options 
) [private]
Read codon positions from a codons block as written by MacClade.

Here codonposset is just a fancy name for a character partition with
the name CodonPositions and the partitions N,1,2,3

Definition at line 992 of file Nexus.py.

00992 
00993     def _codonposset(self,options):
00994         """Read codon positions from a codons block as written from McClade.
00995 
00996         Here codonposset is just a fancy name for a character partition with
00997         the name CodonPositions and the partitions N,1,2,3
00998         """
00999 
01000         prev_partitions=self.charpartitions.keys()
01001         self._charpartition(options)
01002         # mcclade calls it CodonPositions, but you never know...
01003         codonname=[n for n in self.charpartitions if n not in prev_partitions]
01004         if codonname==[] or len(codonname)>1:
01005             raise NexusError('Formatting Error in codonposset: %s ' % options)
01006         else:
01007             self.codonposset=codonname[0]
  

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._dimensions (   self,
  options 
) [private]

Definition at line 650 of file Nexus.py.

00650 
00651     def _dimensions(self,options):
00652         if 'ntax' in options:
00653             self.ntax=eval(options['ntax'])
00654         if 'nchar' in options:
00655             self.nchar=eval(options['nchar'])

def Bio.Nexus.Nexus.Nexus._eliminate (   self,
  options 
) [private]

Definition at line 743 of file Nexus.py.

00743 
00744     def _eliminate(self,options):
00745         self.eliminate=options

def Bio.Nexus.Nexus.Nexus._format (   self,
  options 
) [private]

Definition at line 656 of file Nexus.py.

00656 
00657     def _format(self,options):
00658         # print options
00659         # we first need to test respectcase, then symbols (which depends on respectcase)
00660         # then datatype (which, if standard, depends on symbols and respectcase in order to generate
00661         # dicts for ambiguous values and alphabet
00662         if 'respectcase' in options:
00663             self.respectcase=True
00664         # adjust symbols to for respectcase
00665         if 'symbols' in options:
00666             self.symbols=options['symbols']
00667             if (self.symbols.startswith('"') and self.symbols.endswith('"')) or\
00668             (self.symbold.startswith("'") and self.symbols.endswith("'")):
00669                 self.symbols=self.symbols[1:-1].replace(' ','')
00670             if not self.respectcase:
00671                 self.symbols=self.symbols.lower()+self.symbols.upper()
00672                 self.symbols=list(set(self.symbols))
00673         if 'datatype' in options:
00674             self.datatype=options['datatype'].lower()
00675             if self.datatype=='dna' or self.datatype=='nucleotide':
00676                 self.alphabet=copy.deepcopy(IUPAC.ambiguous_dna)
00677                 self.ambiguous_values=copy.deepcopy(IUPACData.ambiguous_dna_values)
00678                 self.unambiguous_letters=copy.deepcopy(IUPACData.unambiguous_dna_letters)
00679             elif self.datatype=='rna':
00680                 self.alphabet=copy.deepcopy(IUPAC.ambiguous_rna)
00681                 self.ambiguous_values=copy.deepcopy(IUPACData.ambiguous_rna_values)
00682                 self.unambiguous_letters=copy.deepcopy(IUPACData.unambiguous_rna_letters)
00683             elif self.datatype=='protein':
00684                 self.alphabet=copy.deepcopy(IUPAC.protein)
00685                 self.ambiguous_values={'B':'DN','Z':'EQ','X':copy.deepcopy(IUPACData.protein_letters)} # that's how PAUP handles it
00686                 self.unambiguous_letters=copy.deepcopy(IUPACData.protein_letters)+'*' # stop-codon
00687             elif self.datatype=='standard':
00688                 raise NexusError('Datatype standard is not yet supported.')
00689                 #self.alphabet=None
00690                 #self.ambiguous_values={}
00691                 #if not self.symbols:
00692                 #    self.symbols='01' # if nothing else defined, then 0 and 1 are the default states
00693                 #self.unambiguous_letters=self.symbols
00694             else:
00695                 raise NexusError('Unsupported datatype: '+self.datatype)
00696             self.valid_characters=''.join(self.ambiguous_values)+self.unambiguous_letters
00697             if not self.respectcase:
00698                 self.valid_characters=self.valid_characters.lower()+self.valid_characters.upper()
00699             #we have to sort the reverse ambig coding dict key characters:
00700             #to be sure that it's 'ACGT':'N' and not 'GTCA':'N'
00701             rev=dict((i[1],i[0]) for i in self.ambiguous_values.iteritems() if i[0]!='X')
00702             self.rev_ambiguous_values={}
00703             for (k,v) in rev.iteritems():
00704                 key=[c for c in k]
00705                 key.sort()
00706                 self.rev_ambiguous_values[''.join(key)]=v
00707         #overwrite symbols for datype rna,dna,nucleotide
00708         if self.datatype in ['dna','rna','nucleotide']:
00709             self.symbols=self.alphabet.letters
00710             if self.missing not in self.ambiguous_values:
00711                 self.ambiguous_values[self.missing]=self.unambiguous_letters+self.gap
00712             self.ambiguous_values[self.gap]=self.gap
00713         elif self.datatype=='standard':
00714             if not self.symbols:
00715                 self.symbols=['1','0']
00716         if 'missing' in options:
00717             self.missing=options['missing'][0]
00718         if 'gap' in options:
00719             self.gap=options['gap'][0]
00720         if 'equate' in options:
00721             self.equate=options['equate']
00722         if 'matchchar' in options:
00723             self.matchchar=options['matchchar'][0]
00724         if 'labels' in options:
00725             self.labels=options['labels']
00726         if 'transpose' in options:
00727             raise NexusError('TRANSPOSE is not supported!')
00728             self.transpose=True
00729         if 'interleave' in options:
00730             if options['interleave']==None or options['interleave'].lower()=='yes':
00731                 self.interleave=True
00732         if 'tokens' in options:
00733             self.tokens=True
00734         if 'notokens' in options:
00735             self.tokens=False
00736 

def Bio.Nexus.Nexus.Nexus._get_indices (   self,
  options,
  set_type = CHARSET,
  separator = '=' 
) [private]
Parse the taxset/charset specification (PRIVATE).

e.g. '1 2   3 - 5 dog cat   10 - 20 \\ 3'
--> [0,1,2,3,4,'dog','cat',9,12,15,18]

Definition at line 1035 of file Nexus.py.

01035 
def _get_indices(self,options,set_type=CHARSET,separator='='):
    """Parse the taxset/charset specification (PRIVATE).

    e.g. '1 2   3 - 5 dog cat   10 - 20 \\ 3'
    --> [0,1,2,3,4,'dog','cat',9,12,15,18]

    Returns a (name, indices) pair: the name read by self._name_n_vector
    and the list produced by self._parse_list.

    Raises NexusError if self._parse_list returns None.
    """
    buf=CharBuffer(options)
    set_name=self._name_n_vector(buf,separator=separator)
    members=self._parse_list(buf,set_type=set_type)
    if members is not None:
        return set_name,members
    raise NexusError('Formatting error in line: %s ' % options)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._get_nexus_block (   self,
  file_contents 
) [private]
Generator for looping through Nexus blocks.

Definition at line 605 of file Nexus.py.

00605 
00606     def _get_nexus_block(self,file_contents):
00607         """Generator for looping through Nexus blocks."""
00608         inblock=False
00609         blocklines=[]
00610         while file_contents:
00611             cl=file_contents.pop(0)
00612             if cl.lower().startswith('begin'):
00613                 if not inblock:
00614                     inblock=True
00615                     title=cl.split()[1].lower()
00616                 else:
00617                     raise NexusError('Illegal block nesting in block %s' % title)
00618             elif cl.lower().startswith('end'):
00619                 if inblock:
00620                     inblock=False
00621                     yield title,blocklines
00622                     blocklines=[]
00623                 else:
00624                     raise NexusError('Unmatched \'end\'.')
00625             elif inblock:
00626                 blocklines.append(cl)

def Bio.Nexus.Nexus.Nexus._matrix (   self,
  options 
) [private]

Definition at line 803 of file Nexus.py.

00803 
def _matrix(self,options):
    """Parse a matrix command into self.matrix.

    options -- the matrix text, one taxon (or continuation) per line.

    Fills self.matrix ({taxon label: sequence}), self.taxlabels and
    self.unaltered_taxlabels. For interleaved matrices the later blocks are
    appended to the sequences read in the first block. If matchchar is set,
    every match character is replaced by the character at the same position
    of the first sequence of the block.

    Raises NexusError if dimensions are missing, the number of taxa does not
    fit ntax, the matrix ends in the middle of a taxon, a sequence contains
    an illegal character, a taxon of a later interleaved block is unknown,
    or a sequence length differs from nchar.
    """
    if not self.ntax or not self.nchar:
        raise NexusError('Dimensions must be specified before matrix!')
    self.matrix={}
    taxcount=0
    first_matrix_block=True

    #eliminate empty lines and leading/trailing whitespace
    lines=[l.strip() for l in options.split('\n') if l.strip()!='']
    nlines=len(lines)
    pos=0           # index of the next unread line
    while True:
        if pos>=nlines:
            # all lines consumed: the last block must be complete
            if taxcount<self.ntax:
                raise NexusError('Not enough taxa in matrix.')
            elif taxcount>self.ntax:
                raise NexusError('Too many taxa in matrix.')
            else:
                break
        l=lines[pos]
        pos+=1
        # count the taxa and check for interleaved matrix
        taxcount+=1
        if taxcount>self.ntax:
            if not self.interleave:
                raise NexusError('Too many taxa in matrix - should matrix be interleaved?')
            else:
                taxcount=1
                first_matrix_block=False
        #get taxon name and sequence
        linechars=CharBuffer(l)
        label=quotestrip(linechars.next_word())
        l=linechars.rest().strip()
        if self.interleave:
            #interleaved matrix
            if not l:
                # the label stands alone; its sequence is on the next line
                if pos>=nlines:
                    raise NexusError('Unexpected end of matrix after taxon %s.' % label)
                l=lines[pos]
                pos+=1
            chars=''.join(l.split())
        else:
            #non-interleaved matrix: keep reading lines until nchar is reached
            chars=''.join(l.split())
            while len(chars)<self.nchar:
                if pos>=nlines:
                    raise NexusError('Unexpected end of matrix in sequence of taxon %s.' % label)
                chars+=''.join(lines[pos].split())
                pos+=1
        iupac_seq=Seq(_replace_parenthesized_ambigs(chars,self.rev_ambiguous_values),self.alphabet)
        #first taxon has the reference sequence if matchchar is used
        if taxcount==1:
            refseq=iupac_seq
        elif self.matchchar:
            seqstr=iupac_seq.tostring()
            if self.matchchar in seqstr:
                # one pass over the sequence; a match character that resolves
                # to another match character is left for the validity check
                # below instead of being substituted again and again
                refstr=refseq.tostring()
                resolved=[]
                for i,c in enumerate(seqstr):
                    if c==self.matchchar:
                        if i>=len(refstr):
                            raise NexusError('Taxon %s: match character beyond the end of the reference sequence.' % label)
                        c=refstr[i]
                    resolved.append(c)
                iupac_seq=Seq(''.join(resolved),self.alphabet)
        #check for invalid characters
        for c in iupac_seq.tostring():
            if c not in self.valid_characters and c!=self.gap and c!=self.missing:
                raise NexusError( \
                    ('Taxon %s: Illegal character %s in sequence %s ' + \
                     '(check dimensions/interleaving)') % (label,c, iupac_seq))
        #add sequence to matrix
        if first_matrix_block:
            self.unaltered_taxlabels.append(label)
            label=_unique_label(list(self.matrix),label)
            self.matrix[label]=iupac_seq
            self.taxlabels.append(label)
        else:
            # taxon names need to be in the same order in each interleaved block
            label=_unique_label(self.taxlabels[:taxcount-1],label)
            taxon_present=self._check_taxlabels(label)
            if taxon_present:
                self.matrix[taxon_present]+=iupac_seq
            else:
                raise NexusError('Taxon %s not in first block of interleaved matrix. Check matrix dimensions and interleave.' % label)
    #check all sequences for length according to nchar
    for taxon in self.matrix:
        if len(self.matrix[taxon])!=self.nchar:
            raise NexusError('Matrx Nchar %d does not match data length (%d) for taxon %s' \
                             % (self.nchar, len(self.matrix[taxon]),taxon))
    #check that taxlabels is identical with matrix.keys. If not, it's a problem
    matrixkeys=sorted(self.matrix)
    taxlabelssort=sorted(self.taxlabels)
    assert matrixkeys==taxlabelssort,"ERROR: TAXLABELS must be identical with MATRIX. Please Report this as a bug, and send in data file."

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._name_n_vector (   self,
  opts,
  separator = '=' 
) [private]
Extract name and check that it's not in vector format.

Definition at line 1048 of file Nexus.py.

01048 
def _name_n_vector(self,opts,separator='='):
    """Extract name and check that it's not in vector format.

    opts      -- character buffer positioned at the start of the definition.
    separator -- the character expected between the name (plus optional
                 qualifier) and the list that follows.

    A leading '*' before the name is ignored and quotes are stripped from
    the name. An optional parenthesized qualifier may follow the name; only
    'standard' is accepted.

    Returns the name. The buffer is left positioned after the separator.

    Raises NexusError if the name, the closing parenthesis or the separator
    is missing, or if the qualifier is 'vector' or unknown.
    """
    rest=opts.rest()        # kept for error messages
    name=opts.next_word()
    # we ignore * before names
    if name=='*':
        name=opts.next_word()
    if not name:
        raise NexusError('Formatting error in line: %s ' % rest)
    name=quotestrip(name)
    if opts.peek_nonwhitespace()=='(':
        opts.next_nonwhitespace()               # consume '('
        qualifier=opts.next_word()
        closing=opts.next_nonwhitespace()
        if not qualifier or closing!=')':
            raise NexusError('Formatting error in line: %s ' % rest)
        if qualifier.lower()=='vector':
            raise NexusError('Unsupported VECTOR format in line %s' \
                             % (rest))
        elif qualifier.lower()!='standard':
            raise NexusError('Unknown qualifier %s in line %s' \
                             % (qualifier, rest))
    if opts.next_nonwhitespace()!=separator:
        raise NexusError('Formatting error in line: %s ' % rest)
    return name
    

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._options (   self,
  options 
) [private]

Definition at line 740 of file Nexus.py.

00740 
00741     def _options(self,options):
00742         self.options=options;

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._parse_list (   self,
  options_buffer,
  set_type 
) [private]
Parse a NEXUS list (PRIVATE).

e.g. [1, 2, 4-8\\2, dog, cat] --> [1,2,4,6,8,17,21],
(assuming dog is taxon no. 17 and cat is taxon no. 21).

Definition at line 1072 of file Nexus.py.

01072 
01073     def _parse_list(self,options_buffer,set_type):
01074         """Parse a NEXUS list (PRIVATE).
01075 
01076         e.g. [1, 2, 4-8\\2, dog, cat] --> [1,2,4,6,8,17,21],
01077         (assuming dog is taxon no. 17 and cat is taxon no. 21).
01078         """
01079         plain_list=[]
01080         if options_buffer.peek_nonwhitespace():
01081             try:    # capture all possible exceptions and treat them as formatting erros, if they are not NexusError
01082                 while True:
01083                     identifier=options_buffer.next_word()                                     # next list element
01084                     if not identifier:                                              # end of list?
01085                         break
01086                     start=self._resolve(identifier,set_type=set_type)
01087                     if options_buffer.peek_nonwhitespace()=='-':                            # followd by -
01088                         end=start
01089                         step=1
01090                         # get hyphen and end of range
01091                         hyphen=options_buffer.next_nonwhitespace()
01092                         end=self._resolve(options_buffer.next_word(),set_type=set_type)
01093                         if set_type==CHARSET:
01094                             if options_buffer.peek_nonwhitespace()=='\\':                           # followd by \
01095                                 backslash=options_buffer.next_nonwhitespace()
01096                                 step=int(options_buffer.next_word())         # get backslash and step
01097                             plain_list.extend(range(start,end+1,step)) 
01098                         else:
01099                             if type(start)==list or type(end)==list:
01100                                 raise NexusError('Name if character sets not allowed in range definition: %s'\
01101                                                  % identifier)
01102                             start=self.taxlabels.index(start)
01103                             end=self.taxlabels.index(end)
01104                             taxrange=self.taxlabels[start:end+1]
01105                             plain_list.extend(taxrange)
01106                     else:
01107                         if type(start)==list:           # start was the name of charset or taxset
01108                             plain_list.extend(start)
01109                         else:                           # start was an ordinary identifier
01110                             plain_list.append(start)
01111             except NexusError:
01112                 raise
01113             except:
01114                 return None
01115         return plain_list
        

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._parse_nexus_block (   self,
  title,
  contents 
) [private]
Parse a known Nexus Block (PRIVATE).

Definition at line 633 of file Nexus.py.

00633 
00634     def _parse_nexus_block(self,title, contents):
00635         """Parse a known Nexus Block (PRIVATE)."""
00636         # attached the structered block representation
00637         self._apply_block_structure(title, contents)
00638         #now check for taxa,characters,data blocks. If this stuff is defined more than once
00639         #the later occurences will override the previous ones.
00640         block=self.structured[-1] 
00641         for line in block.commandlines:
00642             try:
00643                 getattr(self,'_'+line.command)(line.options)
00644             except AttributeError:
00645                 raise
00646                 raise NexusError('Unknown command: %s ' % line.command)
    

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._resolve (   self,
  identifier,
  set_type = None 
) [private]
Translate identifier in list into character/taxon index.

Characters (which are referred to by their index in Nexus.py):
    Plain numbers are returned minus 1 (Nexus indices to Python indices).
    Text identifiers are translated into their indices (if plain character identifiers);
    the first hit in charlabels is returned (charlabels don't need to be unique),
    or the range of indices is returned (if names of character sets).
Taxa (which are referred to by their unique name in Nexus.py):
    Plain numbers are translated into their taxon name; underscores and spaces are considered equal.
    Names are returned unchanged (if plain taxon identifiers), or the names in
    the corresponding taxon set are returned.

Definition at line 1116 of file Nexus.py.

01116 
def _resolve(self,identifier,set_type=None):
    """Translate identifier in list into character/taxon index.

    Characters (which are referred to by their index in Nexus.py):
        Plain numbers are returned minus 1 (Nexus indices to Python indices).
        Text identifiers are translated into their indices (if plain character identifiers);
        the first hit in charlabels is returned (charlabels don't need to be unique),
        or the indices stored in self.charsets are returned (if names of character sets).
    Taxa (which are referred to by their unique name in Nexus.py):
        Plain numbers are translated into their taxon name; underscores and spaces are considered equal.
        Names are returned unchanged (if plain taxon identifiers), or the names in
        the corresponding taxon set are returned.

    Quotes are stripped from identifier before it is looked up.

    Raises NexusError if set_type is missing or unknown, if a number is
    outside 1..nchar (characters) or 1..ntax (taxa), or if a name is
    neither a known label nor a known set.
    """
    identifier=quotestrip(identifier)
    if not set_type:
        raise NexusError('INTERNAL ERROR: Need type to resolve identifier.')
    if set_type==CHARSET:
        try:
            n=int(identifier)
        except ValueError:
            if self.charlabels and identifier in self.charlabels.values():
                for k in self.charlabels:
                    if self.charlabels[k]==identifier:
                        return k
            elif self.charsets and identifier in self.charsets:
                return self.charsets[identifier]
            else:
                raise NexusError('Unknown character identifier: %s' \
                                 % identifier)
        else:
            # Nexus indices start at 1; 0 or a negative number would
            # otherwise turn into a negative Python index
            if n<1:
                raise NexusError('Illegal character identifier: %d<1.' % n)
            if n>self.nchar:
                raise NexusError('Illegal character identifier: %d>nchar (=%d).' \
                                 % (n,self.nchar))
            return n-1
    elif set_type==TAXSET:
        try:
            n=int(identifier)
        except ValueError:
            taxlabels_id=self._check_taxlabels(identifier)
            if taxlabels_id:
                return taxlabels_id
            elif self.taxsets and identifier in self.taxsets:
                return self.taxsets[identifier]
            else:
                raise NexusError('Unknown taxon identifier: %s' \
                                 % identifier)
        else:
            if n<1:
                raise NexusError('Illegal taxon identifier: %d<1.' % n)
            if n>self.ntax:
                raise NexusError('Illegal taxon identifier: %d>ntax (=%d).' \
                                 % (n,self.ntax))
            return self.taxlabels[n-1]
    else:
        raise NexusError('Unknown set specification: %s.'% set_type)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._set (   self,
  options 
) [private]

Definition at line 737 of file Nexus.py.

00737 
00738     def _set(self,options):
00739         self.set=options;

def Bio.Nexus.Nexus.Nexus._statelabels (   self,
  options 
) [private]

Definition at line 798 of file Nexus.py.

00798 
00799     def _statelabels(self,options):
00800         #self.charlabels=options
00801         #print 'Command statelabels is not supported and will be ignored.'
00802         pass

def Bio.Nexus.Nexus.Nexus._stateset (   self,
  options 
) [private]

Definition at line 1172 of file Nexus.py.

01172 
01173     def _stateset(self, options):
01174         #Not implemented
01175         pass

def Bio.Nexus.Nexus.Nexus._taxlabels (   self,
  options 
) [private]
Get taxon labels (PRIVATE).

As the taxon names are already in the matrix, this is superfluous
except for transpose matrices, which are currently unsupported anyway.
Thus, we ignore the taxlabels command to make handling of duplicate
taxon names easier.

Definition at line 746 of file Nexus.py.

00746 
00747     def _taxlabels(self,options):
00748         """Get taxon labels (PRIVATE).
00749 
00750         As the taxon names are already in the matrix, this is superfluous
00751         except for transpose matrices, which are currently unsupported anyway.
00752         Thus, we ignore the taxlabels command to make handling of duplicate
00753         taxon names easier.
00754         """
00755         pass
00756         #self.taxlabels=[]
00757         #opts=CharBuffer(options)
00758         #while True:
00759         #    taxon=quotestrip(opts.next_word())
00760         #    if not taxon:
00761         #        break
00762         #    self.taxlabels.append(taxon)

def Bio.Nexus.Nexus.Nexus._taxpartition (   self,
  options 
) [private]

Definition at line 967 of file Nexus.py.

00967 
00968     def _taxpartition(self, options):
00969         taxpartition={}
00970         quotelevel=False
00971         opts=CharBuffer(options)
00972         name=self._name_n_vector(opts)
00973         if not name:
00974             raise NexusError('Formatting error in taxpartition: %s ' % options)
00975         # now collect thesubbpartitions and parse them
00976         # subpartitons separated by commas - which unfortunately could be part of a quoted identifier...
00977         # this is rather unelegant, but we have to avoid double-parsing and potential change of special nexus-words
00978         sub=''
00979         while True:
00980             w=opts.next()
00981             if w is None or (w==',' and not quotelevel):
00982                 subname,subindices=self._get_indices(sub,set_type=TAXSET,separator=':')
00983                 taxpartition[subname]=_make_unique(subindices)
00984                 sub=''
00985                 if w is None:
00986                     break
00987             else:
00988                 if w=="'":
00989                     quotelevel=not quotelevel
00990                 sub+=w
00991         self.taxpartitions[name]=taxpartition

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._taxset (   self,
  options 
) [private]

Definition at line 959 of file Nexus.py.

00959 
00960     def _taxset(self, options):
00961         name,taxa=self._get_indices(options,set_type=TAXSET)
00962         self.taxsets[name]=_make_unique(taxa)
                

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._title (   self,
  options 
) [private]

Definition at line 647 of file Nexus.py.

00647 
00648     def _title(self,options):
00649         pass

def Bio.Nexus.Nexus.Nexus._translate (   self,
  options 
) [private]

Definition at line 892 of file Nexus.py.

00892 
00893     def _translate(self,options):
00894         self.translate={}
00895         opts=CharBuffer(options)
00896         while True:
00897             try:
00898                 # get id and state
00899                 identifier=int(opts.next_word()) 
00900                 label=quotestrip(opts.next_word())
00901                 self.translate[identifier]=label
00902                 # check for comma or end of command
00903                 c=opts.next_nonwhitespace()
00904                 if c is None:
00905                     break
00906                 elif c!=',':
00907                     raise NexusError('Missing \',\' in line %s.' % options)
00908             except NexusError:
00909                 raise
00910             except:
00911                 raise NexusError('Format error in line %s.' % options)

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus._tree (   self,
  options 
) [private]

Definition at line 916 of file Nexus.py.

00916 
00917     def _tree(self,options):
00918         opts=CharBuffer(options)
00919         if opts.peek_nonwhitespace()=='*': # a star can be used to make it the default tree in some software packages
00920             dummy=opts.next_nonwhitespace()
00921         name=opts.next_word()
00922         if opts.next_nonwhitespace()!='=':
00923             raise NexusError('Syntax error in tree description: %s' \
00924                              % options[:50])
00925         rooted=False
00926         weight=1.0
00927         while opts.peek_nonwhitespace()=='[':
00928             open=opts.next_nonwhitespace()
00929             symbol=opts.next()
00930             if symbol!='&':
00931                 raise NexusError('Illegal special comment [%s...] in tree description: %s' \
00932                                  % (symbol, options[:50]))
00933             special=opts.next()
00934             value=opts.next_until(']')
00935             closing=opts.next()
00936             if special=='R':
00937                 rooted=True
00938             elif special=='U':
00939                 rooted=False
00940             elif special=='W':
00941                 weight=float(value)
00942         tree=Tree(name=name,weight=weight,rooted=rooted,tree=opts.rest().strip())
00943         # if there's an active translation table, translate
00944         if self.translate:
00945             for n in tree.get_terminals():
00946                 try:
00947                     tree.node(n).data.taxon=safename(self.translate[int(tree.node(n).data.taxon)])
00948                 except (ValueError,KeyError):
00949                     raise NexusError('Unable to substitue %s using \'translate\' data.' \
00950                                      % tree.node(n).data.taxon)
00951         self.trees.append(tree)

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus._treepartition (   self,
  options 
) [private]

Definition at line 1184 of file Nexus.py.

01184 
01185     def _treepartition(self, options):
01186         #Not implemented
01187         pass

def Bio.Nexus.Nexus.Nexus._treeset (   self,
  options 
) [private]

Definition at line 1180 of file Nexus.py.

01180 
01181     def _treeset(self, options):
01182         #Not implemented
01183         pass

def Bio.Nexus.Nexus.Nexus._unknown_nexus_block (   self,
  title,
  contents 
) [private]

Definition at line 627 of file Nexus.py.

00627 
00628     def _unknown_nexus_block(self,title, contents):
00629         block = Block()
00630         block.commandlines.append(contents)
00631         block.title = title
00632         self.unknown_blocks.append(block)        

def Bio.Nexus.Nexus.Nexus._utree (   self,
  options 
) [private]
Some software (clustalx) uses 'utree' to denote an unrooted tree.

Definition at line 912 of file Nexus.py.

00912 
00913     def _utree(self,options):
00914         """Some software (clustalx) uses 'utree' to denote an unrooted tree."""
00915         self._tree(options)
        

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.add_sequence (   self,
  name,
  sequence 
)
Adds a sequence (string) to the matrix.

Definition at line 1570 of file Nexus.py.

01570 
01571     def add_sequence(self,name,sequence):
01572         """Adds a sequence (string) to the matrix."""
01573         
01574         if not name:
01575             raise NexusError('New sequence must have a name')
01576 
01577         diff=self.nchar-len(sequence)
01578         if diff<0:
01579             self.insert_gap(self.nchar,-diff)
01580         elif diff>0:
01581             sequence+=self.missing*diff
01582 
01583         if name in self.taxlabels:
01584             unique_name=_unique_label(self.taxlabels,name)
01585             #print "WARNING: Sequence name %s is already present. Sequence was added as %s." % (name,unique_name)
01586         else:
01587             unique_name=name
01588 
01589         assert unique_name not in self.matrix, "ERROR. There is a discrepancy between taxlabels and matrix keys. Report this as a bug."
01590 
01591         self.matrix[unique_name]=Seq(sequence,self.alphabet)
01592         self.ntax+=1
01593         self.taxlabels.append(unique_name)
01594         self.unaltered_taxlabels.append(name)

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.append_sets (   self,
  exclude = [],
  delete = [],
  mrbayes = False,
  include_codons = True,
  codons_only = False 
)
Returns a sets block.

Definition at line 1349 of file Nexus.py.

01349 
01350     def append_sets(self,exclude=[],delete=[],mrbayes=False,include_codons=True,codons_only=False):
01351         """Returns a sets block."""
01352         if not self.charsets and not self.taxsets and not self.charpartitions:
01353             return ''
01354         if codons_only:
01355             setsb=['\nbegin codons']
01356         else:
01357             setsb=['\nbegin sets']
01358         # - now if characters have been excluded, the character sets need to be adjusted,
01359         #   so that they still point to the right character positions
01360         # calculate a list of offsets: for each deleted character, the following character position
01361         # in the new file will have an additional offset of -1
01362         offset=0
01363         offlist=[]
01364         for c in range(self.nchar):
01365             if c in exclude:
01366                 offset+=1
01367                 offlist.append(-1)  # dummy value as these character positions are excluded
01368             else:
01369                 offlist.append(c-offset)
01370         # now adjust each of the character sets
01371         if not codons_only:
01372             for n,ns in self.charsets.iteritems():
01373                 cset=[offlist[c] for c in ns if c not in exclude]
01374                 if cset: 
01375                     setsb.append('charset %s = %s' % (safename(n),_compact4nexus(cset))) 
01376             for n,s in self.taxsets.iteritems():
01377                 tset=[safename(t,mrbayes=mrbayes) for t in s if t not in delete]
01378                 if tset:
01379                     setsb.append('taxset %s = %s' % (safename(n),' '.join(tset))) 
01380         for n,p in self.charpartitions.iteritems():
01381             if not include_codons and n==CODONPOSITIONS:
01382                 continue
01383             elif codons_only and n!=CODONPOSITIONS:
01384                 continue
01385             # as characters have been excluded, the partitions must be adjusted
01386             # if a partition is empty, it will be omitted from the charpartition command
01387             # (although paup allows charpartition part=t1:,t2:,t3:1-100)
01388             names=_sort_keys_by_values(p)
01389             newpartition={}
01390             for sn in names:
01391                 nsp=[offlist[c] for c in p[sn] if c not in exclude]
01392                 if nsp:
01393                     newpartition[sn]=nsp
01394             if newpartition:
01395                 if include_codons and n==CODONPOSITIONS:
01396                     command='codonposset'
01397                 else:
01398                     command='charpartition'
01399                 setsb.append('%s %s = %s' % (command,safename(n),\
01400                 ', '.join(['%s: %s' % (sn,_compact4nexus(newpartition[sn])) for sn in names if sn in newpartition])))
01401         # now write charpartititions, much easier than charpartitions
01402         for n,p in self.taxpartitions.iteritems():
01403             names=_sort_keys_by_values(p)
01404             newpartition={}
01405             for sn in names:
01406                 nsp=[t for t in p[sn] if t not in delete]
01407                 if nsp:
01408                     newpartition[sn]=nsp
01409             if newpartition:
01410                 setsb.append('taxpartition %s = %s' % (safename(n),\
01411                 ', '.join(['%s: %s' % (safename(sn),' '.join(map(safename,newpartition[sn]))) for sn in names if sn in newpartition])))
01412         # add 'end' and return everything
01413         setsb.append('end;\n')
01414         if len(setsb)==2: # begin and end only
01415                 return ''
01416         else:
01417             return ';\n'.join(setsb)
    

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus.bootstrap (   self,
  matrix = None,
  delete = [],
  exclude = [] 
)
Return a bootstrapped matrix.

Definition at line 1548 of file Nexus.py.

01548 
01549     def bootstrap(self,matrix=None,delete=[],exclude=[]):
01550         """Return a bootstrapped matrix."""
01551         if not matrix:
01552             matrix=self.matrix
01553         seqobjects=isinstance(matrix[matrix.keys()[0]],Seq)         # remember if Seq objects
01554         cm=self.crop_matrix(delete=delete,exclude=exclude)          # crop data out
01555         if not cm:                                                  # everything deleted?
01556             return {}
01557         elif len(cm[cm.keys()[0]])==0:                              # everything excluded?
01558             return cm
01559         undelete=[t for t in self.taxlabels if t in cm]  
01560         if seqobjects:
01561             sitesm=zip(*[cm[t].tostring() for t in undelete])
01562             alphabet=matrix[matrix.keys()[0]].alphabet
01563         else:
01564             sitesm=zip(*[cm[t] for t in undelete])
01565         bootstrapsitesm=[sitesm[random.randint(0,len(sitesm)-1)] for i in range(len(sitesm))]
01566         bootstrapseqs=map(''.join,zip(*bootstrapsitesm))
01567         if seqobjects:
01568             bootstrapseqs=[Seq(s,alphabet) for s in bootstrapseqs]
01569         return dict(zip(undelete,bootstrapseqs)) 

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.constant (   self,
  matrix = None,
  delete = [],
  exclude = [] 
)
Return a list with all constant characters.

Definition at line 1450 of file Nexus.py.

01450 
01451     def constant(self,matrix=None,delete=[],exclude=[]):
01452         """Return a list with all constant characters."""
01453         if not matrix:
01454             matrix=self.matrix
01455         undelete=[t for t in self.taxlabels if t in matrix and t not in delete]
01456         if not undelete:
01457             return None
01458         elif len(undelete)==1:
01459             return [x for x in range(len(matrix[undelete[0]])) if x not in exclude]
01460         # get the first sequence and expand all ambiguous values
01461         constant=[(x,self.ambiguous_values.get(n.upper(),n.upper())) for 
01462                 x,n in enumerate(matrix[undelete[0]].tostring()) if x not in exclude]
01463         for taxon in undelete[1:]:
01464             newconstant=[]
01465             for site in constant:
01466                 #print '%d (paup=%d)' % (site[0],site[0]+1),
01467                 seqsite=matrix[taxon][site[0]].upper()
01468                 #print seqsite,'checked against',site[1],'\t',
01469                 if seqsite==self.missing or (seqsite==self.gap and self.options['gapmode'].lower()=='missing') or seqsite==site[1]: 
01470                     # missing or same as before  -> ok
01471                     newconstant.append(site)
01472                 elif seqsite in site[1] or site[1]==self.missing or (self.options['gapmode'].lower()=='missing' and site[1]==self.gap):
01473                     # subset of an ambig or only missing in previous -> take subset
01474                     newconstant.append((site[0],self.ambiguous_values.get(seqsite,seqsite)))
01475                 elif seqsite in self.ambiguous_values:  # is it an ambig: check the intersection with prev. values
01476                     intersect = set(self.ambiguous_values[seqsite]).intersection(set(site[1]))
01477                     if intersect:
01478                         newconstant.append((site[0],''.join(intersect)))
01479                     #    print 'ok'
01480                     #else:
01481                     #    print 'failed'
01482                 #else:
01483                 #    print 'failed'
01484             constant=newconstant
01485         cpos=[s[0] for s in constant]
01486         return cpos

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.crop_matrix (   self,
  matrix = None,
  delete = [],
  exclude = [] 
)
Return a matrix without deleted taxa and excluded characters.

Definition at line 1525 of file Nexus.py.

01525 
01526     def crop_matrix(self,matrix=None, delete=[], exclude=[]):
01527         """Return a matrix without deleted taxa and excluded characters."""
01528         if not matrix:
01529             matrix=self.matrix
01530         if [t for t in delete if not self._check_taxlabels(t)]:
01531             raise NexusError('Unknown taxa: %s' \
01532                              % ', '.join(set(delete).difference(self.taxlabels)))
01533         if exclude!=[]:
01534             undelete=[t for t in self.taxlabels if t in matrix and t not in delete]
01535             if not undelete:
01536                 return {}
01537             m=[matrix[k].tostring() for k in undelete]
01538             zipped_m=zip(*m)
01539             sitesm=[s for i,s in enumerate(zipped_m) if i not in exclude]
01540             if sitesm==[]:
01541                 return dict([(t,Seq('',self.alphabet)) for t in undelete])
01542             else:
01543                 zipped_sitesm=zip(*sitesm)
01544                 m=[Seq(s,self.alphabet) for s in map(''.join,zipped_sitesm)]
01545                 return dict(zip(undelete,m))
01546         else:
01547             return dict([(t,matrix[t]) for t in self.taxlabels if t in matrix and t not in delete])
           

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus.cstatus (   self,
  site,
  delete = [],
  narrow = True 
)
Summarize character.

narrow=True:  paup-mode (a c ? --> ac; ? ? ? --> ?)
narrow=false:           (a c ? --> a c g t -; ? ? ? --> a c g t -)

Definition at line 1487 of file Nexus.py.

01487 
01488     def cstatus(self,site,delete=[],narrow=True):
01489         """Summarize character.
01490 
01491         narrow=True:  paup-mode (a c ? --> ac; ? ? ? --> ?)
01492         narrow=false:           (a c ? --> a c g t -; ? ? ? --> a c g t -)
01493         """
01494         undelete=[t for t in self.taxlabels if t not in delete]
01495         if not undelete:
01496             return None
01497         cstatus=[]
01498         for t in undelete:
01499             c=self.matrix[t][site].upper()
01500             if self.options.get('gapmode')=='missing' and c==self.gap:
01501                 c=self.missing
01502             if narrow and c==self.missing:
01503                 if c not in cstatus:
01504                     cstatus.append(c)
01505             else:
01506                 cstatus.extend([b for b in self.ambiguous_values[c] if b not in cstatus])
01507         if self.missing in cstatus and narrow and len(cstatus)>1:
01508             cstatus=[c for c in cstatus if c!=self.missing]
01509         cstatus.sort()
01510         return cstatus

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus.export_fasta (   self,
  filename = None,
  width = 70 
)
Writes matrix into a fasta file: (self, filename=None, width=70).

Definition at line 1418 of file Nexus.py.

01418 
01419     def export_fasta(self, filename=None, width=70):
01420         """Writes matrix into a fasta file: (self, filename=None, width=70)."""       
01421         if not filename:
01422             if '.' in self.filename and self.filename.split('.')[-1].lower() in ['paup','nexus','nex','dat']:
01423                 filename='.'.join(self.filename.split('.')[:-1])+'.fas'
01424             else:
01425                 filename=self.filename+'.fas'
01426         fh=open(filename,'w')
01427         for taxon in self.taxlabels:
01428             fh.write('>'+safename(taxon)+'\n')
01429             for i in range(0, len(self.matrix[taxon].tostring()), width):
01430                 fh.write(self.matrix[taxon].tostring()[i:i+width] + '\n')    
01431         fh.close()
01432         return filename

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.export_phylip (   self,
  filename = None 
)
Writes matrix into a PHYLIP file: (self, filename=None).

Note that this writes a relaxed PHYLIP format file, where the names
are not truncated, nor checked for invalid characters.

Definition at line 1433 of file Nexus.py.

01433 
01434     def export_phylip(self, filename=None):
01435         """Writes matrix into a PHYLIP file: (self, filename=None).
01436 
01437         Note that this writes a relaxed PHYLIP format file, where the names
01438         are not truncated, nor checked for invalid characters."""
01439         if not filename:
01440             if '.' in self.filename and self.filename.split('.')[-1].lower() in ['paup','nexus','nex','dat']:
01441                 filename='.'.join(self.filename.split('.')[:-1])+'.phy'
01442             else:
01443                 filename=self.filename+'.phy'
01444         fh=open(filename,'w')
01445         fh.write('%d %d\n' % (self.ntax,self.nchar))
01446         for taxon in self.taxlabels:
01447             fh.write('%s %s\n' % (safename(taxon),self.matrix[taxon].tostring()))
01448         fh.close()
01449         return filename
    

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.gaponly (   self,
  include_missing = False 
)
Return gap-only sites.

Definition at line 1681 of file Nexus.py.

01681 
01682     def gaponly(self,include_missing=False):
01683         """Return gap-only sites."""
01684         gap=set(self.gap)
01685         if include_missing:
01686             gap.add(self.missing)
01687         sitesm=zip(*[self.matrix[t].tostring() for t in self.taxlabels])
01688         gaponly=[i for i,site in enumerate(sitesm) if set(site).issubset(gap)]
01689         return gaponly 
        

Here is the call graph for this function:

Included for backwards compatibility (DEPRECATED).

Definition at line 556 of file Nexus.py.

00556 
00557     def get_original_taxon_order(self):
00558         """Included for backwards compatibility (DEPRECATED)."""
        return self.taxlabels
def Bio.Nexus.Nexus.Nexus.insert_gap (   self,
  pos,
  n = 1,
  leftgreedy = False 
)
Add a gap into the matrix and adjust charsets and partitions.

pos=0: first position
pos=nchar: last position

Definition at line 1595 of file Nexus.py.

01595 
01596     def insert_gap(self,pos,n=1,leftgreedy=False):
01597         """Add a gap into the matrix and adjust charsets and partitions.
01598         
01599         pos=0: first position
01600         pos=nchar: last position
01601         """
01602 
01603         def _adjust(set,x,d,leftgreedy=False):
01604             """Adjusts chartacter sets if gaps are inserted, taking care of
01605             new gaps within a coherent character set.""" 
01606             # if 3 gaps are inserted at pos. 9 in a set that looks like 1 2 3  8 9 10 11 13 14 15
01607             # then the adjusted set will be 1 2 3  8 9 10 11 12 13 14 15 16 17 18 
01608             # but inserting into position 8 it will stay like 1 2 3 11 12 13 14 15 16 17 18
01609             set.sort()
01610             addpos=0
01611             for i,c in enumerate(set):
01612                 if c>=x:
01613                     set[i]=c+d
01614                 # if we add gaps within a group of characters, we want the gap position included in this group
01615                 if c==x:
01616                     if leftgreedy or (i>0 and set[i-1]==c-1):  
01617                         addpos=i
01618             if addpos>0:
01619                 set[addpos:addpos]=range(x,x+d)
01620             return set
01621 
01622         if pos<0 or pos>self.nchar:
01623             raise NexusError('Illegal gap position: %d' % pos)
01624         if n==0:
01625             return
01626         if self.taxlabels:
01627             #python 2.3 does not support zip(*[])
01628             sitesm=zip(*[self.matrix[t].tostring() for t in self.taxlabels])
01629         else:
01630             sitesm=[]
01631         sitesm[pos:pos]=[['-']*len(self.taxlabels)]*n
01632         # #self.matrix=dict([(taxon,Seq(map(''.join,zip(*sitesm))[i],self.alphabet)) for\
01633         #        i,taxon in enumerate(self.taxlabels)])
01634         zipped=zip(*sitesm)
01635         mapped=map(''.join,zipped)
01636         listed=[(taxon,Seq(mapped[i],self.alphabet)) for i,taxon in enumerate(self.taxlabels)]
01637         self.matrix=dict(listed) 
01638         self.nchar+=n
01639         # now adjust character sets
01640         for i,s in self.charsets.iteritems():
01641             self.charsets[i]=_adjust(s,pos,n,leftgreedy=leftgreedy)
01642         for p in self.charpartitions:
01643             for sp,s in self.charpartitions[p].iteritems():
01644                 self.charpartitions[p][sp]=_adjust(s,pos,n,leftgreedy=leftgreedy)
01645         # now adjust character state labels
01646         self.charlabels=self._adjust_charlabels(insert=[pos]*n)
01647         return self.charlabels
      

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus.invert (   self,
  charlist 
)
Returns all character indices that are not in charlist.

Definition at line 1677 of file Nexus.py.

01677 
01678     def invert(self,charlist):
01679         """Returns all character indices that are not in charlist."""
01680         return [c for c in range(self.nchar) if c not in charlist]

def Bio.Nexus.Nexus.Nexus.read (   self,
  input 
)
Read and parse NEXUS input (a filename, file-handle, or string).

Definition at line 564 of file Nexus.py.

00564 
00565     def read(self,input):
00566         """Read and parse NEXUS imput (a filename, file-handle, or string)."""
00567 
00568         # 1. Assume we have the name of a file in the execution dir or a
00569         # file-like object.
00570         # Note we need to add parsing of the path to dir/filename
00571         try:
00572             with File.as_handle(input, 'rU') as fp:
00573                 file_contents = fp.read()
00574                 self.filename = getattr(fp, 'name', 'Unknown_nexus_file')
00575         except (TypeError,IOError,AttributeError):
00576             #2 Assume we have a string from a fh.read()
00577             if isinstance(input, basestring):
00578                 file_contents = input
00579                 self.filename='input_string'
00580             else:
00581                 print input.strip()[:50]
00582                 raise NexusError('Unrecognized input: %s ...' % input[:100])
00583         file_contents=file_contents.strip()
00584         if file_contents.startswith('#NEXUS'):
00585             file_contents=file_contents[6:]
00586         commandlines=_get_command_lines(file_contents)
00587         # get rid of stupid 'NEXUS token - in merged treefiles, this might appear multiple times'
00588         for i,cl in enumerate(commandlines):
00589             try:
00590                 if cl[:6].upper()=='#NEXUS':
00591                     commandlines[i]=cl[6:].strip()
00592             except:
00593                 pass
00594         # now loop through blocks (we parse only data in known blocks, thus ignoring non-block commands
00595         nexus_block_gen = self._get_nexus_block(commandlines)
00596         while 1:
00597             try:
00598                 title, contents = nexus_block_gen.next()
00599             except StopIteration:
00600                 break
00601             if title in KNOWN_NEXUS_BLOCKS:
00602                 self._parse_nexus_block(title, contents)
00603             else:
00604                 self._unknown_nexus_block(title, contents)

Included for backwards compatibility (DEPRECATED).

Definition at line 559 of file Nexus.py.

00559 
00560     def set_original_taxon_order(self,value):
00561         """Included for backwards compatibility (DEPRECATED)."""
        self.taxlabels=value
def Bio.Nexus.Nexus.Nexus.terminal_gap_to_missing (   self,
  missing = None,
  skip_n = True 
)
Replaces all terminal gaps with missing character.

Mixtures like ???------??------- are properly resolved.

Definition at line 1690 of file Nexus.py.

01690 
01691     def terminal_gap_to_missing(self,missing=None,skip_n=True):
01692         """Replaces all terminal gaps with missing character.
01693         
01694         Mixtures like ???------??------- are properly resolved."""
01695         
01696         if not missing:
01697             missing=self.missing
01698         replace=[self.missing,self.gap]
01699         if not skip_n:
01700             replace.extend(['n','N'])
01701         for taxon in self.taxlabels:
01702             sequence=self.matrix[taxon].tostring()
01703             length=len(sequence)
01704             start,end=get_start_end(sequence,skiplist=replace)
01705             if start==-1 and end==-1:
01706                 sequence=missing*length
01707             else:
01708                 sequence=sequence[:end+1]+missing*(length-end-1)
01709                 sequence=start*missing+sequence[start:]
01710             assert length==len(sequence), 'Illegal sequence manipulation in Nexus.termial_gap_to_missing in taxon %s' % taxon
01711             self.matrix[taxon]=Seq(sequence,self.alphabet)
01712 
01713 
try:

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.weighted_stepmatrix (   self,
  name = 'your_name_here',
  exclude = [],
  delete = [] 
)
Calculates a stepmatrix for weighted parsimony.

See Wheeler (1990), Cladistics 6:269-275 and
Felsenstein (1981), Biol. J. Linn. Soc. 16:183-196

Definition at line 1511 of file Nexus.py.

01511 
01512     def weighted_stepmatrix(self,name='your_name_here',exclude=[],delete=[]):
01513         """Calculates a stepmatrix for weighted parsimony.
01514 
01515         See Wheeler (1990), Cladistics 6:269-275 and
01516         Felsenstein (1981), Biol. J. Linn. Soc. 16:183-196
01517         """    
01518         m=StepMatrix(self.unambiguous_letters,self.gap)
01519         for site in [s for s in range(self.nchar) if s not in exclude]:
01520             cstatus=self.cstatus(site,delete)
01521             for i,b1 in enumerate(cstatus[:-1]):
01522                 for b2 in cstatus[i+1:]:
01523                     m.add(b1.upper(),b2.upper(),1)
01524         return m.transformation().weighting().smprint(name=name)

Here is the call graph for this function:

def Bio.Nexus.Nexus.Nexus.write_nexus_data (   self,
  filename = None,
  matrix = None,
  exclude = [],
  delete = [],
  blocksize = None,
  interleave = False,
  interleave_by_partition = False,
  comment = None,
  omit_NEXUS = False,
  append_sets = True,
  mrbayes = False,
  codons_block = True 
)
Writes a nexus file with data and sets block to a file or handle.

Character sets and partitions are appended by default, and are
adjusted according to excluded characters (i.e. character sets
still point to the same sites, though not necessarily the same
positions, without including the deleted characters).

filename - Either a filename as a string (which will be opened,
   written to and closed), or a handle object (which will
   be written to but NOT closed).
interleave_by_partition - Optional name of partition (string)
omit_NEXUS - Boolean.  If true, the '#NEXUS' line normally at the
   start of the file is omitted.

Returns the filename/handle used to write the data.

Definition at line 1229 of file Nexus.py.

01229 
01230             codons_block=True):
01231         """Writes a nexus file with data and sets block to a file or handle.
01232 
01233         Character sets and partitions are appended by default, and are
01234         adjusted according to excluded characters (i.e. character sets
01235         still point to the same sites (not necessarily same positions),
01236         without including the deleted characters.
01237 
01238         filename - Either a filename as a string (which will be opened,
01239                    written to and closed), or a handle object (which will
01240                    be written to but NOT closed).
01241         interleave_by_partition - Optional name of partition (string)
01242         omit_NEXUS - Boolean.  If true, the '#NEXUS' line normally at the
01243                    start of the file is ommited.
01244 
01245         Returns the filename/handle used to write the data.
01246         """
01247         if not matrix:
01248             matrix=self.matrix
01249         if not matrix:
01250             return
01251         if not filename:
01252             filename=self.filename
01253         if [t for t in delete if not self._check_taxlabels(t)]:
01254             raise NexusError('Unknown taxa: %s' \
01255                              % ', '.join(set(delete).difference(set(self.taxlabels))))
01256         if interleave_by_partition:
01257             if not interleave_by_partition in self.charpartitions:
01258                 raise NexusError('Unknown partition: %r' % interleave_by_partition)
01259             else:
01260                 partition=self.charpartitions[interleave_by_partition]
01261                 # we need to sort the partition names by starting position before we exclude characters
01262                 names=_sort_keys_by_values(partition)
01263                 newpartition={}
01264                 for p in partition:
01265                     newpartition[p]=[c for c in partition[p] if c not in exclude]
01266         # how many taxa and how many characters are left?
01267         undelete=[taxon for taxon in self.taxlabels if taxon in matrix and taxon not in delete]
01268         cropped_matrix=_seqmatrix2strmatrix(self.crop_matrix(matrix,exclude=exclude,delete=delete))
01269         ntax_adjusted=len(undelete)
01270         nchar_adjusted=len(cropped_matrix[undelete[0]])
01271         if not undelete or (undelete and undelete[0]==''):
01272             return
01273 
01274         with File.as_handle(filename, mode='w') as fh:
01275             if not omit_NEXUS:
01276                 fh.write('#NEXUS\n')
01277             if comment:
01278                 fh.write('['+comment+']\n')
01279             fh.write('begin data;\n')
01280             fh.write('\tdimensions ntax=%d nchar=%d;\n' % (ntax_adjusted, nchar_adjusted))
01281             fh.write('\tformat datatype='+self.datatype)
01282             if self.respectcase:
01283                 fh.write(' respectcase')
01284             if self.missing:
01285                 fh.write(' missing='+self.missing)
01286             if self.gap:
01287                 fh.write(' gap='+self.gap)
01288             if self.matchchar:
01289                 fh.write(' matchchar='+self.matchchar)
01290             if self.labels:
01291                 fh.write(' labels='+self.labels)
01292             if self.equate:
01293                 fh.write(' equate='+self.equate)
01294             if interleave or interleave_by_partition:
01295                 fh.write(' interleave')
01296             fh.write(';\n')
01297             #if self.taxlabels:
01298             #    fh.write('taxlabels '+' '.join(self.taxlabels)+';\n')
01299             if self.charlabels:
01300                 newcharlabels=self._adjust_charlabels(exclude=exclude)
01301                 clkeys=sorted(newcharlabels)
01302                 fh.write('charlabels '+', '.join(["%s %s" % (k+1,safename(newcharlabels[k])) for k in clkeys])+';\n')
01303             fh.write('matrix\n')
01304             if not blocksize:
01305                 if interleave:
01306                     blocksize=70
01307                 else:
01308                     blocksize=self.nchar
01309             # delete deleted taxa and ecxclude excluded characters...
01310             namelength=max([len(safename(t,mrbayes=mrbayes)) for t in undelete])
01311             if interleave_by_partition:
01312                 # interleave by partitions, but adjust partitions with regard to excluded characters
01313                 seek=0
01314                 for p in names:
01315                     fh.write('[%s: %s]\n' % (interleave_by_partition,p))
01316                     if len(newpartition[p])>0:
01317                         for taxon in undelete:
01318                             fh.write(safename(taxon,mrbayes=mrbayes).ljust(namelength+1))
01319                             fh.write(cropped_matrix[taxon][seek:seek+len(newpartition[p])]+'\n')
01320                         fh.write('\n')
01321                     else:
01322                         fh.write('[empty]\n\n')
01323                     seek+=len(newpartition[p])
01324             elif interleave:
01325                 for seek in range(0,nchar_adjusted,blocksize):
01326                     for taxon in undelete:
01327                         fh.write(safename(taxon,mrbayes=mrbayes).ljust(namelength+1))
01328                         fh.write(cropped_matrix[taxon][seek:seek+blocksize]+'\n')
01329                     fh.write('\n')
01330             else:
01331                 for taxon in undelete:
01332                     if blocksize<nchar_adjusted:
01333                         fh.write(safename(taxon,mrbayes=mrbayes)+'\n')
01334                     else:
01335                         fh.write(safename(taxon,mrbayes=mrbayes).ljust(namelength+1))
01336                     taxon_seq = cropped_matrix[taxon]
01337                     for seek in range(0,nchar_adjusted,blocksize):
01338                         fh.write(taxon_seq[seek:seek+blocksize]+'\n')
01339                     del taxon_seq
01340             fh.write(';\nend;\n')
01341             if append_sets:
01342                 if codons_block:
01343                     fh.write(self.append_sets(exclude=exclude,delete=delete,mrbayes=mrbayes,include_codons=False))
01344                     fh.write(self.append_sets(exclude=exclude,delete=delete,mrbayes=mrbayes,codons_only=True))
01345                 else:
01346                     fh.write(self.append_sets(exclude=exclude,delete=delete,mrbayes=mrbayes))
01347         return filename
01348 

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.Nexus.Nexus.Nexus.write_nexus_data_partitions (   self,
  matrix = None,
  filename = None,
  blocksize = None,
  interleave = False,
  exclude = [],
  delete = [],
  charpartition = None,
  comment = '',
  mrbayes = False 
)
Writes a nexus file for each partition in charpartition.

Only non-excluded characters and non-deleted taxa are included,
and only the data block is written.

Definition at line 1189 of file Nexus.py.

01189 
01190             exclude=[], delete=[], charpartition=None, comment='',mrbayes=False):
01191         """Writes a nexus file for each partition in charpartition.
01192 
01193         Only non-excluded characters and non-deleted taxa are included,
01194         just the data block is written.
01195         """
01196 
01197         if not matrix:
01198             matrix=self.matrix
01199         if not matrix:
01200             return
01201         if not filename:
01202             filename=self.filename
01203         if charpartition:
01204             pfilenames={}
01205             for p in charpartition:
01206                 total_exclude=[]+exclude
01207                 total_exclude.extend([c for c in range(self.nchar) if c not in charpartition[p]])
01208                 total_exclude=_make_unique(total_exclude)
01209                 pcomment=comment+'\nPartition: '+p+'\n'
01210                 dot=filename.rfind('.')
01211                 if dot>0:
01212                     pfilename=filename[:dot]+'_'+p+'.data'
01213                 else:
01214                     pfilename=filename+'_'+p
01215                 pfilenames[p]=pfilename
01216                 self.write_nexus_data(filename=pfilename,matrix=matrix,blocksize=blocksize,
01217                         interleave=interleave,exclude=total_exclude,delete=delete,comment=pcomment,append_sets=False,
01218                         mrbayes=mrbayes)
01219             return pfilenames
01220         else:
01221             fn=self.filename+'.data'
01222             self.write_nexus_data(filename=fn,matrix=matrix,blocksize=blocksize,interleave=interleave,
01223                     exclude=exclude,delete=delete,comment=comment,append_sets=False,
01224                     mrbayes=mrbayes)
01225             return fn
    

Here is the call graph for this function:


Member Data Documentation

list Bio.Nexus.Nexus.Nexus.__slots__ = ['original_taxon_order','__dict__'] [static, private]

Definition at line 514 of file Nexus.py.

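Bio.Nexus.Nexus.Nexus.alphabet

Definition at line 675 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.ambiguous_values

Definition at line 676 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.charlabels

Definition at line 521 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.charpartitions

Definition at line 539 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.charsets

Definition at line 538 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.codonposset

Definition at line 546 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.datatype

Definition at line 523 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.eliminate

Definition at line 534 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.equate

Definition at line 528 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.filename

Definition at line 573 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.gap

Definition at line 526 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.interleave

Definition at line 532 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.labels

Definition at line 530 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.matchchar

Definition at line 529 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.matrix

Definition at line 535 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.missing

Definition at line 525 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.nchar

Definition at line 518 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.ntax

Definition at line 517 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.options

Definition at line 545 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.respectcase

Definition at line 524 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.rev_ambiguous_values

Definition at line 701 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.set

Definition at line 544 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.statelabels

Definition at line 522 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.structured

Definition at line 543 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.symbols

Definition at line 527 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.taxlabels

Definition at line 520 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.taxpartitions

Definition at line 540 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.taxsets

Definition at line 537 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.tokens

Definition at line 533 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.translate

Definition at line 542 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.transpose

Definition at line 531 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.trees

Definition at line 541 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.unaltered_taxlabels

Definition at line 519 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.unambiguous_letters

Definition at line 677 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.unknown_blocks

Definition at line 536 of file Nexus.py.

Bio.Nexus.Nexus.Nexus.valid_characters

Definition at line 695 of file Nexus.py.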


The documentation for this class was generated from the following file: