Back to index

python-biopython  1.60
PhyloXMLIO.py
Go to the documentation of this file.
00001 # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
00002 # This code is part of the Biopython distribution and governed by its
00003 # license. Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """PhyloXML reader/parser, writer, and associated functions.
00007 
00008 Instantiates tree elements from a parsed PhyloXML file, and constructs an XML
00009 file from a `Bio.Phylo.PhyloXML` object.
00010 
00011 About capitalization:
00012 
00013 - phyloXML means the file format specification
00014 - PhyloXML means the Biopython module `Bio.Phylo.PhyloXML` and its classes
00015 - Phyloxml means the top-level class used by `PhyloXMLIO.read` (but not
00016   `Bio.Phylo.read`!), containing a list of Phylogenies (objects derived from
00017   `BaseTree.Tree`)
00018 """
00019 __docformat__ = "restructuredtext en"
00020 
00021 import sys
00022 import warnings
00023 
00024 from Bio.Phylo import PhyloXML as PX
00025 
00026 if (3, 0, 0) <= sys.version_info[:3] <= (3, 1, 3):
00027     # Workaround for cElementTree regression in python 3.0--3.1.3
00028     # See http://bugs.python.org/issue9257
00029     from xml.etree import ElementTree
00030 else:
00031     try:
00032         from xml.etree import cElementTree as ElementTree
00033     except ImportError:
00034         # Alternative Python implementation, perhaps?
00035         from xml.etree import ElementTree as ElementTree
00036 
# Recognize the phyloXML namespace when parsing
# See http://effbot.org/zone/element-namespaces.htm
NAMESPACES = {
        'phy':  'http://www.phyloxml.org',
        }

try:
    # Use ElementTree's own registration function when it has one
    register_namespace = ElementTree.register_namespace
except AttributeError:
    if not hasattr(ElementTree, '_namespace_map'):
        # cElementTree needs the pure-Python xml.etree.ElementTree
        from xml.etree import ElementTree as ET_py
        ElementTree._namespace_map = ET_py._namespace_map

    def register_namespace(prefix, uri):
        """Record *prefix* as the output prefix for namespace *uri*."""
        ElementTree._namespace_map[uri] = prefix

# dict.items() exists on Python 2 and 3 alike; iteritems() is Python 2 only,
# which would make importing this module fail on Python 3.
for prefix, uri in NAMESPACES.items():
    register_namespace(prefix, uri)
00056 
00057 
class PhyloXMLError(Exception):
    """Signal that a PhyloXML object cannot be built from the parsed input.

    Malformed XML is detected and reported by the underlying ElementTree
    module; this exception is reserved for well-formed XML that violates the
    phyloXML specification.
    """
00066 
00067 
00068 # ---------------------------------------------------------
00069 # Public API
00070 
def read(file):
    """Load a whole phyloXML file or stream as one tree of Biopython objects.

    The object returned holds the document's phylogenies and possibly other
    arbitrary (non-phyloXML) objects found directly under the root node.

    :returns: a single `Bio.Phylo.PhyloXML.Phyloxml` object.
    """
    parser = Parser(file)
    return parser.read()
00080 
def parse(file):
    """Step through the phylogenetic trees of a phyloXML file one by one.

    Any additional data stored at the top level is ignored; in exchange this
    may be more memory-efficient than the `read` function.

    :returns: a generator of `Bio.Phylo.PhyloXML.Phylogeny` objects.
    """
    parser = Parser(file)
    return parser.parse()
00090 
def write(obj, file, encoding='utf-8', indent=True):
    """Serialize one or more trees as a phyloXML file.

    :Parameters:
        obj
            an instance of `Phyloxml`, `Phylogeny` or `BaseTree.Tree`, or an
            iterable of either of the latter two. Whatever is given is
            converted to a Phyloxml object before serialization.
        file
            either an open handle or a file name.
        encoding
            passed through to `Writer.write`.
        indent
            passed through to `Writer.write`.

    :returns: the value returned by `Writer.write`.
    """
    def as_phylogeny(item):
        # The PhyloXML-specific classes are tested before the generic
        # BaseTree ones, so an existing Phylogeny is returned untouched.
        if isinstance(item, PX.Phylogeny):
            return item
        if isinstance(item, PX.Clade):
            return item.to_phylogeny()
        if isinstance(item, PX.BaseTree.Tree):
            return PX.Phylogeny.from_tree(item)
        if isinstance(item, PX.BaseTree.Clade):
            return PX.Phylogeny.from_tree(PX.BaseTree.Tree(root=item))
        raise ValueError("iterable must contain Tree or Clade types")

    if not isinstance(obj, PX.Phyloxml):
        if isinstance(obj, (PX.BaseTree.Tree, PX.BaseTree.Clade)):
            # A single tree or clade
            obj = as_phylogeny(obj).to_phyloxml()
        elif hasattr(obj, '__iter__'):
            # A collection of trees, converted lazily
            obj = PX.Phyloxml({}, phylogenies=(as_phylogeny(t) for t in obj))
        else:
            raise ValueError("First argument must be a Phyloxml, Phylogeny, "
                    "Tree, or iterable of Trees or Phylogenies.")
    writer = Writer(obj)
    return writer.write(file, encoding=encoding, indent=indent)
00125 
00126 
00127 # ---------------------------------------------------------
00128 # Functions I wish ElementTree had
00129 
00130 def _local(tag):
00131     """Extract the local tag from a namespaced tag name."""
00132     if tag[0] == '{':
00133         return tag[tag.index('}')+1:]
00134     return tag
00135 
00136 def _split_namespace(tag):
00137     """Split a tag into namespace and local tag strings."""
00138     try:
00139         return tag[1:].split('}', 1)
00140     except:
00141         return ('', tag)
00142 
def _ns(tag, namespace=NAMESPACES['phy']):
    """Qualify a tag name with a namespace, as ``{namespace}tag``.

    The namespace defaults to the phyloXML one registered in `NAMESPACES`.
    """
    qualified = '{%s}%s' % (namespace, tag)
    return qualified
00146 
def _get_child_as(parent, tag, construct):
    """Look up one child node by tag and hand it to a constructor.

    The tag is looked up in the phyloXML namespace. Returns None if no
    matching child is found.
    """
    node = parent.find(_ns(tag))
    if node is None:
        return None
    return construct(node)
00155 
def _get_child_text(parent, tag, construct=unicode):
    """Look up one child node by tag and convert its text.

    The tag is looked up in the phyloXML namespace. Returns None if no
    matching child is found, or if the child has no (or empty) text.
    """
    node = parent.find(_ns(tag))
    if node is None or not node.text:
        return None
    return construct(node.text)
00164 
def _get_children_as(parent, tag, construct):
    """Collect all child nodes with a tag, each passed through a constructor.

    The tag is looked up in the phyloXML namespace. Returns an empty list if
    no matching child is found.
    """
    built = []
    for node in parent.findall(_ns(tag)):
        built.append(construct(node))
    return built
00172 
def _get_children_text(parent, tag, construct=unicode):
    """Collect the converted text of all child nodes with a tag.

    The tag is looked up in the phyloXML namespace; children without text are
    skipped. Returns an empty list if no matching child is found.
    """
    texts = []
    for node in parent.findall(_ns(tag)):
        if node.text:
            texts.append(construct(node.text))
    return texts
00181 
00182 def _indent(elem, level=0):
00183     """Add line breaks and indentation to ElementTree in-place.
00184 
00185     Sources:
00186 
00187     - http://effbot.org/zone/element-lib.htm#prettyprint
00188     - http://infix.se/2007/02/06/gentlemen-indent-your-xml
00189     """
00190     i = "\n" + level*"  "
00191     if len(elem):
00192         if not elem.text or not elem.text.strip():
00193             elem.text = i + "  "
00194         for e in elem:
00195             _indent(e, level+1)
00196             if not e.tail or not e.tail.strip():
00197                 e.tail = i + "  "
00198         if not e.tail or not e.tail.strip():
00199             e.tail = i
00200     else:
00201         if level and (not elem.tail or not elem.tail.strip()):
00202             elem.tail = i
00203 
00204 # ---------------------------------------------------------
00205 # INPUT
00206 # ---------------------------------------------------------
00207 
00208 def _str2bool(text):
00209     if text == 'true':
00210         return True
00211     if text == 'false':
00212         return False
00213     raise ValueError('String could not be converted to boolean: ' + text)
00214 
00215 def _dict_str2bool(dct, keys):
00216     out = dct.copy()
00217     for key in keys:
00218         if key in out:
00219             out[key] = _str2bool(out[key])
00220     return out
00221 
00222 def _int(text):
00223     if text is not None:
00224         try:
00225             return int(text)
00226         except Exception:
00227             return None
00228 
00229 def _float(text):
00230     if text is not None:
00231         try:
00232             return float(text)
00233         except Exception:
00234             return None
00235 
00236 def _collapse_wspace(text):
00237     """Replace all spans of whitespace with a single space character.
00238 
00239     Also remove leading and trailing whitespace. See "Collapse Whitespace
00240     Policy" in the phyloXML spec glossary:
00241     http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary
00242     """
00243     if text is not None:
00244         return ' '.join(text.split())
00245 
00246 # NB: Not currently used
00247 def _replace_wspace(text):
00248     """Replace tab, LF and CR characters with spaces, but don't collapse.
00249 
00250     See "Replace Whitespace Policy" in the phyloXML spec glossary:
00251     http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary
00252     """
00253     for char in ('\t', '\n', '\r'):
00254         if char in text:
00255             text = text.replace(char, ' ')
00256     return text
00257 
00258 
class Parser(object):
    """Methods for parsing all phyloXML nodes from an XML stream.

    To minimize memory use, the tree of ElementTree parsing events is cleared
    after completing each phylogeny, clade, and top-level 'other' element.
    Elements below the clade level are kept in memory until parsing of the
    current clade is finished -- this shouldn't be a problem because clade is
    the only recursive element, and non-clade nodes below this level are of
    bounded size.

    The incremental ``_parse_*`` methods all read from ``self.context``. Each
    consumes events up to and including the 'end' event of the element it was
    given, and evaluates only the direct children of that element; deeper
    descendants are read from the finished child element instead.
    """

    def __init__(self, file):
        """Start parsing *file* and remember the document's root element.

        *file* is passed straight to ``ElementTree.iterparse``.
        """
        # Get an iterable context for XML parsing events
        context = iter(ElementTree.iterparse(file, events=('start', 'end')))
        # The next() builtin works on Python 2.6+ and Python 3, whereas the
        # iterator's .next() method exists on Python 2 only.
        event, root = next(context)
        self.root = root
        self.context = context

    def read(self):
        """Parse the phyloXML file and create a single Phyloxml object."""
        phyloxml = PX.Phyloxml(dict((_local(key), val)
                                for key, val in self.root.items()))
        # How many non-phyloXML elements are currently open
        other_depth = 0
        for event, elem in self.context:
            namespace, localtag = _split_namespace(elem.tag)
            if event == 'start':
                if namespace != NAMESPACES['phy']:
                    other_depth += 1
                    continue
                if localtag == 'phylogeny':
                    phylogeny = self._parse_phylogeny(elem)
                    phyloxml.phylogenies.append(phylogeny)
            if event == 'end' and namespace != NAMESPACES['phy']:
                # Deal with items not specified by phyloXML
                other_depth -= 1
                if other_depth == 0:
                    # We're directly under the root node -- evaluate
                    otr = self.other(elem, namespace, localtag)
                    phyloxml.other.append(otr)
                    self.root.clear()
        return phyloxml

    def parse(self):
        """Parse the phyloXML file incrementally and return each phylogeny."""
        phytag = _ns('phylogeny')
        for event, elem in self.context:
            if event == 'start' and elem.tag == phytag:
                yield self._parse_phylogeny(elem)

    # Special parsing cases -- incremental, using self.context

    def _parse_phylogeny(self, parent):
        """Parse a single phylogeny within the phyloXML tree.

        Recursively builds a phylogenetic tree with help from parse_clade, then
        clears the XML event history for the phylogeny element and returns
        control to the top-level parsing function.

        :raises PhyloXMLError: if a direct child in the phyloXML namespace
            has a tag this method does not know.
        """
        phylogeny = PX.Phylogeny(**_dict_str2bool(parent.attrib,
                                                   ['rooted', 'rerootable']))
        list_types = {
                # XML tag, plural attribute
                'confidence':   'confidences',
                'property':     'properties',
                'clade_relation': 'clade_relations',
                'sequence_relation': 'sequence_relations',
                }
        # Number of open elements below the phylogeny. Only an 'end' event
        # that brings this back to 0 belongs to a direct child; without the
        # check, e.g. the <desc> inside <date> was rejected as misidentified
        # and the <confidence> inside <clade_relation> was added to the
        # phylogeny's own confidences.
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                if tag == 'clade':
                    assert phylogeny.root is None, \
                            "Phylogeny object should only have 1 clade"
                    # _parse_clade consumes the clade's own 'end' event
                    phylogeny.root = self._parse_clade(elem)
                else:
                    depth += 1
                continue
            if event == 'end':
                if tag == 'phylogeny':
                    parent.clear()
                    break
                depth -= 1
                if depth:
                    # Descendant of a direct child; handled via that child
                    continue
                # Handle the other non-recursive children
                if tag in list_types:
                    getattr(phylogeny, list_types[tag]).append(
                            getattr(self, tag)(elem))
                # Complex types
                elif tag in ('date', 'id'):
                    setattr(phylogeny, tag, getattr(self, tag)(elem))
                # Simple types
                elif tag in ('name', 'description'):
                    setattr(phylogeny, tag, _collapse_wspace(elem.text))
                # Unknown tags
                elif namespace != NAMESPACES['phy']:
                    phylogeny.other.append(self.other(elem, namespace, tag))
                    parent.clear()
                else:
                    # NB: This shouldn't happen in valid files
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return phylogeny

    _clade_complex_types = ['color', 'events', 'binary_characters', 'date']
    _clade_list_types = {
            'confidence':   'confidences',
            'distribution': 'distributions',
            'reference':    'references',
            'property':     'properties',
            }
    # All direct-child tags of a clade that _parse_clade converts itself.
    # Kept as a class attribute for code that may refer to it; list() is used
    # because dict.keys() cannot be added to a list on Python 3.
    _clade_tracked_tags = set(_clade_complex_types + list(_clade_list_types)
                              + ['branch_length', 'name', 'node_id', 'width'])

    def _parse_clade(self, parent):
        """Parse a Clade node and its children, recursively.

        :raises PhyloXMLError: if branch_length is given both as an attribute
            and as a child element, or if a direct child in the phyloXML
            namespace has a tag this method does not know.
        """
        clade = PX.Clade(**parent.attrib)
        if clade.branch_length is not None:
            clade.branch_length = float(clade.branch_length)
        # NB: Only evaluate nodes at the current level.
        # depth counts the open elements below this clade; an 'end' event
        # that brings it back to 0 belongs to a direct child. (The earlier
        # stack of tracked tag names raised IndexError on any untracked
        # direct child -- so 'other' elements were never reached -- and
        # mistook the <confidence> inside <events> for a clade confidence.)
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                # The three recursive parsers consume the matching 'end'
                # event themselves, so depth is left alone for them.
                if tag == 'clade':
                    clade.clades.append(self._parse_clade(elem))
                elif tag == 'taxonomy':
                    clade.taxonomies.append(self._parse_taxonomy(elem))
                elif tag == 'sequence':
                    clade.sequences.append(self._parse_sequence(elem))
                else:
                    depth += 1
                continue
            if event == 'end':
                if tag == 'clade':
                    elem.clear()
                    break
                depth -= 1
                if depth:
                    # Descendant of a direct child; handled via that child
                    continue
                # Handle the other non-recursive children
                if tag in self._clade_list_types:
                    getattr(clade, self._clade_list_types[tag]).append(
                            getattr(self, tag)(elem))
                elif tag in self._clade_complex_types:
                    setattr(clade, tag, getattr(self, tag)(elem))
                elif tag == 'branch_length':
                    # NB: possible collision with the attribute
                    if clade.branch_length is not None:
                        raise PhyloXMLError(
                                'Attribute branch_length was already set '
                                'for this Clade.')
                    clade.branch_length = _float(elem.text)
                elif tag == 'width':
                    clade.width = _float(elem.text)
                elif tag == 'name':
                    clade.name = _collapse_wspace(elem.text)
                elif tag == 'node_id':
                    clade.node_id = PX.Id(elem.text.strip(),
                                          elem.attrib.get('provider'))
                elif namespace != NAMESPACES['phy']:
                    clade.other.append(self.other(elem, namespace, tag))
                    elem.clear()
                else:
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return clade

    def _parse_sequence(self, parent):
        """Parse a sequence node up to and including its 'end' event."""
        sequence = PX.Sequence(**parent.attrib)
        # Open elements below the sequence; see _parse_clade. Without this,
        # the <uri> inside an <annotation> replaced the sequence's own uri.
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                depth += 1
                continue
            if event == 'end':
                if tag == 'sequence':
                    parent.clear()
                    break
                depth -= 1
                if depth:
                    continue
                if tag in ('accession', 'mol_seq', 'uri',
                        'domain_architecture'):
                    setattr(sequence, tag, getattr(self, tag)(elem))
                elif tag == 'annotation':
                    sequence.annotations.append(self.annotation(elem))
                elif tag == 'name':
                    sequence.name = _collapse_wspace(elem.text)
                elif tag in ('symbol', 'location'):
                    setattr(sequence, tag, elem.text)
                elif namespace != NAMESPACES['phy']:
                    sequence.other.append(self.other(elem, namespace, tag))
                    parent.clear()
        return sequence

    def _parse_taxonomy(self, parent):
        """Parse a taxonomy node up to and including its 'end' event."""
        taxonomy = PX.Taxonomy(**parent.attrib)
        # Open elements below the taxonomy; see _parse_clade. This keeps the
        # children of an 'other' element from being recorded a second time.
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                depth += 1
                continue
            if event == 'end':
                if tag == 'taxonomy':
                    parent.clear()
                    break
                depth -= 1
                if depth:
                    continue
                if tag in ('id', 'uri'):
                    setattr(taxonomy, tag, getattr(self, tag)(elem))
                elif tag == 'common_name':
                    taxonomy.common_names.append(_collapse_wspace(elem.text))
                elif tag == 'synonym':
                    taxonomy.synonyms.append(elem.text)
                elif tag in ('code', 'scientific_name', 'authority', 'rank'):
                    # ENH: check_str on rank
                    setattr(taxonomy, tag, elem.text)
                elif namespace != NAMESPACES['phy']:
                    taxonomy.other.append(self.other(elem, namespace, tag))
                    parent.clear()
        return taxonomy

    def other(self, elem, namespace, localtag):
        """Wrap an arbitrary element and its children as `PX.Other` objects.

        Missing or whitespace-only text is stored as None.
        """
        return PX.Other(localtag, namespace, elem.attrib,
                  value=elem.text and elem.text.strip() or None,
                  children=[self.other(child, *_split_namespace(child.tag))
                            for child in elem])

    # Complex types

    def accession(self, elem):
        """Build an Accession from the element text and 'source' attribute."""
        return PX.Accession(elem.text.strip(), elem.get('source'))

    def annotation(self, elem):
        """Build an Annotation from the element's attributes and children."""
        return PX.Annotation(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                confidence=_get_child_as(elem, 'confidence', self.confidence),
                properties=_get_children_as(elem, 'property', self.property),
                uri=_get_child_as(elem, 'uri', self.uri),
                **elem.attrib)

    def binary_characters(self, elem):
        """Build a BinaryCharacters object; counts that fail int() are None."""
        def bc_getter(elem):
            return _get_children_text(elem, 'bc')
        return PX.BinaryCharacters(
                type=elem.get('type'),
                gained_count=_int(elem.get('gained_count')),
                lost_count=_int(elem.get('lost_count')),
                present_count=_int(elem.get('present_count')),
                absent_count=_int(elem.get('absent_count')),
                # Flatten BinaryCharacterList sub-nodes into lists of strings
                gained=_get_child_as(elem, 'gained', bc_getter),
                lost=_get_child_as(elem, 'lost', bc_getter),
                present=_get_child_as(elem, 'present', bc_getter),
                absent=_get_child_as(elem, 'absent', bc_getter))

    def clade_relation(self, elem):
        """Build a CladeRelation from the element's attributes and children."""
        # NOTE(review): 'distance' is passed on as the raw attribute string,
        # whereas sequence_relation converts it with _float -- confirm
        # whether that difference is intended.
        return PX.CladeRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=elem.get('distance'),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def color(self, elem):
        """Build a BranchColor from the red, green and blue child nodes."""
        red, green, blue = (_get_child_text(elem, color, int) for color in
                            ('red', 'green', 'blue'))
        return PX.BranchColor(red, green, blue)

    def confidence(self, elem):
        """Build a Confidence from the element text and 'type' attribute."""
        return PX.Confidence(
                _float(elem.text),
                elem.get('type'))

    def date(self, elem):
        """Build a Date from the 'unit' attribute and the child nodes."""
        return PX.Date(
                unit=elem.get('unit'),
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                value=_get_child_text(elem, 'value', float),
                minimum=_get_child_text(elem, 'minimum', float),
                maximum=_get_child_text(elem, 'maximum', float),
                )

    def distribution(self, elem):
        """Build a Distribution from its desc, point and polygon children."""
        return PX.Distribution(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                points=_get_children_as(elem, 'point', self.point),
                polygons=_get_children_as(elem, 'polygon', self.polygon))

    def domain(self, elem):
        """Build a ProteinDomain; 1 is subtracted from the 'from' attribute.

        `Writer.domain` adds the 1 back when serializing.
        """
        return PX.ProteinDomain(elem.text.strip(),
                int(elem.get('from')) - 1,
                int(elem.get('to')),
                confidence=_float(elem.get('confidence')),
                id=elem.get('id'))

    def domain_architecture(self, elem):
        """Build a DomainArchitecture from 'length' and the domain children."""
        return PX.DomainArchitecture(
                length=int(elem.get('length')),
                domains=_get_children_as(elem, 'domain', self.domain))

    def events(self, elem):
        """Build an Events object from the element's child nodes."""
        return PX.Events(
                type=_get_child_text(elem, 'type'),
                duplications=_get_child_text(elem, 'duplications', int),
                speciations=_get_child_text(elem, 'speciations', int),
                losses=_get_child_text(elem, 'losses', int),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def id(self, elem):
        """Build an Id; 'type' is used if 'provider' is missing or empty."""
        provider = elem.get('provider') or elem.get('type')
        return PX.Id(elem.text.strip(), provider)

    def mol_seq(self, elem):
        """Build a MolSeq; 'is_aligned' is converted to a bool if present."""
        is_aligned = elem.get('is_aligned')
        if is_aligned is not None:
            is_aligned = _str2bool(is_aligned)
        return PX.MolSeq(elem.text.strip(), is_aligned=is_aligned)

    def point(self, elem):
        """Build a Point from its attributes and lat, long, alt children."""
        return PX.Point(
                elem.get('geodetic_datum'),
                _get_child_text(elem, 'lat', float),
                _get_child_text(elem, 'long', float),
                alt=_get_child_text(elem, 'alt', float),
                alt_unit=elem.get('alt_unit'))

    def polygon(self, elem):
        """Build a Polygon from the element's point children."""
        return PX.Polygon(
                points=_get_children_as(elem, 'point', self.point))

    def property(self, elem):
        """Build a Property from the element text and attributes."""
        return PX.Property(elem.text.strip(),
                elem.get('ref'), elem.get('applies_to'), elem.get('datatype'),
                unit=elem.get('unit'),
                id_ref=elem.get('id_ref'))

    def reference(self, elem):
        """Build a Reference from the 'doi' attribute and the desc child."""
        return PX.Reference(
                doi=elem.get('doi'),
                desc=_get_child_text(elem, 'desc'))

    def sequence_relation(self, elem):
        """Build a SequenceRelation from the element's attributes/children."""
        return PX.SequenceRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=_float(elem.get('distance')),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def uri(self, elem):
        """Build a Uri from the element text, 'desc' and 'type' attributes."""
        return PX.Uri(elem.text.strip(),
                desc=_collapse_wspace(elem.get('desc')),
                type=elem.get('type'))
00593 
00594 
00595 
00596 # ---------------------------------------------------------
00597 # OUTPUT
00598 # ---------------------------------------------------------
00599 
00600 def _serialize(value):
00601     """Convert a Python primitive to a phyloXML-compatible Unicode string."""
00602     if isinstance(value, float):
00603         return unicode(value).upper()
00604     elif isinstance(value, bool):
00605         return unicode(value).lower()
00606     return unicode(value)
00607 
00608 
00609 def _clean_attrib(obj, attrs):
00610     """Create a dictionary from an object's specified, non-None attributes."""
00611     out = {}
00612     for key in attrs:
00613         val = getattr(obj, key)
00614         if val is not None:
00615             out[key] = _serialize(val)
00616     return out
00617 
00618 
def _handle_complex(tag, attribs, subnodes, has_text=False):
    """Create a serializer method for an element with attributes or children.

    :Parameters:
        tag
            name of the XML element to create.
        attribs
            names of object attributes to copy into XML attributes; those
            that are None are left out.
        subnodes
            child nodes, in output order. A string names both the object
            attribute and the serializer method for one optional child; a
            ``(method, plural)`` pair names the serializer method and the
            object attribute holding a list of children.
        has_text
            if true, the object's ``value`` becomes the element text.
    """
    def wrapped(self, obj):
        elem = ElementTree.Element(tag, _clean_attrib(obj, attribs))
        for subn in subnodes:
            if not isinstance(subn, basestring):
                # list: singular method, pluralized attribute name
                method, plural = subn
                for item in getattr(obj, plural):
                    elem.append(getattr(self, method)(item))
                continue
            # singular object: method and attribute names are the same
            child = getattr(obj, subn)
            if child is not None:
                elem.append(getattr(self, subn)(child))
        if has_text:
            elem.text = _serialize(obj.value)
        return elem
    wrapped.__doc__ = "Serialize a %s and its subnodes, in order." % tag
    return wrapped
00637 
00638 
def _handle_simple(tag):
    """Create a serializer method for an element that only contains text."""
    def wrapped(self, obj):
        node = ElementTree.Element(tag)
        node.text = _serialize(obj)
        return node
    wrapped.__doc__ = "Serialize a simple %s node." % tag
    return wrapped
00646 
00647 
00648 class Writer(object):
00649     """Methods for serializing a PhyloXML object to XML."""
00650 
00651     def __init__(self, phyloxml):
00652         """Build an ElementTree from a PhyloXML object."""
00653         assert isinstance(phyloxml, PX.Phyloxml), "Not a Phyloxml object"
00654         self._tree = ElementTree.ElementTree(self.phyloxml(phyloxml))
00655 
00656     def write(self, file, encoding='utf-8', indent=True):
00657         if indent:
00658             _indent(self._tree.getroot())
00659         self._tree.write(file, encoding)
00660         return len(self._tree.getroot())
00661 
00662     # Convert classes to ETree elements
00663 
00664     def phyloxml(self, obj):
00665         elem = ElementTree.Element('phyloxml', obj.attributes) # Namespaces
00666         for tree in obj.phylogenies:
00667             elem.append(self.phylogeny(tree))
00668         for otr in obj.other:
00669             elem.append(self.other(otr))
00670         return elem
00671 
00672     def other(self, obj):
00673         elem = ElementTree.Element(_ns(obj.tag, obj.namespace), obj.attributes)
00674         elem.text = obj.value
00675         for child in obj.children:
00676             elem.append(self.other(child))
00677         return elem
00678 
    # Serializers generated by _handle_complex(tag, attribs, subnodes).
    # attribs lists the object attributes written as XML attributes; subnodes
    # lists the children in output order, where a plain string is a single
    # optional child and a (method, plural) pair is a list of children.

    # <phylogeny>: one tree, with its root clade among the children
    phylogeny = _handle_complex('phylogeny',
            ('rooted', 'rerootable', 'branch_length_unit', 'type'),
            ( 'name',
              'id',
              'description',
              'date',
              ('confidence',        'confidences'),
              'clade',
              ('clade_relation',    'clade_relations'),
              ('sequence_relation', 'sequence_relations'),
              ('property',          'properties'),
              ('other',             'other'),
              ))

    # <clade>: recursive through the ('clade', 'clades') entry
    clade = _handle_complex('clade', ('id_source',),
            ( 'name',
              'branch_length',
              ('confidence',    'confidences'),
              'width',
              'color',
              'node_id',
              ('taxonomy',      'taxonomies'),
              ('sequence',      'sequences'),
              'events',
              'binary_characters',
              ('distribution',  'distributions'),
              'date',
              ('reference',     'references'),
              ('property',      'properties'),
              ('clade',         'clades'),
              ('other',         'other'),
              ))

    # <accession>: no children; the object's value becomes the element text
    accession = _handle_complex('accession', ('source',),
            (), has_text=True)

    # <annotation>
    annotation = _handle_complex('annotation',
            ('ref', 'source', 'evidence', 'type'),
            ( 'desc',
              'confidence',
              ('property',   'properties'),
              'uri',
              ))
00722 
00723     def binary_characters(self, obj):
00724         """Serialize a binary_characters node and its subnodes."""
00725         elem = ElementTree.Element('binary_characters',
00726                 _clean_attrib(obj,
00727                     ('type', 'gained_count', 'lost_count',
00728                         'present_count', 'absent_count')))
00729         for subn in ('gained', 'lost', 'present', 'absent'):
00730             subelem = ElementTree.Element(subn)
00731             for token in getattr(obj, subn):
00732                 subelem.append(self.bc(token))
00733             elem.append(subelem)
00734         return elem
00735 
00736     clade_relation = _handle_complex('clade_relation',
00737             ('id_ref_0', 'id_ref_1', 'distance', 'type'),
00738             ('confidence',))
00739 
00740     color = _handle_complex('color', (), ('red', 'green', 'blue'))
00741 
00742     confidence = _handle_complex('confidence', ('type',),
00743             (), has_text=True)
00744 
00745     date = _handle_complex('date', ('unit',),
00746             ('desc', 'value', 'minimum', 'maximum'))
00747 
00748     distribution = _handle_complex('distribution', (),
00749             ( 'desc',
00750               ('point',     'points'),
00751               ('polygon',   'polygons'),
00752               ))
00753 
00754     def domain(self, obj):
00755         """Serialize a domain node."""
00756         elem = ElementTree.Element('domain',
00757                 {'from': str(obj.start + 1), 'to': str(obj.end)})
00758         if obj.confidence is not None:
00759             elem.set('confidence', _serialize(obj.confidence))
00760         if obj.id is not None:
00761             elem.set('id', obj.id)
00762         elem.text = _serialize(obj.value)
00763         return elem
00764 
00765     domain_architecture = _handle_complex('domain_architecture',
00766             ('length',),
00767             (('domain', 'domains'),))
00768 
00769     events = _handle_complex('events', (),
00770             ( 'type',
00771               'duplications',
00772               'speciations',
00773               'losses',
00774               'confidence',
00775               ))
00776 
00777     id = _handle_complex('id', ('provider',), (), has_text=True)
00778 
00779     mol_seq = _handle_complex('mol_seq', ('is_aligned',),
00780             (), has_text=True)
00781 
00782     node_id = _handle_complex('node_id', ('provider',), (), has_text=True)
00783 
00784     point = _handle_complex('point', ('geodetic_datum', 'alt_unit'),
00785             ('lat', 'long', 'alt'))
00786 
00787     polygon = _handle_complex('polygon', (), (('point', 'points'),))
00788 
00789     property = _handle_complex('property',
00790             ('ref', 'unit', 'datatype', 'applies_to', 'id_ref'),
00791             (), has_text=True)
00792 
00793     reference = _handle_complex('reference', ('doi',), ('desc',))
00794 
00795     sequence = _handle_complex('sequence',
00796             ('type', 'id_ref', 'id_source'),
00797             ( 'symbol',
00798               'accession',
00799               'name',
00800               'location',
00801               'mol_seq',
00802               'uri',
00803               ('annotation', 'annotations'),
00804               'domain_architecture',
00805               ('other',      'other'),
00806               ))
00807 
00808     sequence_relation = _handle_complex('sequence_relation',
00809             ('id_ref_0', 'id_ref_1', 'distance', 'type'),
00810             ('confidence',))
00811 
00812     taxonomy = _handle_complex('taxonomy',
00813             ('id_source',),
00814             ( 'id',
00815               'code',
00816               'scientific_name',
00817               'authority',
00818               ('common_name',   'common_names'),
00819               ('synonym',       'synonyms'),
00820               'rank',
00821               'uri',
00822               ('other',         'other'),
00823               ))
00824 
00825     uri = _handle_complex('uri', ('desc', 'type'), (), has_text=True)
00826 
    # Primitive types
    #
    # Each handler below is built by _handle_simple from a single tag name
    # and bound to a class attribute of the same name.  The section headings
    # group the tags by the kind of value they are expected to carry; the
    # calls themselves are identical in form for all three groups.

    # Floating point
    alt = _handle_simple('alt')
    branch_length = _handle_simple('branch_length')
    lat = _handle_simple('lat')
    # NOTE: `long` is a builtin name on Python 2; it is rebound here for the
    # rest of the class body.
    long = _handle_simple('long')
    maximum = _handle_simple('maximum')
    minimum = _handle_simple('minimum')
    value = _handle_simple('value')
    width = _handle_simple('width')

    # Integers
    blue = _handle_simple('blue')
    duplications = _handle_simple('duplications')
    green = _handle_simple('green')
    losses = _handle_simple('losses')
    red = _handle_simple('red')
    speciations = _handle_simple('speciations')

    # Strings
    bc = _handle_simple('bc')
    code = _handle_simple('code')
    common_name = _handle_simple('common_name')
    desc = _handle_simple('desc')
    description = _handle_simple('description')
    location = _handle_simple('location')
    name = _handle_simple('name')
    rank = _handle_simple('rank')
    scientific_name = _handle_simple('scientific_name')
    symbol = _handle_simple('symbol')
    synonym = _handle_simple('synonym')
    # NOTE: rebinds the builtin name `type` for the rest of the class body.
    type = _handle_simple('type')
00860