Back to index

python-biopython  1.60
Public Member Functions | Private Member Functions | Private Attributes
Bio.SeqIO.SeqXmlIO.SeqXmlIterator Class Reference
Inheritance diagram for Bio.SeqIO.SeqXmlIO.SeqXmlIterator:
Inheritance graph
[legend]
Collaboration diagram for Bio.SeqIO.SeqXmlIO.SeqXmlIterator:
Collaboration graph
[legend]

List of all members.

Public Member Functions

def __init__
def __iter__

Private Member Functions

def _attr_seqXML
def _attr_property
def _attr_species
def _attr_entry
def _elem_DNAseq
def _elem_RNAseq
def _elem_AAseq
def _elem_description
def _attr_DBRef

Private Attributes

 _source
 _source_version
 _version
 _speciesName
 _ncbiTaxId

Detailed Description

Breaks seqXML file into SeqRecords.

Assumes the input is valid seqXML; please validate the file beforehand.

Definition at line 98 of file SeqXmlIO.py.


Constructor & Destructor Documentation

def Bio.SeqIO.SeqXmlIO.SeqXmlIterator.__init__ (   self,
  handle 
)
Create the object.

Definition at line 103 of file SeqXmlIO.py.

00103 
00104     def __init__(self,handle):
00105         """Create the object."""
00106         XMLRecordIterator.__init__(self, handle,"entry")
00107         
00108         self._source = None
00109         self._source_version = None
00110         self._version = None
00111         self._speciesName = None
00112         self._ncbiTaxId = None


Member Function Documentation

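def Bio.SeqIO.SeqXmlIO.XMLRecordIterator.__iter__ (   self  )  [inherited]
Iterate over the records in the XML file.
Yields each parsed record once the closing tag of its record element has been read.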

Definition at line 47 of file SeqXmlIO.py.

00047 
00048     def __iter__(self):
00049         """Iterate over the records in the XML file. 
00050         Returns the last parsed record.""" 
00051         
00052         record = None
00053         try:
00054             for event,node in self._events:
00055                 
00056                 if event == "START_ELEMENT" and node.namespaceURI == self._namespace:
00057                     
00058                     if node.localName == self._recordTag:
00059                         #create an empty SeqRecord
00060                         record = SeqRecord('', id='')
00061 
00062                     #call matching methods with attributes only                    
00063                     if hasattr(self,"_attr_" + node.localName):
00064                         getattr(self,"_attr_" + node.localName)(self._attributes(node),record)
00065     
00066                     #call matching methods with DOM tree 
00067                     if hasattr(self,"_elem_" + node.localName):
00068                         #read the element and all nested elements into a DOM tree
00069                         self._events.expandNode(node)
00070                         node.normalize()
00071                         
00072                         getattr(self,"_elem_" + node.localName)(node,record)
00073                     
00074                 elif event == "END_ELEMENT" and node.namespaceURI == self._namespace and node.localName == self._recordTag:
00075                     yield record
00076                     
00077         except SAXParseException, e:
00078             
00079             if e.getLineNumber() == 1 and e.getColumnNumber() == 0:
00080                 #empty file
00081                 pass
00082             else:
00083                 import os
00084                 if e.getLineNumber() == 1 and e.getColumnNumber() == 1 \
00085                 and os.name == "java":
00086                     #empty file, see http://bugs.jython.org/issue1774
00087                     pass
00088                 else:
00089                     raise
00090 
    

Here is the call graph for this function:

def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._attr_DBRef (   self,
  attr_dict,
  record 
) [private]
Parse a database cross reference

Definition at line 205 of file SeqXmlIO.py.

00205 
00206     def _attr_DBRef(self,attr_dict,record):
00207         """Parse a database cross reference"""
00208         
00209         if "source" not in attr_dict or "id" not in attr_dict:
00210             raise ValueError("Invalid DB cross reference.")
00211         
00212         if "%s:%s" % (attr_dict["source"],attr_dict["id"]) not in record.dbxrefs:
00213             record.dbxrefs.append("%s:%s" % (attr_dict["source"],attr_dict["id"]) )
00214 
00215 

def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._attr_entry (   self,
  attr_dict,
  record 
) [private]
Start a new entry: set its id and the optional entry source.

Definition at line 153 of file SeqXmlIO.py.

00153 
00154     def _attr_entry(self,attr_dict,record):
00155         """New entry set id and the optional entry source."""
00156         
00157         if "id" not in attr_dict:
00158             raise ValueError("Malformed entry! Identifier is missing.") 
00159         
00160         record.id = attr_dict["id"]
00161         if "source" in attr_dict:
00162             record.annotations["source"] = attr_dict["source"]
00163         elif self._source != None:
00164             record.annotations["source"] = self._source
00165             
00166         #initialize entry with global species definition
00167         #the keywords for the species annotation are taken from SwissIO   
00168         if self._ncbiTaxId != None:
00169             record.annotations["ncbi_taxid"] = self._ncbiTaxId
00170         if self._speciesName != None:
00171             record.annotations["organism"] = self._speciesName    
00172 

def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._attr_property (   self,
  attr_dict,
  record 
) [private]
Parse key value pair properties and store them as annotations.

Definition at line 127 of file SeqXmlIO.py.

00127 
00128     def _attr_property(self,attr_dict,record):
00129         """Parse key value pair properties and store them as annotations."""
00130         
00131         if "name" not in attr_dict:
00132             raise ValueError("Malformed property element.")
00133         
00134         value = attr_dict.get("value",None)
00135         
00136         if attr_dict["name"] not in record.annotations:
00137             record.annotations[attr_dict["name"]] = value
00138         elif isinstance(record.annotations[attr_dict["name"]],list):
00139             record.annotations[attr_dict["name"]].append(value)
00140         else:
00141             record.annotations[attr_dict["name"]] = [record.annotations[attr_dict["name"]],value]
00142             
        
def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._attr_seqXML (   self,
  attr_dict,
  record 
) [private]
Parse the document metadata.

Definition at line 113 of file SeqXmlIO.py.

00113 
00114     def _attr_seqXML(self,attr_dict,record):
00115         """Parse the document metadata."""
00116         
00117         if "source" in attr_dict:
00118             self._source = attr_dict["source"]
00119         if "sourceVersion" in attr_dict:
00120             self._source_version = attr_dict["sourceVersion"]
00121         if "version" in attr_dict:
00122             self._version = attr_dict["seqXMLversion"]
00123         if "ncbiTaxID" in attr_dict:
00124             self._ncbiTaxId = attr_dict["ncbiTaxID"]
00125         if "speciesName" in attr_dict:
00126             self._speciesName = attr_dict["speciesName"]
    
def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._attr_species (   self,
  attr_dict,
  record 
) [private]
Parse the species information.

Definition at line 143 of file SeqXmlIO.py.

00143 
00144     def _attr_species(self,attr_dict,record):
00145         """Parse the species information."""
00146         
00147         if "name" not in attr_dict or "ncbiTaxID" not in attr_dict:
00148             raise ValueError("Malformed species element!")
00149         
00150         #the keywords for the species annotation are taken from SwissIO   
00151         record.annotations["organism"] = attr_dict["name"]
00152         record.annotations["ncbi_taxid"] = attr_dict["ncbiTaxID"]
    
def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._elem_AAseq (   self,
  node,
  record 
) [private]
Parse protein sequence.

Definition at line 190 of file SeqXmlIO.py.

00190 
00191     def _elem_AAseq(self,node,record):
00192         """Parse protein sequence."""
00193         
00194         if not (node.hasChildNodes() and len(node.firstChild.data) > 0):
00195             raise ValueError("Sequence length should be greater than 0.")
00196         
00197         record.seq = Seq(node.firstChild.data,Alphabet.generic_protein)
00198         
        
def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._elem_description (   self,
  node,
  record 
) [private]
Parse the description.

Definition at line 199 of file SeqXmlIO.py.

00199 
00200     def _elem_description(self,node,record):
00201         """Parse the description."""
00202         
00203         if node.hasChildNodes() and len(node.firstChild.data) > 0:
00204             record.description = node.firstChild.data
        
def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._elem_DNAseq (   self,
  node,
  record 
) [private]
Parse DNA sequence.

Definition at line 173 of file SeqXmlIO.py.

00173 
00174     def _elem_DNAseq(self,node,record):
00175         """Parse DNA sequence."""
00176         
00177         if not (node.hasChildNodes() and len(node.firstChild.data) > 0):
00178             raise ValueError("Sequence length should be greater than 0.")
00179             
00180         record.seq = Seq(node.firstChild.data,Alphabet.generic_dna)
00181         
        
def Bio.SeqIO.SeqXmlIO.SeqXmlIterator._elem_RNAseq (   self,
  node,
  record 
) [private]
Parse RNA sequence.

Definition at line 182 of file SeqXmlIO.py.

00182 
00183     def _elem_RNAseq(self,node,record):
00184         """Parse RNA sequence."""
00185         
00186         if not (node.hasChildNodes() and len(node.firstChild.data) > 0):
00187             raise ValueError("Sequence length should be greater than 0.")
00188         
00189         record.seq = Seq(node.firstChild.data,Alphabet.generic_rna)
    

Member Data Documentation

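Bio.SeqIO.SeqXmlIO.SeqXmlIterator._ncbiTaxId [private]

Definition at line 111 of file SeqXmlIO.py.

Bio.SeqIO.SeqXmlIO.SeqXmlIterator._source [private]

Definition at line 107 of file SeqXmlIO.py.

Bio.SeqIO.SeqXmlIO.SeqXmlIterator._source_version [private]

Definition at line 108 of file SeqXmlIO.py.

Bio.SeqIO.SeqXmlIO.SeqXmlIterator._speciesName [private]

Definition at line 110 of file SeqXmlIO.py.

Bio.SeqIO.SeqXmlIO.SeqXmlIterator._version [private]

Definition at line 109 of file SeqXmlIO.py.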


The documentation for this class was generated from the following file: