Back to index

python-biopython  1.60
Functions | Variables
Bio.SeqIO.SwissIO Namespace Reference

Functions

def _make_position
def _make_seqfeature
def SwissIterator

Variables

string example_filename = "../../Tests/SwissProt/sp008"
tuple handle = open(example_filename)
tuple records = SwissIterator(handle)

Function Documentation

def Bio.SeqIO.SwissIO._make_position (   location_string,
  offset = 0 
) [private]
Turn a Swiss location position into a SeqFeature position object (PRIVATE).

An offset of -1 is used with a start location to make it pythonic.

Definition at line 23 of file SwissIO.py.

00023 
def _make_position(location_string, offset=0):
    """Convert a Swiss-Prot location token into a SeqFeature position (PRIVATE).

    Recognised forms of location_string:

     * "?"   - gives SeqFeature.UnknownPosition()
     * "13"  - gives SeqFeature.ExactPosition
     * "<13" - gives SeqFeature.BeforePosition
     * ">13" - gives SeqFeature.AfterPosition
     * "?13" - gives SeqFeature.UncertainPosition

    The offset is added to the parsed number, and the sum is clamped so it
    never goes below zero.  Callers pass an offset of -1 for a start
    location to make it pythonic.

    Raises NotImplementedError if the string matches none of these forms.
    """
    if location_string == "?":
        return SeqFeature.UnknownPosition()

    # Plain number.  The max(0, ...) clamp is a hack so that a feature
    # from 0 to 0 stays 0 to 0 rather than becoming -1 to 0.
    try:
        return SeqFeature.ExactPosition(max(0, offset + int(location_string)))
    except ValueError:
        pass

    # One-character marker followed by a number, e.g. "<5", ">13" or "?22".
    # The class is looked up by name only once its marker has matched.
    for marker, class_name in (("<", "BeforePosition"),
                               (">", "AfterPosition"),
                               ("?", "UncertainPosition")):
        if not location_string.startswith(marker):
            continue
        try:
            position_class = getattr(SeqFeature, class_name)
            return position_class(max(0, offset + int(location_string[1:])))
        except ValueError:
            pass
        # Only the first matching marker is ever tried.
        break

    raise NotImplementedError("Cannot parse location '%s'" % location_string)

Here is the caller graph for this function:

def Bio.SeqIO.SwissIO._make_seqfeature (   name,
  from_res,
  to_res,
  description,
  ft_id 
) [private]
Construct SeqFeature from feature data from parser (PRIVATE).

Definition at line 52 of file SwissIO.py.

00052 
def _make_seqfeature(name, from_res, to_res, description, ft_id):
    """Build a SeqFeature from one parsed feature entry (PRIVATE).

    Arguments:
     - name        - stored as the feature type
     - from_res    - start position string, converted with an offset of -1
     - to_res      - end position string, converted with an offset of 0
     - description - stored under the "description" qualifier
     - ft_id       - feature identifier; a false value (e.g. an empty
                     string) is replaced by "<unknown id>"

    Returns the new SeqFeature.SeqFeature object.
    """
    start = _make_position(from_res, -1)
    end = _make_position(to_res, 0)
    location = SeqFeature.FeatureLocation(start, end)
    # "<unknown id>" is the default in the SeqFeature object
    feature_id = ft_id or "<unknown id>"
    return SeqFeature.SeqFeature(location,
                                 type=name,
                                 id=feature_id,
                                 qualifiers={"description": description})
00061 
#This is a generator function!

Here is the call graph for this function:

Here is the caller graph for this function:

def Bio.SeqIO.SwissIO.SwissIterator (   handle  )
Breaks up a Swiss-Prot/UniProt file into SeqRecord objects.

Every section from the ID line to the terminating // becomes
a single SeqRecord with associated annotation and features.

This parser is for the flat file "swiss" format as used by:
 * Swiss-Prot aka SwissProt
 * TrEMBL
 * UniProtKB aka UniProt Knowledgebase

For consistency with BioPerl and EMBOSS we call this the "swiss"
format. See also the SeqIO support for "uniprot-xml" format.

Definition at line 62 of file SwissIO.py.

00062 
def SwissIterator(handle):
    """Iterate over a Swiss-Prot/UniProt file, yielding SeqRecord objects.

    Each entry, running from its ID line to the terminating //, is turned
    into one SeqRecord carrying the associated annotation and features.

    This parser is for the flat file "swiss" format as used by:
     * Swiss-Prot aka SwissProt
     * TrEMBL
     * UniProtKB aka UniProt Knowledgebase

    For consistency with BioPerl and EMBOSS we call this the "swiss"
    format. See also the SeqIO support for "uniprot-xml" format.

    This is a generator function.  It raises ValueError if a literature
    reference carries a cross-reference key other than PubMed, MEDLINE,
    DOI or AGRICOLA.
    """
    for swiss_record in SwissProt.parse(handle):
        # Build the core SeqRecord from the parsed SwissProt record.
        protein = Seq.Seq(swiss_record.sequence, Alphabet.generic_protein)
        primary_accession = swiss_record.accessions[0]
        entry_name = swiss_record.entry_name
        description = swiss_record.description
        features = [_make_seqfeature(*entry)
                    for entry in swiss_record.features]
        record = SeqRecord.SeqRecord(protein,
                                     id=primary_accession,
                                     name=entry_name,
                                     description=description,
                                     features=features)
        # NOTE(review): description is already passed to the constructor
        # above, so this re-assignment looks redundant - confirm before
        # removing it.
        record.description = swiss_record.description

        # Database cross references become "database:accession" strings,
        # skipping entries with fewer than two fields and any duplicates.
        for cross_reference in swiss_record.cross_references:
            if len(cross_reference) >= 2:
                database, accession = cross_reference[:2]
                dbxref = "%s:%s" % (database, accession)
                if dbxref not in record.dbxrefs:
                    record.dbxrefs.append(dbxref)

        annotations = record.annotations
        annotations['accessions'] = swiss_record.accessions
        annotations['date'] = swiss_record.created[0]
        annotations['date_last_sequence_update'] = \
            swiss_record.sequence_update[0]
        if swiss_record.annotation_update:
            annotations['date_last_annotation_update'] = \
                swiss_record.annotation_update[0]
        if swiss_record.gene_name:
            annotations['gene_name'] = swiss_record.gene_name
        annotations['organism'] = swiss_record.organism.rstrip(".")
        annotations['taxonomy'] = swiss_record.organism_classification
        annotations['ncbi_taxid'] = swiss_record.taxonomy_id
        if swiss_record.host_organism:
            annotations['organism_host'] = swiss_record.host_organism
        if swiss_record.host_taxonomy_id:
            annotations['host_ncbi_taxid'] = swiss_record.host_taxonomy_id
        if swiss_record.comments:
            annotations['comment'] = "\n".join(swiss_record.comments)

        if swiss_record.references:
            citations = []
            annotations['references'] = citations
            for reference in swiss_record.references:
                citation = SeqFeature.Reference()
                comment_parts = ["%s=%s;" % (key, value)
                                 for key, value in reference.comments]
                citation.comment = " ".join(comment_parts)
                for key, value in reference.references:
                    if key == 'PubMed':
                        citation.pubmed_id = value
                    elif key == 'MEDLINE':
                        citation.medline_id = value
                    elif key not in ('DOI', 'AGRICOLA'):
                        # DOI and AGRICOLA are accepted but not stored.
                        raise ValueError(
                            "Unknown key %s found in references" % key)
                citation.authors = reference.authors
                citation.title = reference.title
                citation.journal = reference.location
                citations.append(citation)

        if swiss_record.keywords:
            annotations['keywords'] = swiss_record.keywords
        yield record

Here is the call graph for this function:


Variable Documentation

string Bio.SeqIO.SwissIO.example_filename = "../../Tests/SwissProt/sp008"

Definition at line 142 of file SwissIO.py.

tuple Bio.SeqIO.SwissIO.handle = open(example_filename)

Definition at line 149 of file SwissIO.py.

tuple Bio.SeqIO.SwissIO.records = SwissIterator(handle)

Definition at line 150 of file SwissIO.py.