Back to index

python-biopython  1.60
StockholmIO.py
Go to the documentation of this file.
00001 # Copyright 2006-2010 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """
00006 Bio.AlignIO support for the "stockholm" format (used in the PFAM database).
00007 
00008 You are expected to use this module via the Bio.AlignIO functions (or the
00009 Bio.SeqIO functions if you want to work directly with the gapped sequences).
00010 
00011 For example, consider a Stockholm alignment file containing the following::
00012 
00013     # STOCKHOLM 1.0
00014     #=GC SS_cons       .................<<<<<<<<...<<<<<<<........>>>>>>>..
00015     AP001509.1         UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGU
00016     #=GR AP001509.1 SS -----------------<<<<<<<<---..<<-<<-------->>->>..--
00017     AE007476.1         AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGU
00018     #=GR AE007476.1 SS -----------------<<<<<<<<-----<<.<<-------->>.>>----
00019 
00020     #=GC SS_cons       ......<<<<<<<.......>>>>>>>..>>>>>>>>...............
00021     AP001509.1         CUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
00022     #=GR AP001509.1 SS -------<<<<<--------->>>>>--->>>>>>>>---------------
00023     AE007476.1         UUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
00024     #=GR AE007476.1 SS ------.<<<<<--------->>>>>.-->>>>>>>>---------------
00025     //
00026 
00027 This is a single multiple sequence alignment, so you would probably load this
00028 using the Bio.AlignIO.read() function:
00029 
00030     >>> from Bio import AlignIO
00031     >>> align = AlignIO.read("Stockholm/simple.sth", "stockholm")
00032     >>> print align
00033     SingleLetterAlphabet() alignment with 2 rows and 104 columns
00034     UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-G...UGU AP001509.1
00035     AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-C...GAU AE007476.1
00036     >>> for record in align:
00037     ...     print record.id, len(record)
00038     AP001509.1 104
00039     AE007476.1 104
00040 
00041 This example file is clearly using RNA, so you might want the alignment object
00042 (and the SeqRecord objects it holds) to reflect this, rather than simple using
00043 the default single letter alphabet as shown above.  You can do this with an
00044 optional argument to the Bio.AlignIO.read() function:
00045 
00046     >>> from Bio import AlignIO
00047     >>> from Bio.Alphabet import generic_rna
00048     >>> align = AlignIO.read("Stockholm/simple.sth", "stockholm",
00049     ...                      alphabet=generic_rna)
00050     >>> print align
00051     RNAAlphabet() alignment with 2 rows and 104 columns
00052     UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-G...UGU AP001509.1
00053     AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-C...GAU AE007476.1
00054 
00055 In addition to the sequences themselves, this example alignment also includes
00056 some GR lines for the secondary structure of the sequences.  These are
00057 strings, with one character for each letter in the associated sequence:
00058 
00059     >>> for record in align:
00060     ...     print record.id
00061     ...     print record.seq
00062     ...     print record.letter_annotations['secondary_structure']
00063     AP001509.1
00064     UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
00065     -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
00066     AE007476.1
00067     AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
00068     -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>---------------
00069 
00070 Any general annotation for each row is recorded in the SeqRecord's annotations
00071 dictionary.  You can output this alignment in many different file formats
00072 using Bio.AlignIO.write(), or the MultipleSeqAlignment object's format method:
00073 
00074     >>> print align.format("fasta")
00075     >AP001509.1
00076     UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-A
00077     GGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
00078     >AE007476.1
00079     AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAA
00080     GGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
00081     <BLANKLINE>
00082 
00083 Most output formats won't be able to hold the annotation possible in a
00084 Stockholm file:
00085 
00086     >>> print align.format("stockholm")
00087     # STOCKHOLM 1.0
00088     #=GF SQ 2
00089     AP001509.1 UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
00090     #=GS AP001509.1 AC AP001509.1
00091     #=GS AP001509.1 DE AP001509.1
00092     #=GR AP001509.1 SS -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
00093     AE007476.1 AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
00094     #=GS AE007476.1 AC AE007476.1
00095     #=GS AE007476.1 DE AE007476.1
00096     #=GR AE007476.1 SS -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>---------------
00097     //
00098     <BLANKLINE>
00099 
00100 Note that when writing Stockholm files, AlignIO does not break long sequences
00101 up and interleave them (as in the input file shown above).  The standard
00102 allows this simpler layout, and it is more likely to be understood by other
00103 tools. 
00104 
00105 Finally, as an aside, it can sometimes be useful to use Bio.SeqIO.parse() to
00106 iterate over the alignment rows as SeqRecord objects - rather than working
00107 with Alignnment objects. Again, if you want to you can specify this is RNA:
00108 
00109     >>> from Bio import SeqIO
00110     >>> from Bio.Alphabet import generic_rna
00111     >>> for record in SeqIO.parse("Stockholm/simple.sth", "stockholm",
00112     ...                           alphabet=generic_rna):
00113     ...     print record.id
00114     ...     print record.seq
00115     ...     print record.letter_annotations['secondary_structure']
00116     AP001509.1
00117     UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
00118     -----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
00119     AE007476.1
00120     AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-CACGA-CGUUUCUACAAGGUG-CCGG-AA-CACCUAACAAUAAGUAAGUCAGCAGUGAGAU
00121     -----------------<<<<<<<<-----<<.<<-------->>.>>----------.<<<<<--------->>>>>.-->>>>>>>>---------------
00122 
00123 Remember that if you slice a SeqRecord, the per-letter-annotions like the
00124 secondary structure string here, are also sliced:
00125 
00126     >>> sub_record = record[10:20]
00127     >>> print sub_record.seq
00128     AUCGUUUUAC
00129     >>> print sub_record.letter_annotations['secondary_structure']
00130     -------<<<
00131 """
00132 __docformat__ = "epytext en" #not just plaintext
00133 from Bio.Seq import Seq
00134 from Bio.SeqRecord import SeqRecord
00135 from Bio.Align import MultipleSeqAlignment
00136 from Interfaces import AlignmentIterator, SequentialAlignmentWriter
00137 
00138 class StockholmWriter(SequentialAlignmentWriter):
00139     """Stockholm/PFAM alignment writer."""
00140 
00141     #These dictionaries should be kept in sync with those
00142     #defined in the StockholmIterator class.
00143     pfam_gr_mapping = {"secondary_structure" : "SS",
00144                        "surface_accessibility" : "SA",
00145                        "transmembrane" : "TM",
00146                        "posterior_probability" : "PP",
00147                        "ligand_binding" : "LI",
00148                        "active_site" : "AS",
00149                        "intron" : "IN"}
00150     #Following dictionary deliberately does not cover AC, DE or DR
00151     pfam_gs_mapping = {"organism" : "OS",
00152                        "organism_classification" : "OC",
00153                        "look" : "LO"}
00154 
00155     def write_alignment(self, alignment):
00156         """Use this to write (another) single alignment to an open file.
00157         
00158         Note that sequences and their annotation are recorded
00159         together (rather than having a block of annotation followed
00160         by a block of aligned sequences).
00161         """
00162         count = len(alignment)
00163         
00164         self._length_of_sequences = alignment.get_alignment_length()
00165         self._ids_written = []
00166         
00167         #NOTE - For now, the alignment object does not hold any per column
00168         #or per alignment annotation - only per sequence.
00169         
00170         if count == 0:
00171             raise ValueError("Must have at least one sequence")
00172         if self._length_of_sequences == 0:
00173             raise ValueError("Non-empty sequences are required")
00174 
00175         self.handle.write("# STOCKHOLM 1.0\n")
00176         self.handle.write("#=GF SQ %i\n" % count)
00177         for record in alignment:
00178             self._write_record(record)
00179         self.handle.write("//\n")
00180 
00181     def _write_record(self, record):
00182         """Write a single SeqRecord to the file"""
00183         if self._length_of_sequences != len(record.seq):
00184             raise ValueError("Sequences must all be the same length")
00185 
00186         #For the case for stockholm to stockholm, try and use record.name
00187         seq_name = record.id
00188         if record.name is not None:
00189             if "accession" in record.annotations:
00190                 if record.id == record.annotations["accession"]:
00191                     seq_name = record.name
00192 
00193         #In the Stockholm file format, spaces are not allowed in the id
00194         seq_name = seq_name.replace(" ","_")
00195 
00196         if "start" in record.annotations \
00197         and  "end" in record.annotations:
00198             suffix = "/%s-%s" % (str(record.annotations["start"]),
00199                                  str(record.annotations["end"]))
00200             if seq_name[-len(suffix):] != suffix:
00201                 seq_name = "%s/%s-%s" % (seq_name,
00202                                         str(record.annotations["start"]),
00203                                         str(record.annotations["end"]))
00204 
00205         if seq_name in self._ids_written:
00206             raise ValueError("Duplicate record identifier: %s" % seq_name)
00207         self._ids_written.append(seq_name)
00208         self.handle.write("%s %s\n" % (seq_name, record.seq.tostring()))
00209 
00210         #The recommended placement for GS lines (per sequence annotation)
00211         #is above the alignment (as a header block) or just below the
00212         #corresponding sequence.
00213         #
00214         #The recommended placement for GR lines (per sequence per column
00215         #annotation such as secondary structure) is just below the
00216         #corresponding sequence.
00217         #
00218         #We put both just below the corresponding sequence as this allows
00219         #us to write the file using a single pass through the records.
00220 
00221         #AC = Accession
00222         if "accession" in record.annotations:
00223             self.handle.write("#=GS %s AC %s\n" \
00224                 % (seq_name, self.clean(record.annotations["accession"])))
00225         elif record.id:
00226             self.handle.write("#=GS %s AC %s\n" \
00227                 % (seq_name, self.clean(record.id)))
00228         
00229         #DE = description
00230         if record.description:
00231             self.handle.write("#=GS %s DE %s\n" \
00232                 % (seq_name, self.clean(record.description)))
00233 
00234         #DE = database links
00235         for xref in record.dbxrefs:
00236             self.handle.write("#=GS %s DR %s\n" \
00237                 % (seq_name, self.clean(xref)))
00238 
00239         #GS = other per sequence annotation
00240         for key, value in record.annotations.iteritems():
00241             if key in self.pfam_gs_mapping:
00242                 data = self.clean(str(value))
00243                 if data:
00244                     self.handle.write("#=GS %s %s %s\n" \
00245                                       % (seq_name,
00246                                          self.clean(self.pfam_gs_mapping[key]),
00247                                          data))
00248             else:
00249                 #It doesn't follow the PFAM standards, but should we record
00250                 #this data anyway?
00251                 pass
00252 
00253         #GR = per row per column sequence annotation
00254         for key, value in record.letter_annotations.iteritems():
00255             if key in self.pfam_gr_mapping and len(str(value))==len(record.seq):
00256                 data = self.clean(str(value))
00257                 if data:
00258                     self.handle.write("#=GR %s %s %s\n" \
00259                                       % (seq_name,
00260                                          self.clean(self.pfam_gr_mapping[key]),
00261                                          data))
00262             else:
00263                 #It doesn't follow the PFAM standards, but should we record
00264                 #this data anyway?
00265                 pass
00266         
00267 class StockholmIterator(AlignmentIterator):
00268     """Loads a Stockholm file from PFAM into MultipleSeqAlignment objects.
00269 
00270     The file may contain multiple concatenated alignments, which are loaded
00271     and returned incrementally.
00272 
00273     This parser will detect if the Stockholm file follows the PFAM
00274     conventions for sequence specific meta-data (lines starting #=GS
00275     and #=GR) and populates the SeqRecord fields accordingly.
00276     
00277     Any annotation which does not follow the PFAM conventions is currently
00278     ignored.
00279 
00280     If an accession is provided for an entry in the meta data, IT WILL NOT
00281     be used as the record.id (it will be recorded in the record's
00282     annotations).  This is because some files have (sub) sequences from
00283     different parts of the same accession (differentiated by different
00284     start-end positions).
00285 
00286     Wrap-around alignments are not supported - each sequences must be on
00287     a single line.  However, interlaced sequences should work.
00288 
00289     For more information on the file format, please see:
00290     http://www.bioperl.org/wiki/Stockholm_multiple_alignment_format
00291     http://www.cgb.ki.se/cgb/groups/sonnhammer/Stockholm.html
00292 
00293     For consistency with BioPerl and EMBOSS we call this the "stockholm"
00294     format.
00295     """
00296 
00297     #These dictionaries should be kept in sync with those
00298     #defined in the PfamStockholmWriter class.
00299     pfam_gr_mapping = {"SS" : "secondary_structure",
00300                        "SA" : "surface_accessibility",
00301                        "TM" : "transmembrane",
00302                        "PP" : "posterior_probability",
00303                        "LI" : "ligand_binding",
00304                        "AS" : "active_site",
00305                        "IN" : "intron"}
00306     #Following dictionary deliberately does not cover AC, DE or DR
00307     pfam_gs_mapping = {"OS" : "organism",
00308                        "OC" : "organism_classification",
00309                        "LO" : "look"}
00310 
00311     def next(self):
00312         try:
00313             line = self._header
00314             del self._header
00315         except AttributeError:
00316             line = self.handle.readline()
00317         if not line:
00318             #Empty file - just give up.
00319             raise StopIteration
00320         if not line.strip() == '# STOCKHOLM 1.0':
00321             raise ValueError("Did not find STOCKHOLM header")
00322             #import sys
00323             #print >> sys.stderr, 'Warning file does not start with STOCKHOLM 1.0'
00324 
00325         # Note: If this file follows the PFAM conventions, there should be
00326         # a line containing the number of sequences, e.g. "#=GF SQ 67"
00327         # We do not check for this - perhaps we should, and verify that
00328         # if present it agrees with our parsing.
00329 
00330         seqs = {}
00331         ids = []
00332         gs = {}
00333         gr = {}
00334         gf = {}
00335         passed_end_alignment = False
00336         while 1:
00337             line = self.handle.readline()
00338             if not line: break #end of file
00339             line = line.strip() #remove trailing \n
00340             if line == '# STOCKHOLM 1.0':
00341                 self._header = line
00342                 break
00343             elif line == "//":
00344                 #The "//" line indicates the end of the alignment.
00345                 #There may still be more meta-data
00346                 passed_end_alignment = True
00347             elif line == "":
00348                 #blank line, ignore
00349                 pass
00350             elif line[0] != "#":
00351                 #Sequence
00352                 #Format: "<seqname> <sequence>"
00353                 assert not passed_end_alignment
00354                 parts = [x.strip() for x in line.split(" ",1)]
00355                 if len(parts) != 2:
00356                     #This might be someone attempting to store a zero length sequence?
00357                     raise ValueError("Could not split line into identifier " \
00358                                       + "and sequence:\n" + line)
00359                 id, seq = parts
00360                 if id not in ids:
00361                     ids.append(id)
00362                 seqs.setdefault(id, '')
00363                 seqs[id] += seq.replace(".","-")
00364             elif len(line) >= 5:
00365                 #Comment line or meta-data
00366                 if line[:5] == "#=GF ":
00367                     #Generic per-File annotation, free text
00368                     #Format: #=GF <feature> <free text>
00369                     feature, text = line[5:].strip().split(None,1)
00370                     #Each feature key could be used more than once,
00371                     #so store the entries as a list of strings.
00372                     if feature not in gf:
00373                         gf[feature] = [text]
00374                     else:
00375                         gf[feature].append(text)
00376                 elif line[:5] == '#=GC ':
00377                     #Generic per-Column annotation, exactly 1 char per column
00378                     #Format: "#=GC <feature> <exactly 1 char per column>"
00379                     pass
00380                 elif line[:5] == '#=GS ':
00381                     #Generic per-Sequence annotation, free text
00382                     #Format: "#=GS <seqname> <feature> <free text>"
00383                     id, feature, text = line[5:].strip().split(None,2)
00384                     #if id not in ids:
00385                     #    ids.append(id)
00386                     if id not in gs:
00387                         gs[id] = {}
00388                     if feature not in gs[id]:
00389                         gs[id][feature] = [text]
00390                     else:
00391                         gs[id][feature].append(text)
00392                 elif line[:5] == "#=GR ":
00393                     #Generic per-Sequence AND per-Column markup
00394                     #Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
00395                     id, feature, text = line[5:].strip().split(None,2)
00396                     #if id not in ids:
00397                     #    ids.append(id)
00398                     if id not in gr:
00399                         gr[id] = {}
00400                     if feature not in gr[id]:
00401                         gr[id][feature] = ""
00402                     gr[id][feature] += text.strip() # append to any previous entry
00403                     #TODO - Should we check the length matches the alignment length?
00404                     #       For iterlaced sequences the GR data can be split over
00405                     #       multiple lines
00406             #Next line...            
00407 
00408 
00409         assert len(seqs) <= len(ids)
00410         #assert len(gs)   <= len(ids)
00411         #assert len(gr)   <= len(ids)
00412 
00413         self.ids = ids
00414         self.sequences = seqs
00415         self.seq_annotation = gs
00416         self.seq_col_annotation = gr
00417 
00418         if ids and seqs:
00419 
00420             if self.records_per_alignment is not None \
00421             and self.records_per_alignment != len(ids):
00422                 raise ValueError("Found %i records in this alignment, told to expect %i" \
00423                                  % (len(ids), self.records_per_alignment))
00424 
00425             alignment_length = len(seqs.values()[0])
00426             records = [] #Alignment obj will put them all in a list anyway
00427             for id in ids:
00428                 seq = seqs[id]
00429                 if alignment_length != len(seq):
00430                     raise ValueError("Sequences have different lengths, or repeated identifier")
00431                 name, start, end = self._identifier_split(id)
00432                 record = SeqRecord(Seq(seq, self.alphabet),
00433                                    id = id, name = name, description = id,
00434                                    annotations = {"accession":name})
00435                 #Accession will be overridden by _populate_meta_data if an explicit
00436                 #accession is provided:
00437                 record.annotations["accession"]=name
00438 
00439                 if start is not None:
00440                     record.annotations["start"] = start
00441                 if end is not None:
00442                     record.annotations["end"] = end
00443 
00444                 self._populate_meta_data(id, record)
00445                 records.append(record)
00446             alignment = MultipleSeqAlignment(records, self.alphabet)
00447 
00448             #TODO - Introduce an annotated alignment class?
00449             #For now, store the annotation a new private property:
00450             alignment._annotations = gr
00451 
00452             return alignment
00453         else:
00454             raise StopIteration
00455 
00456 
00457     def _identifier_split(self, identifier):
00458         """Returns (name,start,end) string tuple from an identier."""
00459         if identifier.find("/")!=-1:
00460             name, start_end = identifier.rsplit("/",1)
00461             if start_end.count("-")==1:
00462                 try:
00463                     start, end = map(int, start_end.split("-"))
00464                     return (name, start, end)
00465                 except ValueError:
00466                     # Non-integers after final '/' - fall through
00467                     pass
00468         return (identifier, None, None)
00469     
00470     def _get_meta_data(self, identifier, meta_dict):
00471         """Takes an itentifier and returns dict of all meta-data matching it.
00472 
00473         For example, given "Q9PN73_CAMJE/149-220" will return all matches to
00474         this or "Q9PN73_CAMJE" which the identifier without its /start-end
00475         suffix.
00476 
00477         In the example below, the suffix is required to match the AC, but must
00478         be removed to match the OS and OC meta-data::
00479 
00480             # STOCKHOLM 1.0
00481             #=GS Q9PN73_CAMJE/149-220  AC Q9PN73
00482             ...
00483             Q9PN73_CAMJE/149-220               NKA...
00484             ...
00485             #=GS Q9PN73_CAMJE OS Campylobacter jejuni
00486             #=GS Q9PN73_CAMJE OC Bacteria 
00487 
00488         This function will return an empty dictionary if no data is found."""
00489         name, start, end = self._identifier_split(identifier)
00490         if name==identifier:
00491             identifier_keys = [identifier]
00492         else:
00493             identifier_keys = [identifier, name]
00494         answer = {}
00495         for identifier_key in identifier_keys:
00496             try:
00497                 for feature_key in meta_dict[identifier_key]:
00498                     answer[feature_key] = meta_dict[identifier_key][feature_key]
00499             except KeyError:
00500                 pass
00501         return answer
00502 
00503     def _populate_meta_data(self, identifier, record):
00504         """Adds meta-date to a SecRecord's annotations dictionary.
00505 
00506         This function applies the PFAM conventions."""
00507 
00508         seq_data = self._get_meta_data(identifier, self.seq_annotation)
00509         for feature in seq_data:
00510             #Note this dictionary contains lists!
00511             if feature=="AC" : #ACcession number
00512                 assert len(seq_data[feature])==1
00513                 record.annotations["accession"]=seq_data[feature][0]
00514             elif feature=="DE" : #DEscription
00515                 record.description = "\n".join(seq_data[feature])
00516             elif feature=="DR" : #Database Reference
00517                 #Should we try and parse the strings?
00518                 record.dbxrefs = seq_data[feature]
00519             elif feature in self.pfam_gs_mapping:
00520                 record.annotations[self.pfam_gs_mapping[feature]] = ", ".join(seq_data[feature])
00521             else:
00522                 #Ignore it?
00523                 record.annotations["GS:" + feature] = ", ".join(seq_data[feature])
00524 
00525         #Now record the per-letter-annotations
00526         seq_col_data = self._get_meta_data(identifier, self.seq_col_annotation)
00527         for feature in seq_col_data:
00528             #Note this dictionary contains strings!
00529             if feature in self.pfam_gr_mapping:
00530                 record.letter_annotations[self.pfam_gr_mapping[feature]] = seq_col_data[feature]
00531             else:
00532                 #Ignore it?
00533                 record.letter_annotations["GR:" + feature] = seq_col_data[feature]
00534     
00535 def _test():
00536     """Run the Bio.SeqIO module's doctests.
00537 
00538     This will try and locate the unit tests directory, and run the doctests
00539     from there in order that the relative paths used in the examples work.
00540     """
00541     import doctest
00542     import os
00543     if os.path.isdir(os.path.join("..","..","Tests")):
00544         print "Runing doctests..."
00545         cur_dir = os.path.abspath(os.curdir)
00546         os.chdir(os.path.join("..","..","Tests"))
00547         assert os.path.isfile("Stockholm/simple.sth")
00548         doctest.testmod()
00549         os.chdir(cur_dir)
00550         del cur_dir
00551         print "Done"
00552         
00553 if __name__ == "__main__":
00554     _test()