Back to index

python-biopython  1.60
SeqRecord.py
Go to the documentation of this file.
00001 # Copyright 2000-2002 Andrew Dalke.
00002 # Copyright 2002-2004 Brad Chapman.
00003 # Copyright 2006-2010 by Peter Cock.
00004 # All rights reserved.
00005 # This code is part of the Biopython distribution and governed by its
00006 # license.  Please see the LICENSE file that should have been included
00007 # as part of this package.
00008 """Represent a Sequence Record, a sequence with annotation."""
00009 __docformat__ = "epytext en" #Simple markup to show doctests nicely
00010 
00011 # NEEDS TO BE SYNCH WITH THE REST OF BIOPYTHON AND BIOPERL
00012 # In particular, the SeqRecord and BioSQL.BioSeq.DBSeqRecord classes
00013 # need to be in sync (this is the BioSQL "Database SeqRecord", see
00014 # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class)
00015 
00016 class _RestrictedDict(dict):
00017     """Dict which only allows sequences of given length as values (PRIVATE).
00018 
00019     This simple subclass of the Python dictionary is used in the SeqRecord
00020     object for holding per-letter-annotations.  This class is intended to
00021     prevent simple errors by only allowing python sequences (e.g. lists,
00022     strings and tuples) to be stored, and only if their length matches that
00023     expected (the length of the SeqRecord's seq object).  It cannot however
00024     prevent the entries being edited in situ (for example appending entries
00025     to a list).
00026 
00027     >>> x = _RestrictedDict(5)
00028     >>> x["test"] = "hello"
00029     >>> x
00030     {'test': 'hello'}
00031 
00032     Adding entries which don't have the expected length are blocked:
00033 
00034     >>> x["test"] = "hello world"
00035     Traceback (most recent call last):
00036     ...
00037     TypeError: We only allow python sequences (lists, tuples or strings) of length 5.
00038 
00039     The expected length is stored as a private attribute,
00040 
00041     >>> x._length
00042     5
00043 
00044     In order that the SeqRecord (and other objects using this class) can be
00045     pickled, for example for use in the multiprocessing library, we need to
00046     be able to pickle the restricted dictionary objects.
00047     
00048     Using the default protocol, which is 0 on Python 2.x,
00049 
00050     >>> import pickle
00051     >>> y = pickle.loads(pickle.dumps(x))
00052     >>> y
00053     {'test': 'hello'}
00054     >>> y._length
00055     5
00056 
00057     Using the highest protocol, which is 2 on Python 2.x,
00058 
00059     >>> import pickle
00060     >>> z = pickle.loads(pickle.dumps(x, pickle.HIGHEST_PROTOCOL))
00061     >>> z
00062     {'test': 'hello'}
00063     >>> z._length
00064     5
00065     """
00066 
00067     def __init__(self, length):
00068         """Create an EMPTY restricted dictionary."""
00069         dict.__init__(self)
00070         self._length = int(length)
00071 
00072     def __setitem__(self, key, value):
00073         #The check hasattr(self, "_length") is to cope with pickle protocol 2
00074         #I couldn't seem to avoid this with __getstate__ and __setstate__
00075         if not hasattr(value,"__len__") or not hasattr(value,"__getitem__") \
00076         or (hasattr(self, "_length") and len(value) != self._length):
00077             raise TypeError("We only allow python sequences (lists, tuples or "
00078                             "strings) of length %i." % self._length)
00079         dict.__setitem__(self, key, value)
00080 
00081     def update(self, new_dict):
00082         #Force this to go via our strict __setitem__ method
00083         for (key, value) in new_dict.iteritems():
00084             self[key] = value
00085 
00086 
00087 class SeqRecord(object):
00088     """A SeqRecord object holds a sequence and information about it.
00089 
00090     Main attributes:
00091      - id          - Identifier such as a locus tag (string)
00092      - seq         - The sequence itself (Seq object or similar)
00093 
00094     Additional attributes:
00095      - name        - Sequence name, e.g. gene name (string)
00096      - description - Additional text (string)
00097      - dbxrefs     - List of database cross references (list of strings)
00098      - features    - Any (sub)features defined (list of SeqFeature objects)
00099      - annotations - Further information about the whole sequence (dictionary)
00100                      Most entries are strings, or lists of strings.
00101      - letter_annotations - Per letter/symbol annotation (restricted
00102                      dictionary). This holds Python sequences (lists, strings
00103                      or tuples) whose length matches that of the sequence.
00104                      A typical use would be to hold a list of integers
00105                      representing sequencing quality scores, or a string
00106                      representing the secondary structure.
00107 
00108     You will typically use Bio.SeqIO to read in sequences from files as
00109     SeqRecord objects.  However, you may want to create your own SeqRecord
00110     objects directly (see the __init__ method for further details):
00111 
00112     >>> from Bio.Seq import Seq
00113     >>> from Bio.SeqRecord import SeqRecord
00114     >>> from Bio.Alphabet import IUPAC
00115     >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
00116     ...                         IUPAC.protein),
00117     ...                    id="YP_025292.1", name="HokC",
00118     ...                    description="toxic membrane protein")
00119     >>> print record
00120     ID: YP_025292.1
00121     Name: HokC
00122     Description: toxic membrane protein
00123     Number of features: 0
00124     Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())
00125 
00126     If you want to save SeqRecord objects to a sequence file, use Bio.SeqIO
00127     for this.  For the special case where you want the SeqRecord turned into
00128     a string in a particular file format there is a format method which uses
00129     Bio.SeqIO internally:
00130 
00131     >>> print record.format("fasta")
00132     >YP_025292.1 toxic membrane protein
00133     MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
00134     <BLANKLINE>
00135 
00136     You can also do things like slicing a SeqRecord, checking its length, etc
00137 
00138     >>> len(record)
00139     44
00140     >>> edited = record[:10] + record[11:]
00141     >>> print edited.seq
00142     MKQHKAMIVAIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
00143     >>> print record.seq
00144     MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
00145     
00146     """
00147     def __init__(self, seq, id = "<unknown id>", name = "<unknown name>",
00148                  description = "<unknown description>", dbxrefs = None,
00149                  features = None, annotations = None,
00150                  letter_annotations = None):
00151         """Create a SeqRecord.
00152 
00153         Arguments:
00154          - seq         - Sequence, required (Seq, MutableSeq or UnknownSeq)
00155          - id          - Sequence identifier, recommended (string)
00156          - name        - Sequence name, optional (string)
00157          - description - Sequence description, optional (string)
00158          - dbxrefs     - Database cross references, optional (list of strings)
00159          - features    - Any (sub)features, optional (list of SeqFeature objects)
00160          - annotations - Dictionary of annotations for the whole sequence
00161          - letter_annotations - Dictionary of per-letter-annotations, values
00162                                 should be strings, list or tuples of the same
00163                                 length as the full sequence.
00164 
00165         You will typically use Bio.SeqIO to read in sequences from files as
00166         SeqRecord objects.  However, you may want to create your own SeqRecord
00167         objects directly.
00168 
00169         Note that while an id is optional, we strongly recommend you supply a
00170         unique id string for each record.  This is especially important
00171         if you wish to write your sequences to a file.
00172 
00173         If you don't have the actual sequence, but you do know its length,
00174         then using the UnknownSeq object from Bio.Seq is appropriate.
00175 
00176         You can create a 'blank' SeqRecord object, and then populate the
00177         attributes later.  
00178         """
00179         if id is not None and not isinstance(id, basestring):
00180             #Lots of existing code uses id=None... this may be a bad idea.
00181             raise TypeError("id argument should be a string")
00182         if not isinstance(name, basestring):
00183             raise TypeError("name argument should be a string")
00184         if not isinstance(description, basestring):
00185             raise TypeError("description argument should be a string")
00186         self._seq = seq
00187         self.id = id
00188         self.name = name
00189         self.description = description
00190 
00191         # database cross references (for the whole sequence)
00192         if dbxrefs is None:
00193             dbxrefs = []
00194         elif not isinstance(dbxrefs, list):
00195             raise TypeError("dbxrefs argument should be a list (of strings)")
00196         self.dbxrefs = dbxrefs
00197         
00198         # annotations about the whole sequence
00199         if annotations is None:
00200             annotations = {}
00201         elif not isinstance(annotations, dict):
00202             raise TypeError("annotations argument should be a dict")
00203         self.annotations = annotations
00204 
00205         if letter_annotations is None:
00206             # annotations about each letter in the sequence
00207             if seq is None:
00208                 #Should we allow this and use a normal unrestricted dict?
00209                 self._per_letter_annotations = _RestrictedDict(length=0)
00210             else:
00211                 try:
00212                     self._per_letter_annotations = \
00213                                               _RestrictedDict(length=len(seq))
00214                 except:
00215                     raise TypeError("seq argument should be a Seq object or similar")
00216         else:
00217             #This will be handled via the property set function, which will
00218             #turn this into a _RestrictedDict and thus ensure all the values
00219             #in the dict are the right length
00220             self.letter_annotations = letter_annotations
00221         
00222         # annotations about parts of the sequence
00223         if features is None:
00224             features = []
00225         elif not isinstance(features, list):
00226             raise TypeError("features argument should be a list (of SeqFeature objects)")
00227         self.features = features
00228 
00229     #TODO - Just make this a read only property?
00230     def _set_per_letter_annotations(self, value):
00231         if not isinstance(value, dict):
00232             raise TypeError("The per-letter-annotations should be a "
00233                             "(restricted) dictionary.")
00234         #Turn this into a restricted-dictionary (and check the entries)
00235         try:
00236             self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
00237         except AttributeError:
00238             #e.g. seq is None
00239             self._per_letter_annotations = _RestrictedDict(length=0)
00240         self._per_letter_annotations.update(value)
00241     letter_annotations = property( \
00242         fget=lambda self : self._per_letter_annotations,
00243         fset=_set_per_letter_annotations,
00244         doc="""Dictionary of per-letter-annotation for the sequence.
00245 
00246         For example, this can hold quality scores used in FASTQ or QUAL files.
00247         Consider this example using Bio.SeqIO to read in an example Solexa
00248         variant FASTQ file as a SeqRecord:
00249 
00250         >>> from Bio import SeqIO
00251         >>> handle = open("Quality/solexa_faked.fastq", "rU")
00252         >>> record = SeqIO.read(handle, "fastq-solexa")
00253         >>> handle.close()
00254         >>> print record.id, record.seq
00255         slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
00256         >>> print record.letter_annotations.keys()
00257         ['solexa_quality']
00258         >>> print record.letter_annotations["solexa_quality"]
00259         [40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
00260 
00261         The letter_annotations get sliced automatically if you slice the
00262         parent SeqRecord, for example taking the last ten bases:
00263 
00264         >>> sub_record = record[-10:]
00265         >>> print sub_record.id, sub_record.seq
00266         slxa_0001_1_0001_01 ACGTNNNNNN
00267         >>> print sub_record.letter_annotations["solexa_quality"]
00268         [4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
00269 
00270         Any python sequence (i.e. list, tuple or string) can be recorded in
00271         the SeqRecord's letter_annotations dictionary as long as the length
00272         matches that of the SeqRecord's sequence.  e.g.
00273 
00274         >>> len(sub_record.letter_annotations)
00275         1
00276         >>> sub_record.letter_annotations["dummy"] = "abcdefghij"
00277         >>> len(sub_record.letter_annotations)
00278         2
00279 
00280         You can delete entries from the letter_annotations dictionary as usual:
00281 
00282         >>> del sub_record.letter_annotations["solexa_quality"]
00283         >>> sub_record.letter_annotations
00284         {'dummy': 'abcdefghij'}
00285 
00286         You can completely clear the dictionary easily as follows:
00287 
00288         >>> sub_record.letter_annotations = {}
00289         >>> sub_record.letter_annotations
00290         {}
00291         """)
00292 
00293     def _set_seq(self, value):
00294         #TODO - Add a deprecation warning that the seq should be write only?
00295         if self._per_letter_annotations:
00296             #TODO - Make this a warning? Silently empty the dictionary?
00297             raise ValueError("You must empty the letter annotations first!")
00298         self._seq = value
00299         try:
00300             self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
00301         except AttributeError:
00302             #e.g. seq is None
00303             self._per_letter_annotations = _RestrictedDict(length=0)
00304 
00305     seq = property(fget=lambda self : self._seq,
00306                    fset=_set_seq,
00307                    doc="The sequence itself, as a Seq or MutableSeq object.")
00308 
00309     def __getitem__(self, index):
00310         """Returns a sub-sequence or an individual letter.
00311 
00312         Slicing, e.g. my_record[5:10], returns a new SeqRecord for
00313         that sub-sequence with approriate annotation preserved.  The
00314         name, id and description are kept.
00315 
00316         Any per-letter-annotations are sliced to match the requested
00317         sub-sequence.  Unless a stride is used, all those features
00318         which fall fully within the subsequence are included (with
00319         their locations adjusted accordingly).
00320 
00321         However, the annotations dictionary and the dbxrefs list are
00322         not used for the new SeqRecord, as in general they may not
00323         apply to the subsequence.  If you want to preserve them, you
00324         must explictly copy them to the new SeqRecord yourself.
00325 
00326         Using an integer index, e.g. my_record[5] is shorthand for
00327         extracting that letter from the sequence, my_record.seq[5].
00328 
00329         For example, consider this short protein and its secondary
00330         structure as encoded by the PDB (e.g. H for alpha helices),
00331         plus a simple feature for its histidine self phosphorylation
00332         site:
00333 
00334         >>> from Bio.Seq import Seq
00335         >>> from Bio.SeqRecord import SeqRecord
00336         >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
00337         >>> from Bio.Alphabet import IUPAC
00338         >>> rec = SeqRecord(Seq("MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLAT"
00339         ...                     "EMMSEQDGYLAESINKDIEECNAIIEQFIDYLR",
00340         ...                     IUPAC.protein),
00341         ...                 id="1JOY", name="EnvZ",
00342         ...                 description="Homodimeric domain of EnvZ from E. coli")
00343         >>> rec.letter_annotations["secondary_structure"] = "  S  SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT  "
00344         >>> rec.features.append(SeqFeature(FeatureLocation(20,21),
00345         ...                     type = "Site"))
00346 
00347         Now let's have a quick look at the full record,
00348 
00349         >>> print rec
00350         ID: 1JOY
00351         Name: EnvZ
00352         Description: Homodimeric domain of EnvZ from E. coli
00353         Number of features: 1
00354         Per letter annotation for: secondary_structure
00355         Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR', IUPACProtein())
00356         >>> print rec.letter_annotations["secondary_structure"]
00357           S  SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT  
00358         >>> print rec.features[0].location
00359         [20:21]
00360 
00361         Now let's take a sub sequence, here chosen as the first (fractured)
00362         alpha helix which includes the histidine phosphorylation site:
00363 
00364         >>> sub = rec[11:41]
00365         >>> print sub
00366         ID: 1JOY
00367         Name: EnvZ
00368         Description: Homodimeric domain of EnvZ from E. coli
00369         Number of features: 1
00370         Per letter annotation for: secondary_structure
00371         Seq('RTLLMAGVSHDLRTPLTRIRLATEMMSEQD', IUPACProtein())
00372         >>> print sub.letter_annotations["secondary_structure"]
00373         HHHHHTTTHHHHHHHHHHHHHHHHHHHHHH
00374         >>> print sub.features[0].location
00375         [9:10]
00376 
00377         You can also of course omit the start or end values, for
00378         example to get the first ten letters only:
00379 
00380         >>> print rec[:10]
00381         ID: 1JOY
00382         Name: EnvZ
00383         Description: Homodimeric domain of EnvZ from E. coli
00384         Number of features: 0
00385         Per letter annotation for: secondary_structure
00386         Seq('MAAGVKQLAD', IUPACProtein())
00387 
00388         Or for the last ten letters:
00389 
00390         >>> print rec[-10:]
00391         ID: 1JOY
00392         Name: EnvZ
00393         Description: Homodimeric domain of EnvZ from E. coli
00394         Number of features: 0
00395         Per letter annotation for: secondary_structure
00396         Seq('IIEQFIDYLR', IUPACProtein())
00397 
00398         If you omit both, then you get a copy of the original record (although
00399         lacking the annotations and dbxrefs):
00400 
00401         >>> print rec[:]
00402         ID: 1JOY
00403         Name: EnvZ
00404         Description: Homodimeric domain of EnvZ from E. coli
00405         Number of features: 1
00406         Per letter annotation for: secondary_structure
00407         Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR', IUPACProtein())
00408 
00409         Finally, indexing with a simple integer is shorthand for pulling out
00410         that letter from the sequence directly:
00411 
00412         >>> rec[5]
00413         'K'
00414         >>> rec.seq[5]
00415         'K'
00416         """
00417         if isinstance(index, int):
00418             #NOTE - The sequence level annotation like the id, name, etc
00419             #do not really apply to a single character.  However, should
00420             #we try and expose any per-letter-annotation here?  If so how?
00421             return self.seq[index]
00422         elif isinstance(index, slice):
00423             if self.seq is None:
00424                 raise ValueError("If the sequence is None, we cannot slice it.")
00425             parent_length = len(self)
00426             answer = self.__class__(self.seq[index],
00427                                     id=self.id,
00428                                     name=self.name,
00429                                     description=self.description)
00430             #TODO - The desription may no longer apply.
00431             #It would be safer to change it to something
00432             #generic like "edited" or the default value.
00433             
00434             #Don't copy the annotation dict and dbxefs list,
00435             #they may not apply to a subsequence.
00436             #answer.annotations = dict(self.annotations.iteritems())
00437             #answer.dbxrefs = self.dbxrefs[:]
00438             #TODO - Review this in light of adding SeqRecord objects?
00439             
00440             #TODO - Cope with strides by generating ambiguous locations?
00441             start, stop, step = index.indices(parent_length)
00442             if step == 1:
00443                 #Select relevant features, add them with shifted locations
00444                 #assert str(self.seq)[index] == str(self.seq)[start:stop]
00445                 for f in self.features:
00446                     if f.ref or f.ref_db:
00447                         #TODO - Implement this (with lots of tests)?
00448                         import warnings
00449                         warnings.warn("When slicing SeqRecord objects, any "
00450                               "SeqFeature referencing other sequences (e.g. "
00451                               "from segmented GenBank records) are ignored.")
00452                         continue
00453                     if start <= f.location.nofuzzy_start \
00454                     and f.location.nofuzzy_end <= stop:
00455                         answer.features.append(f._shift(-start))
00456 
00457             #Slice all the values to match the sliced sequence
00458             #(this should also work with strides, even negative strides):
00459             for key, value in self.letter_annotations.iteritems():
00460                 answer._per_letter_annotations[key] = value[index]
00461 
00462             return answer
00463         raise ValueError, "Invalid index"
00464 
00465     def __iter__(self):
00466         """Iterate over the letters in the sequence.
00467 
00468         For example, using Bio.SeqIO to read in a protein FASTA file:
00469 
00470         >>> from Bio import SeqIO
00471         >>> record = SeqIO.read(open("Fasta/loveliesbleeding.pro"),"fasta")
00472         >>> for amino in record:
00473         ...     print amino
00474         ...     if amino == "L": break
00475         X
00476         A
00477         G
00478         L
00479         >>> print record.seq[3]
00480         L
00481 
00482         This is just a shortcut for iterating over the sequence directly:
00483 
00484         >>> for amino in record.seq:
00485         ...     print amino
00486         ...     if amino == "L": break
00487         X
00488         A
00489         G
00490         L
00491         >>> print record.seq[3]
00492         L
00493         
00494         Note that this does not facilitate iteration together with any
00495         per-letter-annotation.  However, you can achieve that using the
00496         python zip function on the record (or its sequence) and the relevant
00497         per-letter-annotation:
00498         
00499         >>> from Bio import SeqIO
00500         >>> rec = SeqIO.read(open("Quality/solexa_faked.fastq", "rU"),
00501         ...                  "fastq-solexa")
00502         >>> print rec.id, rec.seq
00503         slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
00504         >>> print rec.letter_annotations.keys()
00505         ['solexa_quality']
00506         >>> for nuc, qual in zip(rec,rec.letter_annotations["solexa_quality"]):
00507         ...     if qual > 35:
00508         ...         print nuc, qual
00509         A 40
00510         C 39
00511         G 38
00512         T 37
00513         A 36
00514 
00515         You may agree that using zip(rec.seq, ...) is more explicit than using
00516         zip(rec, ...) as shown above.
00517         """
00518         return iter(self.seq)
00519 
00520     def __contains__(self, char):
00521         """Implements the 'in' keyword, searches the sequence.
00522 
00523         e.g.
00524 
00525         >>> from Bio import SeqIO
00526         >>> record = SeqIO.read(open("Fasta/sweetpea.nu"), "fasta")
00527         >>> "GAATTC" in record
00528         False
00529         >>> "AAA" in record
00530         True
00531 
00532         This essentially acts as a proxy for using "in" on the sequence:
00533 
00534         >>> "GAATTC" in record.seq
00535         False
00536         >>> "AAA" in record.seq
00537         True
00538 
00539         Note that you can also use Seq objects as the query,
00540 
00541         >>> from Bio.Seq import Seq
00542         >>> from Bio.Alphabet import generic_dna
00543         >>> Seq("AAA") in record
00544         True
00545         >>> Seq("AAA", generic_dna) in record
00546         True
00547 
00548         See also the Seq object's __contains__ method.
00549         """        
00550         return char in self.seq
00551 
00552 
00553     def __str__(self):
00554         """A human readable summary of the record and its annotation (string).
00555 
00556         The python built in function str works by calling the object's ___str__
00557         method.  e.g.
00558 
00559         >>> from Bio.Seq import Seq
00560         >>> from Bio.SeqRecord import SeqRecord
00561         >>> from Bio.Alphabet import IUPAC
00562         >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
00563         ...                         IUPAC.protein),
00564         ...                    id="YP_025292.1", name="HokC",
00565         ...                    description="toxic membrane protein, small")
00566         >>> print str(record)
00567         ID: YP_025292.1
00568         Name: HokC
00569         Description: toxic membrane protein, small
00570         Number of features: 0
00571         Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())
00572 
00573         In this example you don't actually need to call str explicity, as the
00574         print command does this automatically:
00575 
00576         >>> print record
00577         ID: YP_025292.1
00578         Name: HokC
00579         Description: toxic membrane protein, small
00580         Number of features: 0
00581         Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())
00582 
00583         Note that long sequences are shown truncated.
00584         """
00585         lines = []
00586         if self.id:
00587             lines.append("ID: %s" % self.id)
00588         if self.name:
00589             lines.append("Name: %s" % self.name)
00590         if self.description:
00591             lines.append("Description: %s" % self.description)
00592         if self.dbxrefs:
00593             lines.append("Database cross-references: " \
00594                          + ", ".join(self.dbxrefs))
00595         lines.append("Number of features: %i" % len(self.features))
00596         for a in self.annotations:
00597             lines.append("/%s=%s" % (a, str(self.annotations[a])))
00598         if self.letter_annotations:
00599             lines.append("Per letter annotation for: " \
00600                          + ", ".join(self.letter_annotations.keys()))
00601         #Don't want to include the entire sequence,
00602         #and showing the alphabet is useful:
00603         lines.append(repr(self.seq))
00604         return "\n".join(lines)
00605 
00606     def __repr__(self):
00607         """A concise summary of the record for debugging (string).
00608 
00609         The python built in function repr works by calling the object's ___repr__
00610         method.  e.g.
00611 
00612         >>> from Bio.Seq import Seq
00613         >>> from Bio.SeqRecord import SeqRecord
00614         >>> from Bio.Alphabet import generic_protein
00615         >>> rec = SeqRecord(Seq("MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKAT"
00616         ...                    +"GEMKEQTEWHRVVLFGKLAEVASEYLRKGSQVYIEGQLRTRKWTDQ"
00617         ...                    +"SGQDRYTTEVVVNVGGTMQMLGGRQGGGAPAGGNIGGGQPQGGWGQ"
00618         ...                    +"PQQPQGGNQFSGGAQSRPQQSAPAAPSNEPPMDFDDDIPF",
00619         ...                    generic_protein),
00620         ...                 id="NP_418483.1", name="b4059",
00621         ...                 description="ssDNA-binding protein",
00622         ...                 dbxrefs=["ASAP:13298", "GI:16131885", "GeneID:948570"])
00623         >>> print repr(rec)
00624         SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF', ProteinAlphabet()), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])
00625 
00626         At the python prompt you can also use this shorthand:
00627 
00628         >>> rec
00629         SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF', ProteinAlphabet()), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])
00630 
00631         Note that long sequences are shown truncated. Also note that any
00632         annotations, letter_annotations and features are not shown (as they
00633         would lead to a very long string).
00634         """
00635         return self.__class__.__name__ \
00636          + "(seq=%s, id=%s, name=%s, description=%s, dbxrefs=%s)" \
00637          % tuple(map(repr, (self.seq, self.id, self.name,
00638                             self.description, self.dbxrefs)))
00639 
00640     def format(self, format):
00641         r"""Returns the record as a string in the specified file format.
00642 
00643         The format should be a lower case string supported as an output
00644         format by Bio.SeqIO, which is used to turn the SeqRecord into a
00645         string.  e.g.
00646 
00647         >>> from Bio.Seq import Seq
00648         >>> from Bio.SeqRecord import SeqRecord
00649         >>> from Bio.Alphabet import IUPAC
00650         >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
00651         ...                         IUPAC.protein),
00652         ...                    id="YP_025292.1", name="HokC",
00653         ...                    description="toxic membrane protein")
00654         >>> record.format("fasta")
00655         '>YP_025292.1 toxic membrane protein\nMKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF\n'
00656         >>> print record.format("fasta")
00657         >YP_025292.1 toxic membrane protein
00658         MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
00659         <BLANKLINE>
00660 
00661         The python print command automatically appends a new line, meaning
00662         in this example a blank line is shown.  If you look at the string
00663         representation you can see there is a trailing new line (shown as
00664         slash n) which is important when writing to a file or if
00665         concatenating mutliple sequence strings together.
00666 
00667         Note that this method will NOT work on every possible file format
00668         supported by Bio.SeqIO (e.g. some are for multiple sequences only).
00669         """
00670         #See also the __format__ added for Python 2.6 / 3.0, PEP 3101
00671         #See also the Bio.Align.Generic.Alignment class and its format()
00672         return self.__format__(format)
00673 
00674     def __format__(self, format_spec):
00675         """Returns the record as a string in the specified file format.
00676 
00677         This method supports the python format() function added in
00678         Python 2.6/3.0.  The format_spec should be a lower case string
00679         supported by Bio.SeqIO as an output file format. See also the
00680         SeqRecord's format() method.
00681         """
00682         if not format_spec:
00683             #Follow python convention and default to using __str__
00684             return str(self)    
00685         from Bio import SeqIO
00686         if format_spec in SeqIO._BinaryFormats:
00687             #Return bytes on Python 3
00688             try:
00689                 #This is in Python 2.6+, but we need it on Python 3
00690                 from io import BytesIO
00691                 handle = BytesIO()
00692             except ImportError:
00693                 #Must be on Python 2.5 or older
00694                 from StringIO import StringIO
00695                 handle = StringIO()
00696         else:
00697             from StringIO import StringIO
00698             handle = StringIO()
00699         SeqIO.write(self, handle, format_spec)
00700         return handle.getvalue()
00701 
00702     def __len__(self):
00703         """Returns the length of the sequence.
00704 
00705         For example, using Bio.SeqIO to read in a FASTA nucleotide file:
00706 
00707         >>> from Bio import SeqIO
00708         >>> record = SeqIO.read(open("Fasta/sweetpea.nu"),"fasta")
00709         >>> len(record)
00710         309
00711         >>> len(record.seq)
00712         309
00713         """
00714         return len(self.seq)
00715 
00716     def __nonzero__(self):
00717         """Returns True regardless of the length of the sequence.
00718 
00719         This behaviour is for backwards compatibility, since until the
00720         __len__ method was added, a SeqRecord always evaluated as True.
00721 
00722         Note that in comparison, a Seq object will evaluate to False if it
00723         has a zero length sequence.
00724 
00725         WARNING: The SeqRecord may in future evaluate to False when its
00726         sequence is of zero length (in order to better match the Seq
00727         object behaviour)!
00728         """
00729         return True
00730 
00731     def __add__(self, other):
00732         """Add another sequence or string to this sequence.
00733 
00734         The other sequence can be a SeqRecord object, a Seq object (or
00735         similar, e.g. a MutableSeq) or a plain Python string. If you add
00736         a plain string or a Seq (like) object, the new SeqRecord will simply
00737         have this appended to the existing data. However, any per letter
00738         annotation will be lost:
00739 
00740         >>> from Bio import SeqIO
00741         >>> handle = open("Quality/solexa_faked.fastq", "rU")
00742         >>> record = SeqIO.read(handle, "fastq-solexa")
00743         >>> handle.close()
00744         >>> print record.id, record.seq
00745         slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
00746         >>> print record.letter_annotations.keys()
00747         ['solexa_quality']
00748 
00749         >>> new = record + "ACT"
00750         >>> print new.id, new.seq
00751         slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNNACT
00752         >>> print new.letter_annotations.keys()
00753         []
00754         
00755         The new record will attempt to combine the annotation, but for any
00756         ambiguities (e.g. different names) it defaults to omitting that
00757         annotation.
00758 
00759         >>> from Bio import SeqIO
00760         >>> handle = open("GenBank/pBAD30.gb")
00761         >>> plasmid = SeqIO.read(handle, "gb")
00762         >>> handle.close()
00763         >>> print plasmid.id, len(plasmid)
00764         pBAD30 4923
00765 
00766         Now let's cut the plasmid into two pieces, and join them back up the
00767         other way round (i.e. shift the starting point on this plasmid, have
00768         a look at the annotated features in the original file to see why this
00769         particular split point might make sense):
00770 
00771         >>> left = plasmid[:3765]
00772         >>> right = plasmid[3765:]
00773         >>> new = right + left
00774         >>> print new.id, len(new)
00775         pBAD30 4923
00776         >>> str(new.seq) == str(right.seq + left.seq)
00777         True
00778         >>> len(new.features) == len(left.features) + len(right.features)
00779         True
00780 
00781         When we add the left and right SeqRecord objects, their annotation
00782         is all consistent, so it is all conserved in the new SeqRecord:
00783         
00784         >>> new.id == left.id == right.id == plasmid.id
00785         True
00786         >>> new.name == left.name == right.name == plasmid.name
00787         True
00788         >>> new.description == plasmid.description
00789         True
00790         >>> new.annotations == left.annotations == right.annotations
00791         True
00792         >>> new.letter_annotations == plasmid.letter_annotations
00793         True
00794         >>> new.dbxrefs == left.dbxrefs == right.dbxrefs
00795         True
00796 
00797         However, we should point out that when we sliced the SeqRecord,
00798         any annotations dictionary or dbxrefs list entries were lost.
00799         You can explicitly copy them like this:
00800 
00801         >>> new.annotations = plasmid.annotations.copy()
00802         >>> new.dbxrefs = plasmid.dbxrefs[:]
00803         """
00804         if not isinstance(other, SeqRecord):
00805             #Assume it is a string or a Seq.
00806             #Note can't transfer any per-letter-annotations
00807             return SeqRecord(self.seq + other,
00808                              id = self.id, name = self.name,
00809                              description = self.description,
00810                              features = self.features[:],
00811                              annotations = self.annotations.copy(),
00812                              dbxrefs = self.dbxrefs[:])
00813         #Adding two SeqRecord objects... must merge annotation.
00814         answer = SeqRecord(self.seq + other.seq,
00815                            features = self.features[:],
00816                            dbxrefs = self.dbxrefs[:])
00817         #Will take all the features and all the db cross refs,
00818         l = len(self)
00819         for f in other.features:
00820             answer.features.append(f._shift(l))
00821         del l
00822         for ref in other.dbxrefs:
00823             if ref not in answer.dbxrefs:
00824                 answer.dbxrefs.append(ref)
00825         #Take common id/name/description/annotation
00826         if self.id == other.id:
00827             answer.id = self.id
00828         if self.name == other.name:
00829             answer.name = self.name
00830         if self.description == other.description:
00831             answer.description = self.description
00832         for k,v in self.annotations.iteritems():
00833             if k in other.annotations and other.annotations[k] == v:
00834                 answer.annotations[k] = v
00835         #Can append matching per-letter-annotation
00836         for k,v in self.letter_annotations.iteritems():
00837             if k in other.letter_annotations:
00838                 answer.letter_annotations[k] = v + other.letter_annotations[k]
00839         return answer
00840         
00841     def __radd__(self, other):
00842         """Add another sequence or string to this sequence (from the left).
00843 
00844         This method handles adding a Seq object (or similar, e.g. MutableSeq)
00845         or a plain Python string (on the left) to a SeqRecord (on the right).
00846         See the __add__ method for more details, but for example:
00847 
00848         >>> from Bio import SeqIO
00849         >>> handle = open("Quality/solexa_faked.fastq", "rU")
00850         >>> record = SeqIO.read(handle, "fastq-solexa")
00851         >>> handle.close()
00852         >>> print record.id, record.seq
00853         slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
00854         >>> print record.letter_annotations.keys()
00855         ['solexa_quality']
00856 
00857         >>> new = "ACT" + record
00858         >>> print new.id, new.seq
00859         slxa_0001_1_0001_01 ACTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
00860         >>> print new.letter_annotations.keys()
00861         []
00862         """
00863         if isinstance(other, SeqRecord):
00864             raise RuntimeError("This should have happened via the __add__ of "
00865                                "the other SeqRecord being added!")
00866         #Assume it is a string or a Seq.
00867         #Note can't transfer any per-letter-annotations
00868         offset = len(other)
00869         return SeqRecord(other + self.seq,
00870                          id = self.id, name = self.name,
00871                          description = self.description,
00872                          features = [f._shift(offset) for f in self.features],
00873                          annotations = self.annotations.copy(),
00874                          dbxrefs = self.dbxrefs[:])
00875 
00876     def upper(self):
00877         """Returns a copy of the record with an upper case sequence.
00878 
00879         All the annotation is preserved unchanged. e.g.
00880 
00881         >>> from Bio.Alphabet import generic_dna
00882         >>> from Bio.Seq import Seq
00883         >>> from Bio.SeqRecord import SeqRecord
00884         >>> record = SeqRecord(Seq("acgtACGT", generic_dna), id="Test",
00885         ...                    description = "Made up for this example")
00886         >>> record.letter_annotations["phred_quality"] = [1,2,3,4,5,6,7,8]
00887         >>> print record.upper().format("fastq")
00888         @Test Made up for this example
00889         ACGTACGT
00890         +
00891         "#$%&'()
00892         <BLANKLINE>
00893 
00894         Naturally, there is a matching lower method:
00895         
00896         >>> print record.lower().format("fastq")
00897         @Test Made up for this example
00898         acgtacgt
00899         +
00900         "#$%&'()
00901         <BLANKLINE>
00902         """
00903         return SeqRecord(self.seq.upper(),
00904                          id = self.id, name = self.name,
00905                          description = self.description,
00906                          dbxrefs = self.dbxrefs[:],
00907                          features = self.features[:],
00908                          annotations = self.annotations.copy(),
00909                          letter_annotations=self.letter_annotations.copy())
00910 
00911     def lower(self):
00912         """Returns a copy of the record with a lower case sequence.
00913 
00914         All the annotation is preserved unchanged. e.g.
00915 
00916         >>> from Bio import SeqIO
00917         >>> record = SeqIO.read("Fasta/aster.pro", "fasta")
00918         >>> print record.format("fasta")
00919         >gi|3298468|dbj|BAA31520.1| SAMIPF
00920         GGHVNPAVTFGAFVGGNITLLRGIVYIIAQLLGSTVACLLLKFVTNDMAVGVFSLSAGVG
00921         VTNALVFEIVMTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI
00922         <BLANKLINE>
00923         >>> print record.lower().format("fasta")
00924         >gi|3298468|dbj|BAA31520.1| SAMIPF
00925         gghvnpavtfgafvggnitllrgivyiiaqllgstvaclllkfvtndmavgvfslsagvg
00926         vtnalvfeivmtfglvytvyataidpkkgslgtiapiaigfivgani
00927         <BLANKLINE>
00928 
00929         To take a more annotation rich example,
00930 
00931         >>> from Bio import SeqIO
00932         >>> old = SeqIO.read("EMBL/TRBG361.embl", "embl")
00933         >>> len(old.features)
00934         3
00935         >>> new = old.lower()
00936         >>> len(old.features) == len(new.features)
00937         True
00938         >>> old.annotations["organism"] == new.annotations["organism"]
00939         True
00940         >>> old.dbxrefs == new.dbxrefs
00941         True
00942         """
00943         return SeqRecord(self.seq.lower(),
00944                          id = self.id, name = self.name,
00945                          description = self.description,
00946                          dbxrefs = self.dbxrefs[:],
00947                          features = self.features[:],
00948                          annotations = self.annotations.copy(),
00949                          letter_annotations=self.letter_annotations.copy())
00950 
00951     def reverse_complement(self, id=False, name=False, description=False,
00952                            features=True, annotations=False,
00953                            letter_annotations=True, dbxrefs=False):
00954         """Returns new SeqRecord with reverse complement sequence.
00955 
00956         You can specify the returned record's id, name and description as
00957         strings, or True to keep that of the parent, or False for a default.
00958 
00959         You can specify the returned record's features with a list of
00960         SeqFeature objects, or True to keep that of the parent, or False to
00961         omit them. The default is to keep the original features (with the
00962         strand and locations adjusted).
00963 
00964         You can also specify both the returned record's annotations and
00965         letter_annotations as dictionaries, True to keep that of the parent,
00966         or False to omit them. The default is to keep the original
00967         annotations (with the letter annotations reversed).
00968 
00969         To show what happens to the pre-letter annotations, consider an
00970         example Solexa variant FASTQ file with a single entry, which we'll
00971         read in as a SeqRecord:
00972 
00973         >>> from Bio import SeqIO
00974         >>> handle = open("Quality/solexa_faked.fastq", "rU")
00975         >>> record = SeqIO.read(handle, "fastq-solexa")
00976         >>> handle.close()
00977         >>> print record.id, record.seq
00978         slxa_0001_1_0001_01 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTNNNNNN
00979         >>> print record.letter_annotations.keys()
00980         ['solexa_quality']
00981         >>> print record.letter_annotations["solexa_quality"]
00982         [40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5]
00983 
00984         Now take the reverse complement,
00985 
00986         >>> rc_record = record.reverse_complement(id=record.id+"_rc")
00987         >>> print rc_record.id, rc_record.seq
00988         slxa_0001_1_0001_01_rc NNNNNNACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
00989 
00990         Notice that the per-letter-annotations have also been reversed,
00991         although this may not be appropriate for all cases.
00992 
00993         >>> print rc_record.letter_annotations["solexa_quality"]
00994         [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
00995 
00996         Now for the features, we need a different example. Parsing a GenBank
00997         file is probably the easiest way to get an nice example with features
00998         in it...
00999 
01000         >>> from Bio import SeqIO
01001         >>> handle = open("GenBank/pBAD30.gb")
01002         >>> plasmid = SeqIO.read(handle, "gb")
01003         >>> handle.close()
01004         >>> print plasmid.id, len(plasmid)
01005         pBAD30 4923
01006         >>> plasmid.seq
01007         Seq('GCTAGCGGAGTGTATACTGGCTTACTATGTTGGCACTGATGAGGGTGTCAGTGA...ATG', IUPACAmbiguousDNA())
01008         >>> len(plasmid.features)
01009         13
01010 
01011         Now, let's take the reverse complement of this whole plasmid:
01012 
01013         >>> rc_plasmid = plasmid.reverse_complement(id=plasmid.id+"_rc")
01014         >>> print rc_plasmid.id, len(rc_plasmid)
01015         pBAD30_rc 4923
01016         >>> rc_plasmid.seq
01017         Seq('CATGGGCAAATATTATACGCAAGGCGACAAGGTGCTGATGCCGCTGGCGATTCA...AGC', IUPACAmbiguousDNA())
01018         >>> len(rc_plasmid.features)
01019         13
01020 
01021         Let's compare the first CDS feature - it has gone from being the
01022         second feature (index 1) to the second last feature (index -2), its
01023         strand has changed, and the location switched round.
01024 
01025         >>> print plasmid.features[1]
01026         type: CDS
01027         location: [1081:1960](-)
01028         qualifiers: 
01029             Key: label, Value: ['araC']
01030             Key: note, Value: ['araC regulator of the arabinose BAD promoter']
01031             Key: vntifkey, Value: ['4']
01032         <BLANKLINE>
01033         >>> print rc_plasmid.features[-2]
01034         type: CDS
01035         location: [2963:3842](+)
01036         qualifiers: 
01037             Key: label, Value: ['araC']
01038             Key: note, Value: ['araC regulator of the arabinose BAD promoter']
01039             Key: vntifkey, Value: ['4']
01040         <BLANKLINE>
01041 
01042         You can check this new location, based on the length of the plasmid:
01043 
01044         >>> len(plasmid) - 1081
01045         3842
01046         >>> len(plasmid) - 1960
01047         2963
01048 
01049         Note that if the SeqFeature annotation includes any strand specific
01050         information (e.g. base changes for a SNP), this information is not
01051         ammended, and would need correction after the reverse complement.
01052 
01053         Note trying to reverse complement a protein SeqRecord raises an
01054         exception:
01055 
01056         >>> from Bio.SeqRecord import SeqRecord
01057         >>> from Bio.Seq import Seq
01058         >>> from Bio.Alphabet import IUPAC
01059         >>> protein_rec = SeqRecord(Seq("MAIVMGR", IUPAC.protein), id="Test")
01060         >>> protein_rec.reverse_complement()
01061         Traceback (most recent call last):
01062            ...
01063         ValueError: Proteins do not have complements!
01064 
01065         Also note you can reverse complement a SeqRecord using a MutableSeq:
01066 
01067         >>> from Bio.SeqRecord import SeqRecord
01068         >>> from Bio.Seq import MutableSeq
01069         >>> from Bio.Alphabet import generic_dna
01070         >>> rec = SeqRecord(MutableSeq("ACGT", generic_dna), id="Test")
01071         >>> rec.seq[0] = "T"
01072         >>> print rec.id, rec.seq
01073         Test TCGT
01074         >>> rc = rec.reverse_complement(id=True)
01075         >>> print rc.id, rc.seq
01076         Test ACGA
01077         """
01078         from Bio.Seq import MutableSeq #Lazy to avoid circular imports
01079         if isinstance(self.seq, MutableSeq):
01080             #Currently the MutableSeq reverse complement is in situ
01081             answer = SeqRecord(self.seq.toseq().reverse_complement())
01082         else:
01083             answer = SeqRecord(self.seq.reverse_complement())
01084         if isinstance(id, basestring):
01085             answer.id = id
01086         elif id:
01087             answer.id = self.id
01088         if isinstance(name, basestring):
01089             answer.name = name
01090         elif name:
01091             answer.name = self.name
01092         if isinstance(description, basestring):
01093             answer.description = description
01094         elif description:
01095             answer.description = self.description
01096         if isinstance(dbxrefs, list):
01097             answer.dbxrefs = dbxrefs
01098         elif dbxrefs:
01099             #Copy the old dbxrefs
01100             answer.dbxrefs = self.dbxrefs[:]
01101         if isinstance(features, list):
01102             answer.features = features
01103         elif features:
01104             #Copy the old features, adjusting location and string
01105             l = len(answer)
01106             answer.features = [f._flip(l) for f in self.features]
01107             #The old list should have been sorted by start location,
01108             #reversing it will leave it sorted by what is now the end position,
01109             #so we need to resort in case of overlapping features.
01110             #NOTE - In the common case of gene before CDS (and similar) with
01111             #the exact same locations, this will still maintain gene before CDS
01112             answer.features.sort(key=lambda x : x.location.start.position)
01113         if isinstance(annotations, dict):
01114             answer.annotations = annotations
01115         elif annotations:
01116             #Copy the old annotations,
01117             answer.annotations = self.annotations.copy()
01118         if isinstance(letter_annotations, dict):
01119             answer.letter_annotations = letter_annotations
01120         elif letter_annotations:
01121             #Copy the old per letter annotations, reversing them
01122             for key, value in self.letter_annotations.iteritems():
01123                 answer._per_letter_annotations[key] = value[::-1]
01124         return answer
01125 
01126 def _test():
01127     """Run the Bio.SeqRecord module's doctests (PRIVATE).
01128 
01129     This will try and locate the unit tests directory, and run the doctests
01130     from there in order that the relative paths used in the examples work.
01131     """
01132     import doctest
01133     import os
01134     if os.path.isdir(os.path.join("..","Tests")):
01135         print "Runing doctests..."
01136         cur_dir = os.path.abspath(os.curdir)
01137         os.chdir(os.path.join("..","Tests"))
01138         doctest.testmod()
01139         os.chdir(cur_dir)
01140         del cur_dir
01141         print "Done"
01142     elif os.path.isdir(os.path.join("Tests")):
01143         print "Runing doctests..."
01144         cur_dir = os.path.abspath(os.curdir)
01145         os.chdir(os.path.join("Tests"))
01146         doctest.testmod()
01147         os.chdir(cur_dir)
01148         del cur_dir
01149         print "Done"
01150 
01151 if __name__ == "__main__":
01152     _test()