Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 2003-2009 by Bartek Wilczynski.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """
00006 Module containing different tools for sequence motif analysis.
00007 
00008 It contains the core Motif class, which provides various I/O methods
00009 as well as methods for motif comparisons and motif searching in sequences.
00010 It also includes functionality for parsing the output of the AlignACE and MEME programs.
00011 """
00012 from Bio.Motif._Motif import Motif
00013 from Bio.Motif.Parsers.AlignAce import read as _AlignAce_read
00014 from Bio.Motif.Parsers.MEME import read as _MEME_read
00015 from Bio.Motif.Thresholds import ScoreDistribution
00016 
# Multi-motif formats: format name -> function that reads a whole program
# output file from a handle.
_parsers = dict(
    AlignAce=_AlignAce_read,
    MEME=_MEME_read,
)
00020 
def _from_pfm(handle):
    """Create an empty Motif and fill it from a JASPAR pfm handle.

    Returns whatever Motif._from_jaspar_pfm(handle) returns.
    """
    motif = Motif()
    return motif._from_jaspar_pfm(handle)
00023 
def _from_sites(handle):
    """Create an empty Motif and fill it from a JASPAR sites handle.

    Returns whatever Motif._from_jaspar_sites(handle) returns.
    """
    motif = Motif()
    return motif._from_jaspar_sites(handle)
00026 
# Single-motif formats: format name -> function that builds one motif
# from a handle.
_readers = dict([
    ("jaspar-pfm", _from_pfm),
    ("jaspar-sites", _from_sites),
])
00030 
00031 
00032           
def parse(handle,format):
    """Parses an output file of motif finding programs.

    Currently supported formats:
     - AlignAce
     - MEME

    You can also use single-motif formats, although the Bio.Motif.read()
    function is simpler to use in this situation.
     - jaspar-pfm
     - jaspar-sites

    This function is a generator: it yields the motifs one by one.  As a
    consequence, a ValueError for an unsupported format name is raised
    when iteration starts, not when parse() is called.

    For example:

    >>> from Bio import Motif
    >>> for motif in Motif.parse(open("Motif/alignace.out"),"AlignAce"):
    ...     print motif.consensus()
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG
    """
    try:
        parser = _parsers[format]
    except KeyError:
        # Not a multi-motif parser format; try the single-motif readers.
        try:
            reader = _readers[format]
        except KeyError:
            # Only an unknown format name is reported here; anything else
            # (e.g. KeyboardInterrupt) must propagate unchanged.
            raise ValueError("Wrong parser format")
        else:
            # A reader builds exactly one motif, so it is the only record.
            yield reader(handle)
    else:
        # We have a proper parser: yield each motif of the parsed record.
        for m in parser(handle).motifs:
            yield m
00080 
def read(handle,format):
    """Reads a single motif from a handle using a specified file-format.

    The accepted formats are those of Bio.Motif.parse(), but the handle
    must contain exactly one record.  For example, reading a pfm file:

    >>> from Bio import Motif
    >>> motif = Motif.read(open("Motif/SRF.pfm"),"jaspar-pfm")
    >>> motif.consensus()
    Seq('GCCCATATATGG', IUPACUnambiguousDNA())

    Or a single-motif MEME file,

    >>> from Bio import Motif
    >>> motif =  Motif.read(open("Motif/meme.out"),"MEME")
    >>> motif.consensus()
    Seq('CTCAATCGTA', IUPACUnambiguousDNA())

    A ValueError is raised if the handle holds no records, or more
    than one record:

    >>> from Bio import Motif
    >>> motif = Motif.read(open("Motif/alignace.out"),"AlignAce")
    Traceback (most recent call last):
        ...
    ValueError: More than one motif found in handle

    To get just the first record of a multi-record file (for which
    this function raises, as shown above), use instead:

    >>> from Bio import Motif
    >>> motif = Motif.parse(open("Motif/alignace.out"),"AlignAce").next()
    >>> motif.consensus()
    Seq('TCTACGATTGAG', IUPACUnambiguousDNA())

    Use the Bio.Motif.parse(handle, format) function if you want
    to read multiple records from the handle.
    """
    records = parse(handle, format)

    def _next_or_none():
        # Next record from the parser, or None once it is exhausted.
        try:
            return records.next()
        except StopIteration:
            return None

    motif = _next_or_none()
    if motif is None:
        raise ValueError("No motifs found in handle")
    # Ask for one more record only to detect files with extra motifs.
    if _next_or_none() is not None:
        raise ValueError("More than one motif found in handle")
    return motif
00135 
00136 
00137 def _test():
00138     """Run the Bio.Motif module's doctests.
00139 
00140     This will try and locate the unit tests directory, and run the doctests
00141     from there in order that the relative paths used in the examples work.
00142     """
00143     import doctest
00144     import os
00145     if os.path.isdir(os.path.join("..","..","Tests")):
00146         print "Runing doctests..."
00147         cur_dir = os.path.abspath(os.curdir)
00148         os.chdir(os.path.join("..","..","Tests"))
00149         doctest.testmod()
00150         os.chdir(cur_dir)
00151         del cur_dir
00152         print "Done"
00153 
# When executed as a script (rather than imported), run the doctests.
if __name__ == "__main__":
    _test()