Back to index

python-biopython  1.60
MAST.py
Go to the documentation of this file.
00001 # Copyright 2008 by Bartek Wilczynski.
00002 # Adapted from Bio.MEME.Parser by Jason A. Hackney.  All rights reserved.
00003 # This code is part of the Biopython distribution and governed by its
00004 # license.  Please see the LICENSE file that should have been included
00005 # as part of this package.
00006 
00007 from Bio.Alphabet import IUPAC
00008 from Bio.Motif.Parsers.MEME import MEMEMotif
00009 
00010 
class Record(object):
    """Container for everything parsed out of one MAST run.

    Attributes:
    sequences -- list of sequence names, in the order they were read.
    version   -- MAST version string ("" until parsed).
    database  -- database name ("" until parsed).
    diagrams  -- dict mapping a sequence name to its motif diagram string.
    alphabet  -- alphabet assigned while parsing (None until parsed).
    motifs    -- list of motif objects (MEMEMotif instances).

    Methods:
    get_motif_by_name (motif_name): returns the motif with the given
    name, or None when there is no such motif.
    """

    def __init__(self):
        # Start out empty; the module-level parsing helpers fill these in.
        self.motifs = []
        self.sequences = []
        self.diagrams = {}
        self.alphabet = None
        self.database = ""
        self.version = ""

    def get_motif_by_name(self, name):
        """Return the first motif whose name equals `name`, else None."""
        matching = (motif for motif in self.motifs if motif.name == name)
        return next(matching, None)
00034 
def read(handle):
    """Parse MAST output from `handle` and return it as a Record.

    `handle` is consumed line by line, front to back, by a fixed
    sequence of section parsers that each fill in part of the Record.
    A ValueError raised by any of those parsers (malformed input)
    propagates to the caller.
    """
    record = Record()
    # The parsers share one handle, so they must run in file order.
    parsing_steps = (
        __read_version,
        __read_database_and_motifs,
        __read_section_i,
        __read_section_ii,
        __read_section_iii,
    )
    for parse_step in parsing_steps:
        parse_step(record, handle)
    return record
00044 
00045 
00046 # Everything below is private
00047 
00048 
def __read_version(record, handle):
    """Find the 'MAST version' line and store the version on `record`.

    Lines are consumed from `handle` up to and including the first one
    containing "MAST version"; the third whitespace-separated word of
    that line becomes record.version.

    Raises ValueError if no such line exists in `handle`.
    """
    version_line = next((row for row in handle if "MAST version" in row), None)
    if version_line is None:
        raise ValueError("Improper input file. Does not begin with a line with 'MAST version'")
    fields = version_line.split()
    record.version = fields[2]
00056 
00057 
def __read_database_and_motifs(record, handle):
    """Parse the 'DATABASE AND MOTIFS' section into `record`.

    Sets record.database and record.alphabet from the DATABASE line and
    appends one MEMEMotif (name and length only) to record.motifs for
    each row of the motif table, which ends at the first blank line.
    record.alphabet is left unchanged when the database type is neither
    '(nucleotide)' nor '(peptide)'.

    Raises ValueError if an expected line is malformed. StopIteration
    propagates if `handle` runs out where a line is required.
    """
    for line in handle:
        if line.startswith('DATABASE AND MOTIFS'):
            break
    # Use the next() builtin rather than handle.next(): the method form
    # only exists on Python 2 iterators.
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
    line = next(handle)
    if 'DATABASE' not in line:
        raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
    words = line.strip().split()
    record.database = words[1]
    if words[2] == '(nucleotide)':
        record.alphabet = IUPAC.unambiguous_dna
    elif words[2] == '(peptide)':
        record.alphabet = IUPAC.protein
    # Skip ahead to the header of the motif table.
    for line in handle:
        if 'MOTIF WIDTH' in line:
            break
    line = next(handle)
    if '----' not in line:
        raise ValueError("Line does not contain '----':\n%s" % line)
    for line in handle:
        if not line.strip():
            break
        words = line.strip().split()
        motif = MEMEMotif()
        motif.alphabet = record.alphabet
        motif.name = words[0]
        motif.length = int(words[1])
        # Any further columns on the row are currently ignored.
        # motif.add_instance(words[2])
        record.motifs.append(motif)
00090 
00091 
def __read_section_i(record, handle):
    """Parse 'SECTION I:' and collect the sequence names on `record`.

    Skips to the 'SEQUENCE NAME' table header inside SECTION I, checks
    the '---' rule under it, then appends the first whitespace-separated
    word of every table row to record.sequences until a blank line. The
    line after that blank line must start with '****'.

    Raises ValueError if the rule or the closing '****' line is missing,
    or if a table row consists of a single word. StopIteration
    propagates if `handle` runs out where a line is required.
    """
    for line in handle:
        if line.startswith('SECTION I:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # Use the next() builtin rather than handle.next(): the method form
    # only exists on Python 2 iterators.
    line = next(handle)
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    for line in handle:
        if not line.strip():
            break
        # Only the name is kept; the rest of the row is discarded. The
        # two-way unpack still rejects rows that hold nothing but a name.
        sequence, _ = line.split(None, 1)
        record.sequences.append(sequence)
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
00111 
00112 
def __read_section_ii(record, handle):
    """Parse 'SECTION II:' and store the motif diagrams on `record`.

    Skips to the 'SEQUENCE NAME' table header inside SECTION II, checks
    the '---' rule under it, then reads table rows until a blank line.
    A row of exactly three whitespace-separated words starts a new entry
    record.diagrams[name] = diagram (the middle word is ignored). A row
    that begins with a space is a continuation whose stripped text is
    appended to the diagram of the most recent entry. The line after the
    blank line must start with '****'.

    Raises ValueError if the rule or the closing '****' line is missing,
    if a row does not have exactly three words, or if a continuation row
    appears before any entry. StopIteration propagates if `handle` runs
    out where a line is required.
    """
    for line in handle:
        if line.startswith('SECTION II:'):
            break
    for line in handle:
        if line.startswith('SEQUENCE NAME'):
            break
    # Use the next() builtin rather than handle.next(): the method form
    # only exists on Python 2 iterators.
    line = next(handle)
    if not line.startswith('---'):
        raise ValueError("Line does not start with '---':\n%s" % line)
    sequence = None
    for line in handle:
        if not line.strip():
            break
        elif line.startswith(" "):
            if sequence is None:
                # Nothing to attach the continuation to: report it as a
                # format error instead of failing on an unbound local.
                raise ValueError(
                    "Continuation line found before any sequence line:\n%s"
                    % line)
            record.diagrams[sequence] += line.strip()
        else:
            sequence, _, diagram = line.split()
            record.diagrams[sequence] = diagram
    line = next(handle)
    if not line.startswith('****'):
        raise ValueError("Line does not start with '****':\n%s" % line)
00135 
00136 
def __read_section_iii(record, handle):
    """Advance `handle` past the opening of 'SECTION III:'.

    Consumes lines up to and including, in turn, the first line starting
    with 'SECTION III:', then one starting with '****', then one
    starting with '*****', and finally the first non-blank line after
    that. Nothing is stored on `record`, and running out of input at any
    point simply ends the scan without an error.
    """
    for marker in ('SECTION III:', '****', '*****'):
        for row in handle:
            if row.startswith(marker):
                break
    # Drop blank lines plus the first line that has any content.
    for row in handle:
        if row.strip() != "":
            break
00149             break