Back to index

python-biopython  1.60
Interfaces.py
Go to the documentation of this file.
00001 # Copyright 2006-2009 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 """
00006 Bio.SeqIO support module (not for general use).
00007 
00008 Unless you are writing a new parser or writer for Bio.SeqIO, you should not
00009 use this module.  It provides base classes to try and simplify things.
00010 """
00011 
00012 from Bio.Alphabet import generic_alphabet
00013 
class SequenceIterator(object):
    """Common foundation for iterators that hand out SeqRecord objects.

    A derived class supplies a next() method that returns one SeqRecord
    per call.  It may also replace __init__ when extra set-up is needed.
    """

    def __init__(self, handle, alphabet=generic_alphabet):
        """Store the input handle and alphabet on the new iterator.

        handle - input file
        alphabet - optional, e.g. Bio.Alphabet.generic_protein

        Guidance for derived classes:
        - the handle should be the only required argument.
        - accepting an alphabet is optional.
        - further optional arguments may be added.

        A derived __init__ might, for example, read ahead in the file to
        locate the first record, or accept additional arguments.
        """
        self.handle, self.alphabet = handle, alphabet

    def next(self):
        """Return the next record in the file.

        This base version always raises NotImplementedError.  Derived
        classes must replace it with code that splits the file into
        individual records and returns each one as a useful object,
        e.g. a SeqRecord.
        """
        raise NotImplementedError("This object should be subclassed")

    def __iter__(self):
        """Iterate over the entries as SeqRecord objects.

        next() is called repeatedly and iteration stops the first time
        it returns None.

        Example usage for Fasta files:

        myFile = open("example.fasta","r")
        myFastaReader = FastaIterator(myFile)
        for record in myFastaReader:
            print(record.id)
            print(record.seq)
        myFile.close()
        """
        # Two-argument iter(): call self.next until it yields the sentinel.
        end_marker = None
        return iter(self.next, end_marker)
00063 
class InterlacedSequenceIterator(SequenceIterator):
    """Base class for any iterator of a non-sequential file type.

    Not meant to be instantiated directly.

    Subclass this when writing a parser for an interlaced sequence file,
    i.e. one where the whole file has to be read before any single
    record can be extracted.

    A subclass only has to provide its own:
    (1) __init__ method to parse the file and call self.move_start()
    (2) __len__ method to return the number of records
    (3) __getitem__ to return any requested record.

    The iterator methods, including next(), are supplied here.  They rely
    on the total number of records (via len) and on the index of the
    pending record, which is tracked in self._n.

    Whether to build a cache of SeqRecords up front, or to keep private
    lists and dicts and create SeqRecords on demand, is left to the
    subclass.
    """

    def __init__(self):
        """Create the object.

        Derived classes must replace this method; after resetting the
        position, this base version always raises NotImplementedError.
        """
        # A real implementation of __init__ is expected to leave self._n
        # at zero, which is what move_start() does.
        self.move_start()
        raise NotImplementedError("This object method should be subclassed")

    def __len__(self):
        """Return the number of records.

        Derived classes must replace this method; this base version
        always raises NotImplementedError.
        """
        raise NotImplementedError("This object method should be subclassed")

    def __getitem__(self, i):
        """Return the requested record.

        Derived classes must replace this method; this base version
        always raises NotImplementedError.

        An implementation should NOT touch the value of self._n
        """
        raise NotImplementedError("This object method should be subclassed")

    def move_start(self):
        """Reset the pending record index (self._n) to zero."""
        self._n = 0

    def next(self):
        """Return the pending record and advance, or None when exhausted."""
        index = self._n
        if index >= len(self):
            # Nothing left; None is the end-of-iteration marker that
            # __iter__ watches for.
            return None
        # Advance the position before fetching, as __getitem__ must not
        # alter self._n itself.
        self._n = index + 1
        return self[index]

    def __iter__(self):
        """Iterate by calling next() until it returns None."""
        end_marker = None
        return iter(self.next, end_marker)
00133 
class SequenceWriter(object):
    """Base class for sequence file writers; it should be subclassed.

    Writers for interlaced file formats (e.g. Clustal) derive from this
    class directly.

    Writers for sequential file formats (e.g. Fasta, GenBank) should
    derive from the SequentialSequenceWriter class instead.
    """

    def __init__(self, handle):
        """Create the writer object around the given output handle.

        Call write_file() to actually record your sequence records.
        """
        self.handle = handle

    def _get_seq_string(self, record):
        """Return the record's sequence as a string, rejecting bad values.

        Raises TypeError when the sequence is None, or when it is some
        other object that lacks the Seq API's tostring() method.
        """
        try:
            # tostring() is part of the Seq API.  str(record.seq) is
            # deliberately avoided: it would turn a None sequence into
            # the text "None", and produce unpredictable output for an
            # unexpected object.
            return record.seq.tostring()
        except AttributeError:
            if record.seq is not None:
                problem = "SeqRecord (id=%s) has an invalid sequence."
            else:
                # Silently treating None as an empty sequence, Seq(""),
                # would be an implicit assumption best avoided.
                problem = "SeqRecord (id=%s) has None for its sequence."
            raise TypeError(problem % record.id)

    def clean(self, text):
        """Return text with line breaks replaced, for single-line output.

        Each newline and carriage return becomes a space, then one pass
        turns every non-overlapping pair of spaces into a single space.
        """
        for line_break in ("\n", "\r"):
            text = text.replace(line_break, " ")
        return text.replace("  ", " ")

    def write_file(self, records):
        """Use this to write an entire file containing the given records.

        records - A list or iterator returning SeqRecord objects

        Implementations should return the number of records (as an
        integer).  This base version always raises NotImplementedError.

        This method can only be called once.
        """
        # Implementations should NOT close the file when finished; that
        # is the responsibility of the calling code.
        raise NotImplementedError("This object should be subclassed")
00184 
class SequentialSequenceWriter(SequenceWriter):
    """This class should be subclassed.

    It is intended for sequential file formats with an (optional)
    header, repeated records, and an (optional) footer.

    In this case (as with interlaced file formats), the user may
    simply call the write_file() method and be done.

    However, they may also call the write_header(), followed
    by multiple calls to write_record() and/or write_records()
    followed finally by write_footer().

    Users must call write_header() and write_footer() even when
    the file format concerned doesn't have a header or footer.
    This is to try and make life as easy as possible when
    switching the output format.

    Note that write_header() cannot require any assumptions about
    the number of records.
    """

    def __init__(self, handle):
        """Create the writer around the given output handle.

        The three private flags track which of the header, record and
        footer steps have happened, so that the write_* methods can
        check they are called in the right order.
        """
        # Let the base class store the handle so the two stay in step.
        SequenceWriter.__init__(self, handle)
        self._header_written = False
        self._record_written = False
        self._footer_written = False

    def write_header(self):
        """Record that the header step has been done.

        This base version writes nothing itself; it only checks the call
        order and sets the header flag.  It must be the first write_*
        call and may only be made once, otherwise the assertions fail.
        """
        assert not self._header_written, "You have already called write_header()"
        assert not self._record_written, "You have already called write_record() or write_records()"
        assert not self._footer_written, "You have already called write_footer()"
        self._header_written = True

    def write_footer(self):
        """Record that the footer step has been done.

        This base version writes nothing itself; it only checks the call
        order and sets the footer flag.  The header step and a record
        step must already have happened, and the footer may only be
        written once, otherwise the assertions fail.
        """
        assert self._header_written, "You must call write_header() first"
        assert self._record_written, "You have not called write_record() or write_records() yet"
        assert not self._footer_written, "You have already called write_footer()"
        self._footer_written = True

    def write_record(self, record):
        """Write a single record to the output file.

        record - a SeqRecord object

        Once you have called write_header() you can call write_record()
        and/or write_records() as many times as needed.  Then call
        write_footer() and close().

        This base version performs the call-order checks, sets the
        record flag and then always raises NotImplementedError; derived
        classes must replace it.
        """
        assert self._header_written, "You must call write_header() first"
        assert not self._footer_written, "You have already called write_footer()"
        self._record_written = True
        raise NotImplementedError("This object should be subclassed")

    def write_records(self, records):
        """Write multiple records to the output file.

        records - A list or iterator returning SeqRecord objects

        Once you have called write_header() you can call write_record()
        and/or write_records() as many times as needed.  Then call
        write_footer() and close().

        Returns the number of records written.
        """
        # Default implementation: hand each record to write_record().
        assert self._header_written, "You must call write_header() first"
        assert not self._footer_written, "You have already called write_footer()"
        count = 0
        for record in records:
            self.write_record(record)
            count += 1
        # Mark as true, even if there were no records, so that
        # write_footer() accepts an empty file.
        self._record_written = True
        return count

    def write_file(self, records):
        """Use this to write an entire file containing the given records.

        records - A list or iterator returning SeqRecord objects

        Calls write_header(), write_records() and write_footer() in turn.

        This method can only be called once.  Returns the number of records
        written.
        """
        self.write_header()
        count = self.write_records(records)
        self.write_footer()
        return count
00269         """
00270         self.write_header()
00271         count = self.write_records(records)
00272         self.write_footer()
00273         return count