Back to index

python-biopython  1.60
File.py
Go to the documentation of this file.
00001 # Copyright 1999 by Jeffrey Chang.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Code for more fancy file handles.
00007 
00008 
00009 Classes:
00010 
00011 UndoHandle     File object decorator with support for undo-like operations.
00012 
00013 StringHandle   Wraps a file object around a string.  This is now DEPRECATED,
00014                and is likely to be removed in a future release of Biopython.
00015 
00016 SGMLStripper   Object that strips SGML.  This is now DEPRECATED, and is likely
00017                to be removed in a future release of Biopython.
00018 
00019 """
00020 # For with statement in Python 2.5
00021 from __future__ import with_statement
00022 import contextlib
00023 import StringIO
00024 
00025 
@contextlib.contextmanager
def as_handle(handleish, mode='r', **kwargs):
    r"""Context manager yielding a file handle for a filename or a handle.

    Intended for arguments that can be passed to the SeqIO and AlignIO
    read, write, and parse methods: either file objects or strings.

    When given a string, yields a file handle opened on handleish with the
    provided mode; that handle is closed when the manager exits.

    All other inputs are yielded unchanged, and are *not* closed.

    - handleish  - Either a string (filename) or file handle
    - mode       - Mode to open handleish (used only if handleish is a string)
    - kwargs     - Further arguments to pass to open(...)

    Example:

    >>> with as_handle('seqs.fasta', 'w') as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    True

    >>> handle = open('seqs.fasta', 'w')
    >>> with as_handle(handle) as fp:
    ...     fp.write('>test\nACGT')
    >>> fp.closed
    False
    >>> fp.close()
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so that
    # filenames are still recognised (instead of a NameError) on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(handleish, string_types):
        # We opened the file, so we are responsible for closing it; the
        # ``with`` block does so even if the caller's body raises.
        with open(handleish, mode, **kwargs) as fp:
            yield fp
    else:
        # Caller-owned handle: hand it back untouched and leave it open.
        yield handleish
00060 
00061 
class UndoHandle(object):
    """A Python handle that adds functionality for saving lines.

    Saves lines in a LIFO fashion.

    Added methods:
    saveline    Save a line to be returned next time.
    peekline    Peek at the next line without consuming it.

    Any attribute not defined here is looked up on the wrapped handle.

    """
    def __init__(self, handle):
        """Wrap handle; no lines are saved initially."""
        self._handle = handle
        self._saved = []

    def __iter__(self):
        """Return self; iteration yields lines until an empty read."""
        return self

    def __next__(self):
        """Return the next line, raising StopIteration at end of data."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 2 spells the iterator method ``next``; keep both names so the
    # handle can be iterated on either major version.
    next = __next__

    def readlines(self, *args, **keywds):
        """Return the saved lines followed by the wrapped handle's lines.

        Arguments are passed through to the wrapped handle's readlines().
        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the most recently saved line, else read from the handle.

        Arguments are only used when reading from the wrapped handle.
        """
        if self._saved:
            line = self._saved.pop(0)
        else:
            line = self._handle.readline(*args, **keywds)
        return line

    def read(self, size=-1):
        """Read up to size characters, consuming saved lines first.

        A size of None or any negative number means "read everything":
        all saved lines followed by the rest of the wrapped handle.
        """
        if size is None or size < 0:
            saved = "".join(self._saved)
            self._saved[:] = []
            # Normalise so the wrapped handle is also asked for everything.
            size = -1
        else:
            parts = []
            while size > 0 and self._saved:
                head = self._saved[0]
                if len(head) <= size:
                    # The whole saved line fits in the request.
                    size -= len(head)
                    parts.append(self._saved.pop(0))
                else:
                    # Only part fits; keep the remainder saved for later.
                    parts.append(head[:size])
                    self._saved[0] = head[size:]
                    size = 0
            saved = "".join(parts)
        # When the saved data satisfied the request, size is 0 here and the
        # wrapped handle is asked for nothing more.
        return saved + self._handle.read(size)

    def saveline(self, line):
        """Push line so it is returned by the next read; ignores empty lines."""
        if line:
            self._saved = [line] + self._saved

    def peekline(self):
        """Return the next line without consuming it."""
        if self._saved:
            line = self._saved[0]
        else:
            line = self._handle.readline()
            self.saveline(line)
        return line

    def tell(self):
        """Return the wrapped handle's position minus the saved line lengths."""
        saved_length = sum(len(line) for line in self._saved)
        return self._handle.tell() - saved_length

    def seek(self, *args):
        """Discard saved lines and seek the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        # If _handle itself is missing (instance created without __init__,
        # e.g. while being copied), looking it up below would re-enter this
        # method forever; fail with a normal AttributeError instead.
        if attr == "_handle":
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def __enter__(self):
        """Return self for use in a with statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the wrapped handle on leaving the with block."""
        self._handle.close()
00143 
00144 # I could make this faster by using cStringIO.
00145 # However, cStringIO (in v1.52) does not implement the
00146 # readlines method.
class StringHandle(StringIO.StringIO):
    """File-like wrapper around a string (DEPRECATED).

    Creating an instance emits a BiopythonDeprecationWarning; the warning
    text recommends the standard library's StringIO.StringIO instead.
    """
    def __init__(self, buffer=''):
        """Create a handle over buffer and warn about the deprecation."""
        import warnings
        import Bio
        # stacklevel=2 attributes the warning to the code that instantiates
        # the class rather than to this module.
        warnings.warn("This class is deprecated, and is likely to be "
                      "removed in a future version of Biopython. Please use "
                      "the class StringIO in the module StringIO in the "
                      "Python standard library instead",
                      Bio.BiopythonDeprecationWarning, stacklevel=2)
        StringIO.StringIO.__init__(self, buffer)
00153 
try:
    import sgmllib
except ImportError:
    #This isn't available on Python 3, but we don't care much as SGMLStripper
    #is obsolete
    pass
else:
    class SGMLStripper(object):
        """Object to strip SGML tags (OBSOLETE).

        Only defined when the sgmllib module can be imported.
        """
        class MyParser(sgmllib.SGMLParser):
            """SGML parser that accumulates the text it is fed in self.data."""
            def __init__(self):
                sgmllib.SGMLParser.__init__(self)
                self.data = ''

            def handle_data(self, data):
                """Append a chunk of text data to self.data."""
                self.data = self.data + data

        def __init__(self):
            """Create the stripper and warn about the deprecation."""
            import warnings
            import Bio
            # stacklevel=2 attributes the warning to the code that
            # instantiates the class rather than to this module.
            warnings.warn("This class is deprecated, and is likely to be "
                          "removed in a future version of Biopython",
                          Bio.BiopythonDeprecationWarning, stacklevel=2)
            self._parser = SGMLStripper.MyParser()

        def strip(self, str):
            """S.strip(str) -> string

            Strip the SGML tags from str.

            Returns '' for empty input.  If nothing is left after stripping
            but the input ended with a newline or carriage return, returns
            a single newline instead of ''.

            """
            if not str:  # empty string, don't do anything.
                return ''
            # I need to make sure that I don't return an empty string if
            # the buffer is not empty.  This can happen if there's a newline
            # character embedded within a tag.  Thus, I'll first check to
            # see if the last character is a newline.  If it is, and it's
            # stripped away, I'll add it back.
            is_newline = str[-1] in ['\n', '\r']

            self._parser.data = ''    # clear the parser's data (don't reset)
            self._parser.feed(str)
            if self._parser.data:
                str = self._parser.data
            elif is_newline:
                str = '\n'
            else:
                str = ''
            return str
00200 
00201