Back to index

python-biopython  1.60
__init__.py
Go to the documentation of this file.
00001 # Copyright 2001-2004 Brad Chapman.
00002 # Revisions copyright 2009-2010 by Peter Cock.
00003 # All rights reserved.
00004 # This code is part of the Biopython distribution and governed by its
00005 # license.  Please see the LICENSE file that should have been included
00006 # as part of this package.
00007 """General mechanisms to access applications in Biopython.
00008 
00009 This module is not intended for direct use. It provides the basic objects which
00010 are subclassed by our command line wrappers, such as:
00011 
00012  - Bio.Align.Applications
00013  - Bio.Blast.Applications
00014  - Bio.Emboss.Applications
00015  - Bio.Sequencing.Applications
00016 
00017 These modules provide wrapper classes for command line tools to help you
00018 construct command line strings by setting the values of each parameter.
00019 The finished command line strings are then normally invoked via the built-in
00020 Python module subprocess.
00021 """
00022 import os, sys
00023 import StringIO
00024 import subprocess
00025 import re
00026 
00027 from subprocess import CalledProcessError as _ProcessCalledError
00028 
00029 from Bio import File
00030 
#Use this regular expression to test the property names are going to
#be valid as Python properties or arguments.  The pattern is anchored at
#the end (\Z) as well as implicitly at the start (via .match), so that a
#valid prefix followed by illegal characters (e.g. "test-name") is
#rejected rather than accepted on the strength of its prefix alone.
_re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*\Z")
assert _re_prop_name.match("t")
assert _re_prop_name.match("test")
assert _re_prop_name.match("_test") is None # we don't want private names
assert _re_prop_name.match("-test") is None
assert _re_prop_name.match("test_name")
assert _re_prop_name.match("test2")
assert _re_prop_name.match("test-name") is None # illegal character inside
assert _re_prop_name.match("test name") is None # no embedded white space
#These are reserved names in Python itself,
_reserved_names = ["and", "del", "from", "not", "while", "as", "elif",
                   "global", "or", "with", "assert", "else", "if", "pass",
                   "yield", "break", "except", "import", "print", "class",
                   "exec", "in", "raise", "continue", "finally", "is",
                   "return", "def", "for", "lambda", "try"]
#These are reserved names due to the way the wrappers work
_local_reserved_names = ["set_parameter"]
00048 
00049 
class ApplicationError(_ProcessCalledError):
    """Raised when an application returns a non-zero exit status.

    The exit status will be stored in the returncode attribute, similarly
    the command line string used in the cmd attribute, and (if captured)
    stdout and stderr as strings.

    This exception is a subclass of subprocess.CalledProcessError.

    >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
    >>> err.returncode, err.cmd, err.stdout, err.stderr
    (-11, 'helloworld', '', 'Some error text')
    >>> print err
    Command 'helloworld' returned non-zero exit status -11, 'Some error text'
    >>> err
    ApplicationError(-11, 'helloworld', '', 'Some error text')

    """
    def __init__(self, returncode, cmd, stdout="", stderr=""):
        """Store the exit status, command string and any captured output.

        Arguments:
         - returncode - exit status reported for the command
         - cmd - the command line string that was run
         - stdout - captured standard output (default empty string)
         - stderr - captured standard error (default empty string)
        """
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Return a one line message, including the first line of stderr."""
        #get first line of any stderr message
        try:
            msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
        except Exception:
            #Best effort only: stderr may be None or not a plain string,
            #in which case the message is simply left out.
            msg = ""
        if msg:
            return "Command '%s' returned non-zero exit status %d, %r" \
                   % (self.cmd, self.returncode, msg)
        else:
            return "Command '%s' returned non-zero exit status %d" \
                   % (self.cmd, self.returncode)

    def __repr__(self):
        """Return a representation quoting each string argument."""
        #Use %r so that empty strings and embedded white space or commas
        #remain visible and unambiguous in the output.
        return "ApplicationError(%i, %r, %r, %r)" \
               % (self.returncode, self.cmd, self.stdout, self.stderr)
00088 
00089 
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers.  You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool.  For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
    >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
    >>> water_cmd.outfile = "temp_water.txt"
    >>> print water_cmd
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> water_cmd
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call using the subprocess module for full control. For the simple
    case where you just want to run the command and get the output:

    stdout, stderr = water_cmd()
    """
    #Note the call example above is not a doctest as we can't handle EMBOSS
    #(or any other tool) being missing in the unit tests.
    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object."""
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs):
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should be an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments.  It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        #Beware of binding-versus-assignment confusion issues: each factory
        #takes the name as an argument so that the lambda it returns is
        #bound to that particular name, not to the loop variable.
        def getter(name):
            return lambda x : x._get_parameter(name)
        def setter(name):
            return lambda x, value : x.set_parameter(name, value)
        def deleter(name):
            return lambda x : x._clear_parameter(name)

        #Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined" \
                                     % name)
                aliases.add(name)
            #The last name is the one exposed as a property/keyword argument
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name in python"
                                 % repr(name))
            if name in _reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name because it is "
                                 "a reserved word in python" % repr(name))
            if name in _local_reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name due to the "
                                 "way the AbstractCommandline class works"
                                 % repr(name))
            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value.  Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            #Properties live on the class, so this is set on the class
            #(which also bypasses the instance level __setattr__ below).
            setattr(self.__class__, name, prop) #magic!
        #items() rather than iteritems() so this also works on Python 3
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            #Check for missing required parameters:
            if p.is_required and not(p.is_set):
                raise ValueError("Parameter %s is not set." \
                                 % p.names[-1])
            #Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        pieces = ["%s " % self.program_name]
        #Each parameter's string form will include a trailing space:
        pieces.extend(str(parameter) for parameter in self.parameters
                      if parameter.is_set)
        return "".join(pieces).strip() # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        answer = "%s(cmd=%s" % (self.__class__.__name__, repr(self.program_name))
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    answer += ", %s=True" % parameter.names[-1]
                else:
                    answer += ", %s=%s" \
                              % (parameter.names[-1], repr(parameter.value))
        answer += ")"
        return answer

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch this is its is_set flag, otherwise the stored value.
        Raises a ValueError if no parameter goes by the given name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises a ValueError if no parameter goes by the given name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                #_Switch objects have no value attribute (their __str__
                #asserts as much), so only reset the value on other types.
                if not isinstance(parameter, _Switch):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value = None):
        """Set a commandline option for a program.

        For a switch the value is treated as a boolean (with a warning if
        None is given).  For other parameters a value which is not None is
        checked and stored, and the parameter is marked as set either way.

        Raises a ValueError if no parameter goes by the given name, or if
        the value is rejected by the parameter's checker function.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean.  None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name, parameter.checker_function)
                        parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error. Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.
        """
        if check_function is not None:
            is_good = check_function(value) #May raise an exception
            assert is_good in [0,1,True,False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s" \
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print cline
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these.  Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']: # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)  # treat as a parameter

    def __call__(self, stdin=None, stdout=True, stderr=True,
                 cwd=None, env=None):
        """Executes the command, waits for it to finish, and returns output.

        Runs the command line tool and waits for it to finish. If it returns
        a non-zero error level, an exception is raised. Otherwise two strings
        are returned containing stdout and stderr.

        The optional stdin argument should be a string of data which will be
        passed to the tool as standard input.

        The optional stdout and stderr arguments are treated as booleans, and
        control if the output should be captured (True, default), or ignored
        by sending it to /dev/null to avoid wasting memory (False). In the
        latter case empty string(s) are returned.

        The optional cwd argument is a string giving the working directory
        to run the command from. See Python's subprocess module documentation
        for more details.

        The optional env argument is a dictionary setting the environment
        variables to be used in the new process. By default the current
        process' environment variables are used. See Python's subprocess
        module documentation for more details.

        Default example usage:

        from Bio.Emboss.Applications import WaterCommandline
        water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
                                     stdout=True, auto=True,
                                     asequence="a.fasta", bsequence="b.fasta")
        print "About to run:\\n%s" % water_cmd
        std_output, err_output = water_cmd()

        This functionality is similar to subprocess.check_output() added in
        Python 2.7. In general if you require more control over running the
        command, use subprocess directly.

        As of Biopython 1.56, when the program called returns a non-zero error
        level, a custom ApplicationError exception is raised. This includes
        any stdout and stderr strings captured as attributes of the exception
        object, since they may be useful for diagnosing what went wrong.
        """
        #Build (and thereby validate) the command line string once, before
        #any file handles are opened.
        cmd_str = str(self)
        devnull_handles = []
        try:
            #The child process writes to these handles, so the null device
            #must be opened for writing.
            if stdout:
                stdout_arg = subprocess.PIPE
            else:
                stdout_arg = open(os.devnull, "w")
                devnull_handles.append(stdout_arg)
            if stderr:
                stderr_arg = subprocess.PIPE
            else:
                stderr_arg = open(os.devnull, "w")
                devnull_handles.append(stderr_arg)
            #We may not need to supply any piped input, but we setup the
            #standard input pipe anyway as a work around for a python
            #bug if this is called from a Windows GUI program.  For
            #details, see http://bugs.python.org/issue1124861
            #
            #Using universal newlines is important on Python 3, this
            #gives unicode handles rather than bytes handles.
            child_process = subprocess.Popen(cmd_str, stdin=subprocess.PIPE,
                                             stdout=stdout_arg,
                                             stderr=stderr_arg,
                                             universal_newlines=True,
                                             cwd=cwd, env=env,
                                             shell=(sys.platform!="win32"))
            #Use .communicate as can get deadlocks with .wait(), see Bug 2804
            stdout_str, stderr_str = child_process.communicate(stdin)
        finally:
            #We opened the null device, so we must close it again
            for handle in devnull_handles:
                handle.close()
        if not stdout: assert not stdout_str
        if not stderr: assert not stderr_str
        #communicate() gives None for a stream which was not piped; return
        #an empty string instead, as promised in the docstring.
        if stdout_str is None:
            stdout_str = ""
        if stderr_str is None:
            stderr_str = ""
        return_code = child_process.returncode
        if return_code:
            raise ApplicationError(return_code, cmd_str,
                                   stdout_str, stderr_str)
        return stdout_str, stderr_str
00439 
00440 
class _AbstractParameter(object):
    """A class to hold information about a parameter for a commandline.

    Do not use this directly, instead use one of the subclasses.

    Both methods here raise NotImplementedError, so a usable subclass has
    to provide its own __init__ and __str__.
    """
    def __init__(self):
        """Refuse direct instantiation; subclasses supply their own."""
        raise NotImplementedError

    def __str__(self):
        """Refuse to give a string form; subclasses supply their own."""
        raise NotImplementedError
00451 
class _Option(_AbstractParameter):
    """An option for a program which normally takes a value.

    Use this for UNIXish options such as --append=yes and -a yes, where
    a value (here "yes") is generally expected.  UNIXish options like
    -kimura in clustalw, which take no value, should be represented with
    the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o filename -- True if this argument is a filename and should be
    automatically quoted if it contains spaces (stored as is_filename).

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """
    def __init__(self, names, description, filename=False, checker_function=None,
                 is_required=False, equate=True):
        #Sanity check the description before recording anything
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        #Fixed properties of the option:
        self.names = names
        self.description = description
        self.is_filename = filename
        self.checker_function = checker_function
        self.is_required = is_required
        self.equate = equate
        #State which changes as the option gets used (starts out unset):
        self.value = None
        self.is_set = False

    def __str__(self):
        """Return this option as it should appear on the commandline.

        The text returned ends with a single space.
        """
        # Note: Whether the name and value are joined by an equals sign
        # ("--name=value ") or by a space ("-name value ") is decided by
        # the equate attribute, chosen when the option is set up.
        if self.value is None:
            #No value, so just the option name
            return "%s " % self.names[0]
        if self.is_filename:
            text = _escape_filename(self.value)
        else:
            text = str(self.value)
        if self.equate:
            template = "%s=%s "
        else:
            template = "%s %s "
        return template % (self.names[0], text)
00522 
class _Switch(_AbstractParameter):
    """An optional switch for a program, which takes no value.

    Use this for UNIXish options like -kimura in clustalw which are
    simply either present in the command string or left out.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    NOTE - There is no value attribute, see is_set instead,
    """
    def __init__(self, names, description):
        self.names = names
        self.description = description
        #A switch is never required, and starts out switched off
        self.is_required = False
        self.is_set = False

    def __str__(self):
        """Return this switch as it should appear on the commandline.

        This is the first name followed by a single space if the switch
        is set, and an empty string otherwise.
        """
        assert not hasattr(self, "value")
        if not self.is_set:
            return ""
        return "%s " % self.names[0]
00559 
class _Argument(_AbstractParameter):
    """An argument on a commandline, given as a bare value with no name.
    """
    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False):
        #Sanity check the description before recording anything
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        #Fixed properties of the argument:
        self.names = names
        self.description = description
        self.is_filename = filename
        self.checker_function = checker_function
        self.is_required = is_required
        #State which changes as the argument gets used (starts out unset):
        self.value = None
        self.is_set = False

    def __str__(self):
        """Return the argument value followed by a single space.

        With no value this is just the single space; filename values are
        passed through _escape_filename first.
        """
        if self.value is None:
            return " "
        if self.is_filename:
            shown = _escape_filename(self.value)
        else:
            shown = self.value
        return "%s " % shown
00582 
def _escape_filename(filename):
    """Wrap a filename containing spaces in double quotes (PRIVATE).

    A filename with no spaces is returned unchanged, as is one which
    already starts and ends with a double quote:

    >>> print _escape_filename('example with spaces')
    "example with spaces"
    >>> print _escape_filename('"example with spaces"')
    "example with spaces"
    """
    #NOTE: An open question is whether, on Windows, it would help to ask
    #for an existing file's alternative short name (DOS style 8.3 format,
    #via win32api.GetShortPathName) which has no spaces in it.  Such a
    #name is not portable between machines, or even folders.
    #
    #For now we just quote it - works on Windows, Mac OS X etc
    if " " in filename and not (filename.startswith('"')
                                and filename.endswith('"')):
        return '"%s"' % filename
    #Either there are no spaces, or it is already quoted
    return filename
00614 
def _test():
    """Run the doctests found in the Bio.Application module, verbosely."""
    from doctest import testmod
    testmod(verbose=1)

if __name__ == "__main__":
    #Executed as a script, so run the doctests
    _test()