Back to index

python-biopython  1.60
Functions
Bio.Blast.NCBIWWW Namespace Reference

Functions

def qblast
def _parse_qblast_ref_page

Function Documentation

def Bio.Blast.NCBIWWW._parse_qblast_ref_page (   handle) [private]
Extract a tuple of RID, RTOE from the 'please wait' page (PRIVATE).

The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is probably
'Request Time of Execution' and RID would be 'Request Identifier'.

Definition at line 180 of file NCBIWWW.py.

00180 
def _parse_qblast_ref_page(handle):
    """Extract a tuple of RID, RTOE from the 'please wait' page (PRIVATE).

    The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is probably
    'Request Time of Execution' and RID would be 'Request Identifier'.

    The output of handle.read() is converted with _as_string and then
    searched for the "RID =" and "RTOE =" markers.  Returns the tuple
    (rid, rtoe) with rid as a string and rtoe converted to an integer.

    Raises ValueError if either value is missing or empty (quoting the
    error message from NCBI when one can be pulled out of the page), or
    if the RTOE value is not an integer.
    """
    s = _as_string(handle.read())

    def field(marker):
        """Return the stripped text after marker on its line, or None."""
        i = s.find(marker)
        if i == -1:
            return None
        j = s.find("\n", i)
        if j == -1:
            #No newline after the marker (the value is on the last line
            #of the page), so take everything up to the end of the page.
            #Slicing with -1 here would drop the value's final character.
            j = len(s)
        return s[i+len(marker):j].strip()

    rid = field("RID =")
    rtoe = field("RTOE =")

    if not rid and not rtoe:
        #Can we reliably extract the error message from the HTML page?
        #e.g.  "Message ID#24 Error: Failed to read the Blast query:
        #       Nucleotide FASTA provided for protein sequence"
        #or    "Message ID#32 Error: Query contains no data: Query
        #       contains no sequence data"
        #
        #This used to occur inside a <div class="error msInf"> entry,
        #while in spring 2010 the markup used <p class="error"> instead.
        #Try each known layout in that order:
        for start_tag, end_tag in [('<div class="error msInf">', "</div>"),
                                   ('<p class="error">', "</p>")]:
            i = s.find(start_tag)
            if i != -1:
                msg = s[i+len(start_tag):].strip()
                #Keep only the first line of text before the closing tag
                msg = msg.split(end_tag,1)[0].split("\n",1)[0].strip()
                if msg:
                    raise ValueError("Error message from NCBI: %s" % msg)
        #Generic search based on the way the error messages start:
        i = s.find('Message ID#')
        if i != -1:
            #Break the message at the first HTML tag
            msg = s[i:].split("<",1)[0].split("\n",1)[0].strip()
            raise ValueError("Error message from NCBI: %s" % msg)
        #We didn't recognise the error layout :(
        raise ValueError("No RID and no RTOE found in the 'please wait' page, "
                         "there was probably an error in your request but we "
                         "could not extract a helpful error message.")
    elif not rid:
        #Can this happen?
        raise ValueError("No RID found in the 'please wait' page."
                         " (although RTOE = %s)" % repr(rtoe))
    elif not rtoe:
        #Can this happen?
        raise ValueError("No RTOE found in the 'please wait' page."
                         " (although RID = %s)" % repr(rid))

    try:
        return rid, int(rtoe)
    except ValueError:
        raise ValueError("A non-integer RTOE found in "
                         "the 'please wait' page, %s" % repr(rtoe))
00248 
00249  

Here is the caller graph for this function:

def Bio.Blast.NCBIWWW.qblast (   program,
  database,
  sequence,
  auto_format = None,
  composition_based_statistics = None,
  db_genetic_code = None,
  endpoints = None,
  entrez_query = '(none)',
  expect = 10.0,
  filter = None,
  gapcosts = None,
  genetic_code = None,
  hitlist_size = 50,
  i_thresh = None,
  layout = None,
  lcase_mask = None,
  matrix_name = None,
  nucl_penalty = None,
  nucl_reward = None,
  other_advanced = None,
  perc_ident = None,
  phi_pattern = None,
  query_file = None,
  query_believe_defline = None,
  query_from = None,
  query_to = None,
  searchsp_eff = None,
  service = None,
  threshold = None,
  ungapped_alignment = None,
  word_size = None,
  alignments = 500,
  alignment_view = None,
  descriptions = 500,
  entrez_links_new_window = None,
  expect_low = None,
  expect_high = None,
  format_entrez_query = None,
  format_object = None,
  format_type = 'XML',
  ncbi_gi = None,
  results_file = None,
  show_overview = None,
  megablast = None 
)
Do a BLAST search using the QBLAST server at NCBI.

Supports all parameters of the qblast API for Put and Get.
Some useful parameters:
program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
database       Which database to search against (e.g. "nr").
sequence       The sequence to search.
ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
descriptions   Number of descriptions to show.  Def 500.
alignments     Number of alignments to show.  Def 500.
expect         An expect value cutoff.  Def 10.0.
matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
filter         "none" turns off filtering.  Default no filtering
format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
entrez_query   Entrez query to limit Blast search
hitlist_size   Number of hits to return. Default 50
megablast      TRUE/FALSE whether to use the MegaBLAST algorithm (blastn only)
service        plain, psi, phi, rpsblast, megablast (lower case)

This function does no checking of the validity of the parameters
and passes the values to the server as is.  More help is available at:
http://www.ncbi.nlm.nih.gov/BLAST/blast_overview.html

Definition at line 40 of file NCBIWWW.py.

00040 
00041            ):
00042     """Do a BLAST search using the QBLAST server at NCBI.
00043 
00044     Supports all parameters of the qblast API for Put and Get.
00045     Some useful parameters:
00046     program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
00047     database       Which database to search against (e.g. "nr").
00048     sequence       The sequence to search.
00049     ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
00050     descriptions   Number of descriptions to show.  Def 500.
00051     alignments     Number of alignments to show.  Def 500.
00052     expect         An expect value cutoff.  Def 10.0.
00053     matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
00054     filter         "none" turns off filtering.  Default no filtering
00055     format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
00056     entrez_query   Entrez query to limit Blast search
00057     hitlist_size   Number of hits to return. Default 50
00058     megablast      TRUE/FALSE whether to use MEga BLAST algorithm (blastn only)
00059     service        plain, psi, phi, rpsblast, megablast (lower case)
00060 
00061     This function does no checking of the validity of the parameters
00062     and passes the values to the server as is.  More help is available at:
00063     http://www.ncbi.nlm.nih.gov/BLAST/blast_overview.html
00064 
00065     """
00066     import urllib, urllib2
00067     import time
00068 
00069     assert program in ['blastn', 'blastp', 'blastx', 'tblastn', 'tblastx']
00070 
00071     # Format the "Put" command, which sends search requests to qblast.
00072     # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node5.html on 9 July 2007
00073     # Additional parameters are taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node9.html on 8 Oct 2010
00074     # To perform a PSI-BLAST or PHI-BLAST search the service ("Put" and "Get" commands) must be specified
00075     # (e.g. psi_blast = NCBIWWW.qblast("blastp", "refseq_protein", input_sequence, service="psi"))
00076     parameters = [
00077         ('AUTO_FORMAT',auto_format),
00078         ('COMPOSITION_BASED_STATISTICS',composition_based_statistics),
00079         ('DATABASE',database),
00080         ('DB_GENETIC_CODE',db_genetic_code),
00081         ('ENDPOINTS',endpoints),
00082         ('ENTREZ_QUERY',entrez_query),
00083         ('EXPECT',expect),
00084         ('FILTER',filter),
00085         ('GAPCOSTS',gapcosts),
00086         ('GENETIC_CODE',genetic_code),
00087         ('HITLIST_SIZE',hitlist_size),
00088         ('I_THRESH',i_thresh),
00089         ('LAYOUT',layout),
00090         ('LCASE_MASK',lcase_mask),
00091         ('MEGABLAST',megablast),
00092         ('MATRIX_NAME',matrix_name),
00093         ('NUCL_PENALTY',nucl_penalty),
00094         ('NUCL_REWARD',nucl_reward),
00095         ('OTHER_ADVANCED',other_advanced),
00096         ('PERC_IDENT',perc_ident),
00097         ('PHI_PATTERN',phi_pattern),
00098         ('PROGRAM',program),
00099         #('PSSM',pssm), - It is possible to use PSI-BLAST via this API?
00100         ('QUERY',sequence),
00101         ('QUERY_FILE',query_file),
00102         ('QUERY_BELIEVE_DEFLINE',query_believe_defline),
00103         ('QUERY_FROM',query_from),
00104         ('QUERY_TO',query_to),
00105         #('RESULTS_FILE',...), - Can we use this parameter?
00106         ('SEARCHSP_EFF',searchsp_eff),
00107         ('SERVICE',service),
00108         ('THRESHOLD',threshold),
00109         ('UNGAPPED_ALIGNMENT',ungapped_alignment),
00110         ('WORD_SIZE',word_size),
00111         ('CMD', 'Put'),
00112         ]
00113     query = [x for x in parameters if x[1] is not None]
00114     message = _as_bytes(urllib.urlencode(query))
00115 
00116     # Send off the initial query to qblast.
00117     # Note the NCBI do not currently impose a rate limit here, other
00118     # than the request not to make say 50 queries at once using multiple
00119     # threads.
00120     request = urllib2.Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
00121                               message,
00122                               {"User-Agent":"BiopythonClient"})
00123     handle = urllib2.urlopen(request)
00124 
00125     # Format the "Get" command, which gets the formatted results from qblast
00126     # Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node6.html on 9 July 2007       
00127     rid, rtoe = _parse_qblast_ref_page(handle)
00128     parameters = [
00129         ('ALIGNMENTS',alignments),
00130         ('ALIGNMENT_VIEW',alignment_view),
00131         ('DESCRIPTIONS',descriptions),
00132         ('ENTREZ_LINKS_NEW_WINDOW',entrez_links_new_window),
00133         ('EXPECT_LOW',expect_low),
00134         ('EXPECT_HIGH',expect_high),
00135         ('FORMAT_ENTREZ_QUERY',format_entrez_query),
00136         ('FORMAT_OBJECT',format_object),
00137         ('FORMAT_TYPE',format_type),
00138         ('NCBI_GI',ncbi_gi),
00139         ('RID',rid),
00140         ('RESULTS_FILE',results_file),
00141         ('SERVICE',service),
00142         ('SHOW_OVERVIEW',show_overview),
00143         ('CMD', 'Get'),
00144         ]
00145     query = [x for x in parameters if x[1] is not None]
00146     message = _as_bytes(urllib.urlencode(query))
00147 
00148     # Poll NCBI until the results are ready.  Use a 3 second wait
00149     delay = 3.0
00150     previous = time.time()
00151     while True:
00152         current = time.time()
00153         wait = previous + delay - current
00154         if wait > 0:
00155             time.sleep(wait)
00156             previous = current + wait
00157         else:
00158             previous = current
00159 
00160         request = urllib2.Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
00161                                   message,
00162                                   {"User-Agent":"BiopythonClient"})
00163         handle = urllib2.urlopen(request)
00164         results = _as_string(handle.read())
00165 
00166         # Can see an "\n\n" page while results are in progress,
00167         # if so just wait a bit longer...
00168         if results=="\n\n":
00169             continue
00170         # XML results don't have the Status tag when finished
00171         if results.find("Status=") < 0:
00172             break
00173         i = results.index("Status=")
00174         j = results.index("\n", i)
00175         status = results[i+len("Status="):j].strip()
00176         if status.upper() == "READY":
00177             break
00178 
00179     return StringIO(results)

Here is the call graph for this function: