Back to index

python-biopython  1.60
test_NCBI_qblast.py
Go to the documentation of this file.
00001 # Copyright 2008 by Peter Cock.  All rights reserved.
00002 # This code is part of the Biopython distribution and governed by its
00003 # license.  Please see the LICENSE file that should have been included
00004 # as part of this package.
00005 
00006 """Testing online code for fetching NCBI qblast.
00007 
00008 Uses Bio.Blast.NCBIWWW.qblast() to run some online blast queries, get XML
00009 blast results back, and then checks Bio.Blast.NCBIXML.parse() can read them.
00010 
00011 Goals:
00012     Make sure that all retrieval is working as expected.
00013     Make sure we can parse the latest XML format being used by the NCBI.
00014 """
00015 import sys
00016 import requires_internet
00017 requires_internet.check()
00018 from Bio import MissingExternalDependencyError 
00019 from urllib2 import HTTPError
00020 
00021 #We want to test these:
00022 from Bio.Blast import NCBIWWW
00023 from Bio.Blast import NCBIXML
00024 
00025 
00026 #####################################################################
00027 
00028 #List of qblast requests stored as a tuple of parameters:
00029 # - program
00030 # - database
00031 # - query identifier or sequence
00032 # - expectation value threshold
00033 # - Entrez filter string (or None)
00034 # - list of hit identifiers expected to be found (or None if expect 0)
00035 tests = [ \
00036     #Simple protein blast filtered for rat only, using protein GI:160837788
00037     #the actin related protein 2/3 complex, subunit 1B [Mus musculus]
00038     ("blastp", "nr", "160837788", 0.001,
00039      "rat [ORGN]", ['9506405','13592137','37589612','149064087','56912225']),
00040     #This next example finds PCR primer matches in Chimpanzees, e.g. BRCA1:
00041     ("blastn", "nr", "GTACCTTGATTTCGTATTC"+("N"*30)+"GACTCTACTACCTTTACCC",
00042      10, "pan [ORGN]", ["37953274","51104367","51104367","51104367"]),
00043     #Try an orchid EST (nucleotide) sequence against NR using BLASTX
00044     ("blastx", "nr", """>gi|116660609|gb|EG558220.1|EG558220 CR02019H04 Leaf CR02 cDNA library Catharanthus roseus cDNA clone CR02019H04 5', mRNA sequence
00045 CTCCATTCCCTCTCTATTTTCAGTCTAATCAAATTAGAGCTTAAAAGAATGAGATTTTTAACAAATAAAA
00046 AAACATAGGGGAGATTTCATAAAAGTTATATTAGTGATTTGAAGAATATTTTAGTCTATTTTTTTTTTTT
00047 TCTTTTTTTGATGAAGAAAGGGTATATAAAATCAAGAATCTGGGGTGTTTGTGTTGACTTGGGTCGGGTG
00048 TGTATAATTCTTGATTTTTTCAGGTAGTTGAAAAGGTAGGGAGAAAAGTGGAGAAGCCTAAGCTGATATT
00049 GAAATTCATATGGATGGAAAAGAACATTGGTTTAGGATTGGATCAAAAAATAGGTGGACATGGAACTGTA
00050 CCACTACGTCCTTACTATTTTTGGCCGAGGAAAGATGCTTGGGAAGAACTTAAAACAGTTTTAGAAAGCA
00051 AGCCATGGATTTCTCAGAAGAAAATGATTATACTTCTTAATCAGGCAACTGATATTATCAATTTATGGCA
00052 GCAGAGTGGTGGCTCCTTGTCCCAGCAGCAGTAATTACTTTTTTTTCTCTTTTTGTTTCCAAATTAAGAA
00053 ACATTAGTATCATATGGCTATTTGCTCAATTGCAGATTTCTTTCTTTTGTGAATG""",
00054      0.0000001, None, ["21554275","18409071","296087288"]),
00055 ]
00056 
00057 print "Checking Bio.Blast.NCBIWWW.qblast() with various queries"
00058 for program,database,query,e_value,entrez_filter,expected_hits in tests:
00059     print "qblast('%s', '%s', %s, ...)" % (program, database, repr(query))
00060     try:
00061         if program=="blastn":
00062             #Check the megablast parameter is accepted
00063             handle = NCBIWWW.qblast(program, database, query, \
00064                                     alignments=10, descriptions=10, \
00065                                     hitlist_size=10, \
00066                                     entrez_query=entrez_filter,
00067                                     expect=e_value, megablast="FALSE")
00068         else:
00069             handle = NCBIWWW.qblast(program, database, query, \
00070                                     alignments=10, descriptions=10, \
00071                                     hitlist_size=10, \
00072                                     entrez_query=entrez_filter,
00073                                     expect=e_value)
00074     except HTTPError:
00075         #e.g. a proxy error
00076         raise MissingExternalDependencyError("internet connection failed")
00077     record = NCBIXML.read(handle)
00078 
00079     if record.query == "No definition line":
00080         #We used a sequence as the query
00081         assert len(query) == record.query_letters
00082     elif query.startswith(">"):
00083         #We used a FASTA record as the query
00084         assert query[1:].split("\n",1)[0] == (record.query)
00085     else:
00086         #We used an identifier as the query
00087         assert query in record.query_id.split("|")
00088 
00089     #Check the recorded input parameters agree with those requested
00090     assert float(record.expect) == e_value
00091     assert record.application.lower() == program
00092     assert len(record.alignments) <= 10
00093     assert len(record.descriptions) <= 10
00094 
00095     #Check the expected result(s) are found in the alignments
00096     if expected_hits is None:
00097         assert len(record.alignments)==0, "Expected no alignments!"
00098     else:
00099         assert len(record.alignments) > 0, "Expected some alignments!"
00100         found_result = False
00101         for expected_hit in expected_hits:
00102             for alignment in record.alignments:
00103                 if expected_hit in alignment.hit_id.split("|"):
00104                     found_result = True
00105                     break
00106         if len(expected_hits)==1:
00107             print "Update this test to have some redundancy..."
00108             for alignment in record.alignments:
00109                 print alignment.hit_id
00110         assert found_result, "Missing all of %s in alignments" \
00111                % ", ".join(expected_hits)
00112 
00113     #Check the expected result(s) are found in the descriptions
00114     if expected_hits is None:
00115         assert len(record.descriptions)==0, "Expected no descriptions!"
00116     else:
00117         assert len(record.descriptions) > 0, "Expected some descriptions!"
00118         found_result = False
00119         for expected_hit in expected_hits:
00120             for descr in record.descriptions:
00121                 if expected_hit == descr.accession \
00122                 or expected_hit in descr.title.split(None,1)[0].split("|"):
00123                     found_result = True
00124                     break
00125         assert found_result, "Missing all of %s in descriptions" % expected_hit
00126 
00127 print "Done"