Back to index

python-biopython  1.60
query_pubmed.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 # Copyright 2000 by Jeffrey Chang.  All rights reserved.
00004 # This code is part of the Biopython distribution and governed by its
00005 # license.  Please see the LICENSE file that should have been included
00006 # as part of this package.
00007 
00008 import sys
00009 import getopt
00010 
00011 from Bio import Entrez
00012 
def print_usage():
    """Print the command-line help text for query_pubmed.py to stdout.

    Takes no arguments and returns None.  The text lists every option the
    script's getopt specification ("hcd:") accepts, including -d, which is
    still parsed but no longer has any effect.
    """
    # A single parenthesised string prints the same text under both
    # Python 2 (print statement) and Python 3 (print function).
    print("""query_pubmed.py [-h] [-c] [-d delay] query

This script sends a query to PubMed (via the NCBI Entrez webservice*)
and prints the MEDLINE formatted results to the screen.

Arguments:
    -h           Print out this help message.
    -c           Count the hits, and don't print them out.
    -d delay     This option is now ignored.

* http://www.ncbi.nlm.nih.gov/Entrez/
""")
00025 
if __name__ == '__main__':
    # Command-line entry point: parse the options, run one PubMed search via
    # Entrez.esearch, report the number of hits and, unless -c was given,
    # download the matching records in MEDLINE text format and write them
    # to stdout.
    try:
        optlist, args = getopt.getopt(sys.argv[1:], "hcd:")
    except getopt.error as x:
        # A bad option is a usage error: report it on stderr and exit with
        # a non-zero status so calling shells/scripts can detect it.
        sys.stderr.write("%s\n" % x)
        sys.exit(2)
    if len(args) != 1:     # Exactly one query argument is required;
        print_usage()      # otherwise print the instructions and quit.
        sys.exit(0)
    query = args[0]

    show_help = False
    count_only = False
    for opt, arg in optlist:
        if opt == '-h':
            show_help = True
        elif opt == '-c':
            count_only = True
        elif opt == '-d':
            sys.stderr.write("The delay parameter is now ignored\n")
    if show_help:
        print_usage()
        sys.exit(0)

    print("Doing a PubMed search for %s..." % repr(query))

    # The history server (usehistory="Y") is only needed when the records
    # are going to be fetched afterwards using WebEnv/QueryKey.
    if count_only:
        handle = Entrez.esearch(db="pubmed", term=query)
    else:
        handle = Entrez.esearch(db="pubmed", term=query, usehistory="Y")
    try:
        search_results = Entrez.read(handle)
    finally:
        handle.close()
    # "IdList" only holds the first page of IDs (ESearch returns at most
    # retmax IDs, 20 by default), so its length under-reports large result
    # sets.  "Count" is the total number of matching records.
    count = int(search_results["Count"])
    print("Found %d citations" % count)

    if count_only:
        sys.exit(0)

    webenv = search_results["WebEnv"]
    query_key = search_results["QueryKey"]
    batch_size = 3
    # Walk the stored result set in batches; retstart/retmax select the
    # slice of the history-server results returned by each request.
    for start in range(0, count, batch_size):
        fetch_handle = Entrez.efetch(db="pubmed", rettype="medline",
                                     retmode="text",
                                     retstart=start, retmax=batch_size,
                                     webenv=webenv, query_key=query_key)
        try:
            data = fetch_handle.read()
        finally:
            # Release the connection even if reading the response fails.
            fetch_handle.close()
        sys.stdout.write(data)
        # Flush per batch so output appears as soon as it is downloaded.
        sys.stdout.flush()