Package Bio :: Package WWW :: Module NCBI
[hide private]
[frames] | no frames]

Source Code for Module Bio.WWW.NCBI

  1  # Copyright 1999-2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Provides code to access NCBI over the WWW. 
  7   
  8  The main Entrez web page is available at: 
  9  http://www.ncbi.nlm.nih.gov/Entrez/ 
 10   
 11  A list of the Entrez utilities is available at: 
 12  http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html 
 13   
 14  Documentation for the e-utilities is available at: 
 15  http://www.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html 
 16   
 17  The main Blast web page is available at: 
 18  http://www.ncbi.nlm.nih.gov/BLAST/ 
 19   
 20   
 21  Functions: 
 22  query        Query Entrez; retrieve results in HTML format. 
 23  pmfetch      Retrieve results using a unique identifier. 
 24  pmqty        Search PubMed. 
 25  pmneighbor   Return a list of related articles for a PubMed entry. 
 26   
 27  efetch       Access the efetch script. 
 28  _open 
 29   
 30  """ 
 31   
 32  import warnings 
 33  warnings.warn("Bio.WWW.NCBI is deprecated. The functions in Bio.WWW.NCBI are now available from Bio.Entrez; except for the pm* functions which the NCBI have retired.", DeprecationWarning) 
 34   
 35   
 36  import string 
 37  import urllib 
 38   
 39  from Bio import File 
 40   
def query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/sites/entrez',
          **keywds):
    """query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/sites/entrez',
    **keywds) -> handle

    Send a query to Entrez and hand back a handle to the reply, which
    is a web page in HTML format.  cmd and db are always sent; any
    extra keyword arguments are passed along as additional options.
    The parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp

    Raises an IOError exception if there's a network error.

    """
    # cmd and db cannot appear in keywds (they bind to the named
    # parameters), so merging never overwrites them.
    options = dict({'cmd': cmd, 'db': db}, **keywds)
    return _open(cgi, options)
57
def pmfetch(db, id, report=None, mode=None,
            cgi="http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi"):
    """pmfetch(db, id, report=None, mode=None,
    cgi="http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch.fcgi") -> handle

    Send a request to PmFetch and hand back a handle to the reply.
    report and mode are only sent when they are not None.  The
    parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html

    Raises an IOError exception if there's a network error.

    """
    # NCBI has retired PmFetch!!!
    options = {'db': db, 'id': id}
    # Optional settings are left out of the request entirely when unset.
    for name, value in (('report', report), ('mode', mode)):
        if value is not None:
            options[name] = value
    return _open(cgi, options)
77
def pmqty(db, term, dopt=None,
          cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty.fcgi',
          **keywds):
    """pmqty(db, term, dopt=None,
    cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty.fcgi',
    **keywds) -> handle

    Send a search to PmQty and hand back a handle to the reply.  dopt
    is only sent when it is not None; any extra keyword arguments are
    passed along as additional options.  The parameters are explained
    in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/utils/pmqty_help.html

    Raises an IOError exception if there's a network error.

    """
    # NCBI has retired PmQty!!!
    options = dict(db=db, term=term)
    if dopt is not None:
        options['dopt'] = dopt
    # Caller-supplied extras go last, after the fixed options.
    return _open(cgi, dict(options, **keywds))
97
def pmneighbor(pmid, display,
               cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi'):
    """pmneighbor(pmid, display,
    cgi='http://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor.fcgi') -> handle

    Send a request to PMNeighbor and hand back a handle to the reply.
    The parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/utils/pmneighbor_help.html

    Raises an IOError exception if there's a network error.

    """
    # NCBI has retired PmNeighbor!!!
    #
    # Warning: HUGE HACK HERE!  pmneighbor wants the display parameter
    # as a bare tag with no value.  _open cannot express that kind of
    # parameter (a limitation of urllib.urlencode), so the query string
    # is assembled by hand here and the finished URL is handed to _open
    # with no params.  A proper workaround is still needed.
    query_string = "pmid=%s&%s" % (pmid, display)
    return _open("%s?%s" % (cgi, query_string))
# XXX retmode?
def epost(db, id, cgi='http://www.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi',
          **keywds):
    """epost(db, id[, cgi][...]) -> handle

    Send a request to the Entrez epost script and hand back a handle to
    the reply.  db and id are always sent; any extra keyword arguments
    are passed along as additional options.  The parameters are
    explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    Raises an IOError exception if there's a network error.

    """
    options = {'db': db, 'id': id}
    for name in keywds:
        options[name] = keywds[name]
    return _open(cgi, options)
136
def efetch(db, cgi='http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
           **keywds):
    """efetch(db[, cgi][...]) -> handle

    Send a request to the Entrez efetch script and hand back a handle
    to the reply.  db is always sent; any extra keyword arguments are
    passed along as additional options.  The parameters are explained
    in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html

    Raises an IOError exception if there's a network error.

    """
    # db goes first, followed by whatever extras the caller supplied.
    return _open(cgi, dict({'db': db}, **keywds))
151
def esearch(db, term,
            cgi='http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
            **keywds):
    """esearch(db, term[, cgi][...]) -> handle

    Send a search to the Entrez esearch script and hand back a handle
    to the reply.  db and term are always sent; any extra keyword
    arguments are passed along as additional options.  The parameters
    are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

    Raises an IOError exception if there's a network error.

    """
    options = dict(db=db, term=term)
    options.update(keywds)
    return _open(cgi, options)
168 183
def _open(cgi, params=None, get=1):
    """_open(cgi, params=None, get=1) -> UndoHandle

    Open a handle to Entrez.  cgi is the URL for the cgi script to access.
    params is a dictionary with the options to pass to it; None (the
    default) means no options.  get is a boolean that describes whether
    a GET should be used; when it is false the options are sent as the
    body of a POST instead.

    The response is wrapped in a File.UndoHandle.  Does some simple
    error checking on the first 5 lines of the response, and will raise
    an IOError if it encounters one; the connection is closed before
    the exception is raised.  When no error is found, the lines that
    were read are pushed back so the returned handle still yields the
    complete response.

    """
    # None stands in for "no options" so that the default is not a
    # mutable dictionary shared between calls.
    if params is None:
        params = {}

    # Open a handle to Entrez.
    options = urllib.urlencode(params)
    if get:  # do a GET
        fullcgi = cgi
        if options:
            fullcgi = "%s?%s" % (cgi, options)
        handle = urllib.urlopen(fullcgi)
    else:    # do a POST
        handle = urllib.urlopen(cgi, options)

    # Wrap the handle inside an UndoHandle.
    uhandle = File.UndoHandle(handle)

    # Check for errors in the first 5 lines.
    # This is kind of ugly.
    lines = [uhandle.readline() for i in range(5)]
    # Push the lines back last-first so they are read again in order.
    for i in range(4, -1, -1):
        uhandle.saveline(lines[i])
    data = "".join(lines)

    error = None
    if "500 Proxy Error" in data:
        # Sometimes Entrez returns a Proxy Error instead of results
        error = "500 Proxy Error (NCBI busy?)"
    elif "502 Proxy Error" in data:
        error = "502 Proxy Error (NCBI busy?)"
    elif "WWW Error 500 Diagnostic" in data:
        error = "WWW Error 500 Diagnostic (NCBI busy?)"
    elif data[:5] == "ERROR":
        # XXX Possible bug here, because I don't know whether this really
        # occurs on the first line.  I need to check this!
        error = "ERROR, possibly because id not available?"
    # Should I check for 404?  timeout?  etc?

    if error is not None:
        # The caller never receives the handle on this path, so release
        # the network connection here instead of leaking it.
        handle.close()
        raise IOError(error)
    return uhandle
229