1
2
3
4
5
6
7 """Provides code to access NCBI over the WWW.
8
9 The main Entrez web page is available at:
10 http://www.ncbi.nlm.nih.gov/Entrez/
11
12 A list of the Entrez utilities is available at:
13 http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
14
15
16 Functions:
17 efetch Retrieves records in the requested format from a list of one or
18 more primary IDs or from the user's environment
19 epost Posts a file containing a list of primary IDs for future use in
20 the user's environment to use with subsequent search strategies
21 esearch Searches and retrieves primary IDs (for use in EFetch, ELink,
22 and ESummary) and term translations and optionally retains
23 results for future use in the user's environment.
24 elink Checks for the existence of an external or Related Articles link
25 from a list of one or more primary IDs. Retrieves primary IDs
26 and relevancy scores for links to Entrez databases or Related
27 Articles; creates a hyperlink to the primary LinkOut provider
28 for a specific ID and database, or lists LinkOut URLs
29 and Attributes for multiple IDs.
30 einfo Provides field index term counts, last update, and available
31 links for each database.
32 esummary Retrieves document summaries from a list of primary IDs or from
33 the user's environment.
34 egquery Provides Entrez database counts in XML for a single search
35 using Global Query.
36 espell Retrieves spelling suggestions.
37
38 read Parses the XML results returned by any of the above functions.
39 Typical usage is:
40 >>> handle = Entrez.einfo() # or esearch, efetch, ...
41 >>> record = Entrez.read(handle)
42 where record is now a Python dictionary or list.
43
44 _open Internally used function.
45
46 """
47 import urllib, time, warnings
48 import os.path
49 from Bio import File
50
51
# Optional contact e-mail address.  When set to something other than None,
# _open adds it as the "email" option to every request that does not
# already carry one.
email = None
53
def query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/sites/entrez',
          **keywds):
    """Former entry point for querying the Entrez web site (DEPRECATED).

    This function used the Entrez web pages rather than the E-Utilities,
    see:
    http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp

    It now only issues a DeprecationWarning: the arguments are accepted
    but not used, no request is sent and nothing is returned.
    """
    message = ("Bio.Entrez.query is deprecated, since it breaks NCBI's "
               "rule to only use the E-Utilities URL.")
    warnings.warn(message, DeprecationWarning)
67
68
def epost(db, cgi=None, **keywds):
    """Upload a list of identifiers for later use and return a handle.

    EPost stores a list of UIs in the user's environment so that
    subsequent search strategies can refer to them.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    db and any extra keyword arguments are sent as the query options.
    The cgi argument is deprecated: a true value triggers a
    DeprecationWarning and is otherwise ignored, the NCBI EPost URL
    is always used.

    Returns a handle to the results.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi'
    options = dict(db=db, **keywds)
    return _open(url, options)
89
def efetch(db, cgi=None, **keywds):
    """Fetches Entrez results which are returned as a handle.

    EFetch retrieves records in the requested format from a list of one or
    more UIs or from user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html

    db and any extra keyword arguments are sent as the query options.
    The unofficial return type "genbank" (any capitalisation) is replaced
    by "gp" for the protein database and by "gb" otherwise, with a
    DeprecationWarning.  The cgi argument is deprecated: a true value
    triggers a DeprecationWarning and is otherwise ignored, the NCBI
    EFetch URL is always used.

    Return a handle to the results.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.efetch(db="nucleotide", id="57240072", rettype="gb")
    print handle.read()
    """
    for key in keywds:
        if key.lower() != "rettype":
            continue
        value = keywds[key]
        # The value need not be a string: None is a legal "not given"
        # marker which _open drops later, so only compare things that
        # can be lower-cased instead of failing with AttributeError.
        if hasattr(value, "lower") and value.lower() == "genbank":
            warnings.warn('As of Easter 2009, Entrez EFetch no longer '
                          'supports the unofficial return type "genbank", '
                          'use "gb" or "gp" instead.', DeprecationWarning)
            # Only the value of an existing key is replaced, so the
            # dictionary does not change size while being iterated.
            if db.lower() == "protein":
                keywds[key] = "gp"
            else:
                keywds[key] = "gb"
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the "
                      "E-Utilities is deprecated.", DeprecationWarning)
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    variables = {'db': db}
    variables.update(keywds)
    return _open(cgi, variables)
127
def esearch(db, term, cgi=None, **keywds):
    """Run an Entrez search and return a handle to the results.

    ESearch looks up primary IDs (which can then be used with EFetch,
    ELink and ESummary) together with term translations, and can
    optionally keep the results in the user's environment for later use.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

    db, term and any extra keyword arguments are sent as the query
    options.  The cgi argument is deprecated: a true value triggers a
    DeprecationWarning and is otherwise ignored, the NCBI ESearch URL
    is always used.

    Returns a handle to the results which are always in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.esearch(db="nucleotide", retmax=10, term="Opuntia")
    record = Entrez.read(handle)
    print record["Count"]
    print record["IdList"]
    """
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    options = dict(db=db, term=term, **keywds)
    return _open(url, options)
158
def elink(cgi=None, **keywds):
    """Look up links for a set of primary IDs and return a handle.

    ELink can check whether an external or Related Articles link exists
    for one or more primary IDs, retrieve IDs and relevancy scores for
    links to Entrez databases or Related Articles, create a hyperlink to
    the primary LinkOut provider for a specific ID and database, or list
    LinkOut URLs and attributes for multiple IDs.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html

    All keyword arguments are sent as the query options.  The cgi
    argument is deprecated: a true value triggers a DeprecationWarning
    and is otherwise ignored, the NCBI ELink URL is always used.

    Returns a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
    options = dict(keywds)
    return _open(url, options)
182
def einfo(cgi=None, **keywds):
    """Request a summary of the Entrez databases as a results handle.

    EInfo reports field names, index term counts, last update, and
    available links for each Entrez database.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html

    All keyword arguments are sent as the query options.  The cgi
    argument is deprecated: a true value triggers a DeprecationWarning
    and is otherwise ignored, the NCBI EInfo URL is always used.

    Returns a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.einfo())
    print record['DbList']
    """
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
    options = dict(keywds)
    return _open(url, options)
209
def esummary(cgi=None, **keywds):
    """ESummary retrieves document summaries as a results handle.

    ESummary retrieves document summaries from a list of primary IDs or
    from the user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esummary_help.html

    All keyword arguments are sent as the query options.  The cgi
    argument is deprecated: a true value triggers a DeprecationWarning
    and is otherwise ignored, the NCBI ESummary URL is always used.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    # NOTE(review): the "def" line was missing from the listing this block
    # was recovered from; the signature mirrors the sibling E-utility
    # wrappers (the body reads cgi and keywds) - confirm against upstream.
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
230
def egquery(cgi=None, **keywds):
    """EGQuery provides Entrez database counts for a global search.

    EGQuery provides Entrez database counts in XML for a single search
    using Global Query.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html

    All keyword arguments are sent as the query options.  The cgi
    argument is deprecated: a true value triggers a DeprecationWarning
    and is otherwise ignored, the NCBI EGQuery URL is always used.

    Return a handle to the results in XML format.

    Raises an IOError exception if there's a network error.
    """
    # NOTE(review): the "def" line was missing from the listing this block
    # was recovered from; the signature mirrors the sibling E-utility
    # wrappers (the body reads cgi and keywds) - confirm against upstream.
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
    variables = {}
    variables.update(keywds)
    return _open(cgi, variables)
251
def espell(cgi=None, **keywds):
    """Ask Entrez for spelling suggestions and return a results handle.

    ESpell retrieves spelling suggestions, if available.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/espell_help.html

    All keyword arguments are sent as the query options.  The cgi
    argument is deprecated: a true value triggers a DeprecationWarning
    and is otherwise ignored, the NCBI ESpell URL is always used.

    Returns a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.espell(term="biopythooon"))
    print record["Query"]
    print record["CorrectedQuery"]
    """
    if cgi:
        warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
    options = dict(keywds)
    return _open(url, options)
278
def read(handle):
    """Parses an XML file from the NCBI Entrez Utilities into python objects.

    This function parses an XML file created by NCBI's Entrez Utilities,
    returning a multilevel data structure of Python lists and dictionaries.
    Most XML files returned by NCBI's Entrez Utilities can be parsed by
    this function, provided its DTD is available. Biopython includes the
    DTDs for most commonly used Entrez Utilities.

    Whereas the data structure seems to consist of generic Python lists,
    dictionaries, strings, and so on, each of these is actually a class
    derived from the base type. This allows us to store the attributes
    (if any) of each element in a dictionary my_element.attributes, and
    the tag name in my_element.tag.

    handle is the XML source, typically the handle returned by one of the
    E-utility functions in this module.  Returns the parsed record.
    """
    # NOTE(review): the "def" line was missing from the listing this block
    # was recovered from; read(handle) is inferred from the body and from
    # the usage shown in the module docstring - confirm against upstream.
    from Parser import DataHandler
    # The DTDs are looked up in the "DTDs" directory next to this package;
    # __path__ is only defined when this file is a package __init__.
    DTDs = os.path.join(__path__[0], "DTDs")
    handler = DataHandler(DTDs)
    record = handler.run(handle)
    return record
299
def _open(cgi, params=None):
    """Helper function to build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access.
    params is a dictionary with the options to pass to it; options whose
    value is None are left out.  The caller's dictionary is not modified.

    Unless the caller supplied its own values, the option tool="biopython"
    is added, and so is the module level email address if it has been set.

    Successive calls are spaced at least a third of a second apart
    (sleeping when necessary), i.e. at most three requests per second, to
    avoid abusing the NCBI servers.

    Returns a File.UndoHandle wrapped round the response.  The lines read
    for error checking are pushed back onto it first.

    Raises an IOError if the start of the response matches one of the
    known error messages.
    """
    # Throttle requests.  The time of the previous request is kept as an
    # attribute on the function itself (initialised below the function).
    delay = 0.333333334
    now = time.time()
    wait = _open.previous + delay - now
    if wait > 0:
        time.sleep(wait)
        _open.previous = now + wait
    else:
        _open.previous = now

    # Build a private copy of the options without the None entries, so
    # that neither the caller's dictionary nor a shared default is ever
    # changed, and nothing is deleted from a dictionary during iteration.
    options = dict((key, value)
                   for key, value in (params or {}).items()
                   if value is not None)
    options.setdefault("tool", "biopython")
    if email is not None:
        options.setdefault("email", email)

    url = cgi + "?" + urllib.urlencode(options, doseq=True)
    uhandle = File.UndoHandle(urllib.urlopen(url))

    # Peek at the first seven lines to look for error messages, then hand
    # them back to the handle, last line first as the original code did
    # (presumably saveline puts a line in front of the pending ones).
    head = [uhandle.readline() for _ in range(7)]
    for line in reversed(head):
        uhandle.saveline(line)
    data = ''.join(head)

    # Messages that may appear anywhere within the first lines.
    for marker, message in (
            ("500 Proxy Error", "500 Proxy Error (NCBI busy?)"),
            ("502 Proxy Error", "502 Proxy Error (NCBI busy?)"),
            ("WWW Error 500 Diagnostic",
             "WWW Error 500 Diagnostic (NCBI busy?)"),
            ("<title>Service unavailable!</title>", "Service unavailable!"),
            ("<title>Bad Gateway!</title>", "Bad Gateway!")):
        if marker in data:
            raise IOError(message)

    # Messages that must be at the very start of the response.
    if data.startswith("Error:"):
        raise IOError(data.strip())
    for prefix in ("The resource is temporarily unavailable",
                   "download dataset is empty"):
        if data.startswith(prefix):
            raise IOError(prefix)
    if data.startswith("ERROR"):
        raise IOError("ERROR, possibly because id not available?")

    return uhandle

# Time of the most recent request; zero means none has been made yet.
_open.previous = 0
382