1
2
3
4
5
6
7 """Provides code to access NCBI over the WWW.
8
9 The main Entrez web page is available at:
10 http://www.ncbi.nlm.nih.gov/Entrez/
11
12 A list of the Entrez utilities is available at:
13 http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
14
15
16 Functions:
17 efetch Retrieves records in the requested format from a list of one or
18 more primary IDs or from the user's environment
19 epost Posts a file containing a list of primary IDs for future use in
20 the user's environment to use with subsequent search strategies
21 esearch Searches and retrieves primary IDs (for use in EFetch, ELink,
22 and ESummary) and term translations and optionally retains
23 results for future use in the user's environment.
24 elink Checks for the existence of an external or Related Articles link
25 from a list of one or more primary IDs. Retrieves primary IDs
26 and relevancy scores for links to Entrez databases or Related
27 Articles; creates a hyperlink to the primary LinkOut provider
28 for a specific ID and database, or lists LinkOut URLs
29 and Attributes for multiple IDs.
30 einfo Provides field index term counts, last update, and available
31 links for each database.
32 esummary Retrieves document summaries from a list of primary IDs or from
33 the user's environment.
34 egquery Provides Entrez database counts in XML for a single search
35 using Global Query.
36 espell Retrieves spelling suggestions.
37
38 read Parses the XML results returned by any of the above functions.
39 Typical usage is:
40 >>> handle = Entrez.einfo() # or esearch, efetch, ...
41 >>> record = Entrez.read(handle)
42 where record is now a Python dictionary or list.
43
44 _open Internally used function.
45
46 """
47 import urllib, time, warnings
48 import os.path
49 from Bio import File
50
51
52 email = None
53
54
55
def epost(db, **keywds):
    """Post a list of identifiers to Entrez for later use.

    EPost uploads a file containing a list of UIs so that they are kept in
    the user's environment and can be used by subsequent search strategies.

    db is sent as the "db" option; every additional keyword argument is
    passed along unchanged as a further option.  See the online
    documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    Return a handle to the results.

    Raises an IOError exception if there's a network error.
    """
    # 'db' is a named parameter, so it can never also appear in keywds.
    request = dict(db=db, **keywds)
    # post=True asks _open to send the options as POST data instead of
    # appending them to the URL.
    return _open('http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi',
                 request, post=True)
73
75 """Fetches Entrez results which are returned as a handle.
76
77 EFetch retrieves records in the requested format from a list of one or
78 more UIs or from user's environment.
79
80 See the online documentation for an explanation of the parameters:
81 http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html
82
83 Return a handle to the results.
84
85 Raises an IOError exception if there's a network error.
86
87 Short example:
88
89 from Bio import Entrez
90 handle = Entrez.efetch(db="nucleotide", id="57240072", rettype="gb")
91 print handle.read()
92 """
93 for key in keywds :
94 if key.lower()=="rettype" and keywds[key].lower()=="genbank" :
95 import warnings
96 warnings.warn('As of Easter 2009, Entrez EFetch no longer '
97 'supports the unofficial return type "genbank", '
98 'use "gb" or "gp" instead.', DeprecationWarning)
99 if db.lower()=="protein" :
100 keywds[key] = "gp"
101 else :
102 keywds[key] = "gb"
103 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
104 variables = {'db' : db}
105 variables.update(keywds)
106 return _open(cgi, variables)
107
109 """ESearch runs an Entrez search and returns a handle to the results.
110
111 ESearch searches and retrieves primary IDs (for use in EFetch, ELink
112 and ESummary) and term translations, and optionally retains results
113 for future use in the user's environment.
114
115 See the online documentation for an explanation of the parameters:
116 http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
117
118 Return a handle to the results which are always in XML format.
119
120 Raises an IOError exception if there's a network error.
121
122 Short example:
123
124 from Bio import Entrez
125 handle = Entrez.esearch(db="nucleotide", retmax=10, term="Opuntia")
126 record = Entrez.read(handle)
127 print record["Count"]
128 print record["IdList"]
129 """
130 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
131 variables = {'db' : db,
132 'term' : term}
133 variables.update(keywds)
134 return _open(cgi, variables)
135
137 """ELink checks for linked external articles and returns a handle.
138
139 ELink checks for the existence of an external or Related Articles link
140 from a list of one or more primary IDs; retrieves IDs and relevancy
141 scores for links to Entrez databases or Related Articles; creates a
142 hyperlink to the primary LinkOut provider for a specific ID and
143 database, or lists LinkOut URLs and attributes for multiple IDs.
144
145 See the online documentation for an explanation of the parameters:
146 http://www.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html
147
148 Return a handle to the results, by default in XML format.
149
150 Raises an IOError exception if there's a network error.
151 """
152 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
153 variables = {}
154 variables.update(keywds)
155 return _open(cgi, variables)
156
158 """EInfo returns a summary of the Entrez databases as a results handle.
159
160 EInfo provides field names, index term counts, last update, and
161 available links for each Entrez database.
162
163 See the online documentation for an explanation of the parameters:
164 http://www.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html
165
166 Return a handle to the results, by default in XML format.
167
168 Raises an IOError exception if there's a network error.
169
170 Short example:
171
172 from Bio import Entrez
173 record = Entrez.read(Entrez.einfo())
174 print record['DbList']
175 """
176 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
177 variables = {}
178 variables.update(keywds)
179 return _open(cgi, variables)
180
182 """ESummary retrieves document summaries as a results handle.
183
184 ESummary retrieves document summaries from a list of primary IDs or
185 from the user's environment.
186
187 See the online documentation for an explanation of the parameters:
188 http://www.ncbi.nlm.nih.gov/entrez/query/static/esummary_help.html
189
190 Return a handle to the results, by default in XML format.
191
192 Raises an IOError exception if there's a network error.
193 """
194 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
195 variables = {}
196 variables.update(keywds)
197 return _open(cgi, variables)
198
200 """EGQuery provides Entrez database counts for a global search.
201
202 EGQuery provides Entrez database counts in XML for a single search
203 using Global Query.
204
205 See the online documentation for an explanation of the parameters:
206 http://www.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html
207
208 Return a handle to the results in XML format.
209
210 Raises an IOError exception if there's a network error.
211 """
212 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
213 variables = {}
214 variables.update(keywds)
215 return _open(cgi, variables)
216
218 """ESpell retrieves spelling suggestions, returned in a results handle.
219
220 ESpell retrieves spelling suggestions, if available.
221
222 See the online documentation for an explanation of the parameters:
223 http://www.ncbi.nlm.nih.gov/entrez/query/static/espell_help.html
224
225 Return a handle to the results, by default in XML format.
226
227 Raises an IOError exception if there's a network error.
228
229 Short example:
230
231 from Bio import Entrez
232 record = Entrez.read(Entrez.espell(term="biopythooon"))
233 print record["Query"]
234 print record["CorrectedQuery"]
235 """
236 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
237 variables = {}
238 variables.update(keywds)
239 return _open(cgi, variables)
240
242 """Parses an XML file from the NCBI Entrez Utilities into python objects.
243
244 This function parses an XML file created by NCBI's Entrez Utilities,
245 returning a multilevel data structure of Python lists and dictionaries.
246 Most XML files returned by NCBI's Entrez Utilities can be parsed by
247 this function, provided its DTD is available. Biopython includes the
248 DTDs for most commonly used Entrez Utilities.
249
250 Whereas the data structure seems to consist of generic Python lists,
251 dictionaries, strings, and so on, each of these is actually a class
252 derived from the base type. This allows us to store the attributes
253 (if any) of each element in a dictionary my_element.attributes, and
254 the tag name in my_element.tag.
255 """
256 from Parser import DataHandler
257 DTDs = os.path.join(__path__[0], "DTDs")
258 handler = DataHandler(DTDs)
259 record = handler.run(handle)
260 return record
261
268
def _open(cgi, params=None, post=False):
    """Helper function to build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.  cgi is the URL for the cgi script to access.
    params is a dictionary with the options to pass to it (None or omitted
    means no options).  Options whose value is None are left out of the
    request.  The dictionary passed in is never modified; the request is
    built from a private copy.

    A "tool" option of "biopython" is added unless the caller supplied one,
    and the module level email value is added as the "email" option when it
    is set and the caller did not supply one.

    If post is true the encoded options are sent as POST data, otherwise
    they are appended to the URL as a query string.

    Does some simple error checking on the first lines of the response, and
    will raise an IOError if it encounters one.  Otherwise returns the
    File.UndoHandle wrapping the response; the lines read for inspection
    are handed back to it with saveline before returning.

    This function also enforces the "up to three queries per second rule"
    to avoid abusing the NCBI servers.
    """
    # Keep successive requests at least a third of a second apart.
    delay = 0.333333334
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current

    # Work on a filtered copy: this drops the None-valued options without
    # deleting from a dictionary while iterating over it, and keeps the
    # "tool"/"email" additions below out of the caller's dictionary (and
    # out of any shared default argument).
    if params is None:
        params = {}
    params = dict((key, value) for key, value in params.items()
                  if value is not None)

    if "tool" not in params:
        params["tool"] = "biopython"
    if "email" not in params and email is not None:
        params["email"] = email

    options = urllib.urlencode(params, doseq=True)
    if post:
        handle = urllib.urlopen(cgi, data=options)
    else:
        handle = urllib.urlopen(cgi + "?" + options)

    uhandle = File.UndoHandle(handle)

    # Read the first seven lines to look for an error report, then give
    # them back to the handle in reverse order (last line read is saved
    # first), exactly as many lines as were taken.
    lines = [uhandle.readline() for _ in range(7)]
    for line in reversed(lines):
        uhandle.saveline(line)
    _check_for_error(''.join(lines))

    return uhandle


def _check_for_error(data):
    """Raise IOError if data looks like an error report (PRIVATE).

    data is the text of the first few lines of a response.  It is compared
    against a fixed list of known error markers; the first one that matches
    decides the message of the IOError.  Returns None when nothing matches.
    """
    if "500 Proxy Error" in data:
        raise IOError("500 Proxy Error (NCBI busy?)")
    if "502 Proxy Error" in data:
        raise IOError("502 Proxy Error (NCBI busy?)")
    if "WWW Error 500 Diagnostic" in data:
        raise IOError("WWW Error 500 Diagnostic (NCBI busy?)")
    if "<title>Service unavailable!</title>" in data:
        raise IOError("Service unavailable!")
    if "<title>Bad Gateway!</title>" in data:
        raise IOError("Bad Gateway!")
    if "<title>414 Request-URI Too Large</title>" in data \
    or "<h1>Request-URI Too Large</h1>" in data:
        raise IOError("Requested URL too long (try using EPost?)")
    if data.startswith("Error:"):
        # The response text itself is used as the exception message.
        raise IOError(data.strip())
    if data.startswith("The resource is temporarily unavailable"):
        raise IOError("The resource is temporarily unavailable")
    if data.startswith("download dataset is empty"):
        raise IOError("download dataset is empty")
    if data[:5] == "ERROR":
        raise IOError("ERROR, possibly because id not available?")


# Time of the most recent request; 0 means no request has been made yet, so
# the first call never has to wait.
_open.previous = 0
359