1
2
3
4
5
6
7 """Provides code to access NCBI over the WWW.
8
9 The main Entrez web page is available at:
10 http://www.ncbi.nlm.nih.gov/Entrez/
11
12 A list of the Entrez utilities is available at:
13 http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
14
15
16 Functions:
17 efetch Retrieves records in the requested format from a list of one or
18 more primary IDs or from the user's environment
19 epost Posts a file containing a list of primary IDs for future use in
20 the user's environment to use with subsequent search strategies
21 esearch Searches and retrieves primary IDs (for use in EFetch, ELink,
22 and ESummary) and term translations and optionally retains
23 results for future use in the user's environment.
24 elink Checks for the existence of an external or Related Articles link
25 from a list of one or more primary IDs. Retrieves primary IDs
26 and relevancy scores for links to Entrez databases or Related
27 Articles; creates a hyperlink to the primary LinkOut provider
28 for a specific ID and database, or lists LinkOut URLs
29 and Attributes for multiple IDs.
30 einfo Provides field index term counts, last update, and available
31 links for each database.
32 esummary Retrieves document summaries from a list of primary IDs or from
33 the user's environment.
34 egquery Provides Entrez database counts in XML for a single search
35 using Global Query.
36 espell Retrieves spelling suggestions.
37
38 read Parses the XML results returned by any of the above functions.
39 Typical usage is:
40 >>> handle = Entrez.einfo() # or esearch, efetch, ...
41 >>> record = Entrez.read(handle)
42 where record is now a Python dictionary or list.
43
44 _open Internally used function.
45
46 """
47 import urllib, time
48 import os.path
49 from Bio import File
50
def query(cmd, db, cgi='http://www.ncbi.nlm.nih.gov/sites/entrez',
          **keywds):
    """Deprecated entry point for querying the Entrez web site (DEPRECATED).

    This function is kept for backwards compatibility only.  It targets the
    Entrez web site rather than the E-Utilities URL, which is why calling it
    emits a DeprecationWarning.

    The parameters are explained in the NCBI online documentation:
    http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp

    NOTE(review): the body visible here only issues the warning; cmd, db,
    cgi and the extra keywords are not used and nothing is returned (the
    result is None).  Confirm whether the request code was removed on
    purpose.
    """
    from warnings import warn

    message = ("Bio.Entrez.query is deprecated, since it breaks NCBI's rule "
               "to only use the E-Utilities URL.")
    warn(message, DeprecationWarning)
64
65
def epost(db, cgi=None, **keywds):
    """Post a file of identifiers for future use.

    EPost uploads a list of UIs to the user's environment on the NCBI
    servers so that later search strategies can refer to them.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html

    Arguments:
     - db: name of the Entrez database, sent as the 'db' parameter.
     - cgi: deprecated.  A true value only triggers a DeprecationWarning;
       the request is always sent to the NCBI epost URL.
     - keywds: any further parameters, passed through to the request.

    Return a handle to the results.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        # The caller's URL is warned about and then discarded below.
        from warnings import warn
        warn("Using a URL other than NCBI's main url for the E-Utilities "
             "is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi'
    request = dict(db=db)
    request.update(keywds)
    return _open(url, request)
86
def efetch(db, cgi=None, **keywds):
    """Fetches Entrez results which are returned as a handle.

    EFetch retrieves records, in the format requested, for a list of one
    or more UIs or for the contents of the user's environment.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html

    Arguments:
     - db: name of the Entrez database, sent as the 'db' parameter.
     - cgi: deprecated.  A true value only triggers a DeprecationWarning;
       the request is always sent to the NCBI efetch URL.
     - keywds: any further parameters, passed through to the request.

    Return a handle to the results.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.efetch(db="nucleotide", id="57240072", rettype="genbank")
    print handle.read()
    """
    if cgi:
        # The caller's URL is warned about and then discarded below.
        from warnings import warn
        warn("Using a URL other than NCBI's main url for the E-Utilities "
             "is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    request = dict(db=db)
    request.update(keywds)
    return _open(url, request)
113
def esearch(db, term, cgi=None, **keywds):
    """ESearch runs an Entrez search and returns a handle to the results.

    ESearch searches and retrieves primary IDs (for use in EFetch, ELink
    and ESummary) and term translations, and optionally retains results
    for future use in the user's environment.

    See the online documentation for an explanation of the parameters:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html

    Arguments:
     - db: name of the Entrez database, sent as the 'db' parameter.
     - term: the search expression, sent as the 'term' parameter.
     - cgi: deprecated.  A true value only triggers a DeprecationWarning;
       the request is always sent to the NCBI esearch URL.
     - keywds: any further parameters, passed through to the request.

    Return a handle to the results which are always in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    handle = Entrez.esearch(db="nucleotide", retmax=10, term="Opuntia")
    record = Entrez.read(handle)
    print record["Count"]
    print record["IdList"]
    """
    if cgi:
        # The caller's URL is warned about and then discarded below.
        import warnings
        warnings.warn("Using a URL other than NCBI's main url for the "
                      "E-Utilities is deprecated.", DeprecationWarning)
    cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    variables = {'db': db,
                 'term': term}
    variables.update(keywds)
    return _open(cgi, variables)
144
def elink(cgi=None, **keywds):
    """ELink checks for linked external articles and returns a handle.

    ELink can check whether an external or Related Articles link exists
    for a list of one or more primary IDs, retrieve IDs and relevancy
    scores for links to Entrez databases or Related Articles, create a
    hyperlink to the primary LinkOut provider for a specific ID and
    database, or list LinkOut URLs and attributes for multiple IDs.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html

    Arguments:
     - cgi: deprecated.  A true value only triggers a DeprecationWarning;
       the request is always sent to the NCBI elink URL.
     - keywds: the request parameters, passed through unchanged.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.
    """
    if cgi:
        # The caller's URL is warned about and then discarded below.
        from warnings import warn
        warn("Using a URL other than NCBI's main url for the E-Utilities "
             "is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
    request = dict(keywds)
    return _open(url, request)
168
def einfo(cgi=None, **keywds):
    """EInfo returns a summary of the Entrez databases as a results handle.

    EInfo reports field names, index term counts, the last update and
    the available links for each Entrez database.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html

    Arguments:
     - cgi: deprecated.  A true value only triggers a DeprecationWarning;
       the request is always sent to the NCBI einfo URL.
     - keywds: the request parameters, passed through unchanged.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.einfo())
    print record['DbList']
    """
    if cgi:
        # The caller's URL is warned about and then discarded below.
        from warnings import warn
        warn("Using a URL other than NCBI's main url for the E-Utilities "
             "is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
    request = dict(keywds)
    return _open(url, request)
195
197 """ESummary retrieves document summaries as a results handle.
198
199 ESummary retrieves document summaries from a list of primary IDs or
200 from the user's environment.
201
202 See the online documentation for an explanation of the parameters:
203 http://www.ncbi.nlm.nih.gov/entrez/query/static/esummary_help.html
204
205 Return a handle to the results, by default in XML format.
206
207 Raises an IOError exception if there's a network error.
208 """
209 if cgi:
210 import warnings
211 warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
212 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
213 variables = {}
214 variables.update(keywds)
215 return _open(cgi, variables)
216
218 """EGQuery provides Entrez database counts for a global search.
219
220 EGQuery provides Entrez database counts in XML for a single search
221 using Global Query.
222
223 See the online documentation for an explanation of the parameters:
224 http://www.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html
225
226 Return a handle to the results in XML format.
227
228 Raises an IOError exception if there's a network error.
229 """
230 if cgi:
231 import warnings
232 warnings.warn("Using a URL other than NCBI's main url for the E-Utilities is deprecated.", DeprecationWarning)
233 cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
234 variables = {}
235 variables.update(keywds)
236 return _open(cgi, variables)
237
def espell(cgi=None, **keywds):
    """ESpell retrieves spelling suggestions, returned in a results handle.

    ESpell looks up spelling suggestions for a query, if any are available.

    The available parameters are explained in the online documentation:
    http://www.ncbi.nlm.nih.gov/entrez/query/static/espell_help.html

    Arguments:
     - cgi: deprecated.  A true value only triggers a DeprecationWarning;
       the request is always sent to the NCBI espell URL.
     - keywds: the request parameters, passed through unchanged.

    Return a handle to the results, by default in XML format.

    Raises an IOError exception if there's a network error.

    Short example:

    from Bio import Entrez
    record = Entrez.read(Entrez.espell(term="biopythooon"))
    print record["Query"]
    print record["CorrectedQuery"]
    """
    if cgi:
        # The caller's URL is warned about and then discarded below.
        from warnings import warn
        warn("Using a URL other than NCBI's main url for the E-Utilities "
             "is deprecated.", DeprecationWarning)
    url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
    request = dict(keywds)
    return _open(url, request)
264
266 """Parses an XML file from the NCBI Entrez Utilities into python objects.
267
268 This function parses an XML file created by NCBI's Entrez Utilities,
269 returning a multilevel data structure of Python lists and dictionaries.
270 Most XML files returned by NCBI's Entrez Utilities can be parsed by
271 this function, provided its DTD is available. Biopython includes the
272 DTDs for most commonly used Entrez Utilities.
273
274 Whereas the data structure seems to consist of generic Python lists,
275 dictionaries, strings, and so on, each of these is actually a class
276 derived from the base type. This allows us to store the attributes
277 (if any) of each element in a dictionary my_element.attributes, and
278 the tag name in my_element.tag.
279 """
280 from Parser import DataHandler
281 DTDs = os.path.join(__path__[0], "DTDs")
282 handler = DataHandler(DTDs)
283 record = handler.run(handle)
284 return record
285
def _open(cgi, params=None):
    """Helper function to build the URL and open a handle to it (PRIVATE).

    Open a handle to Entrez.

    Arguments:
     - cgi: the URL for the cgi script to access.
     - params: optional dictionary with the options to pass to it.  The
       dictionary is copied before use, so neither the caller's object nor
       a shared default is modified.  A "tool" entry of "biopython" is
       added to the copy when the caller did not supply one.

    Returns a File.UndoHandle wrapping the response.  The first five lines
    are read for error checking and then pushed back, so the caller still
    sees the complete response.

    Raises IOError if those first lines match one of the known error
    messages checked below.

    This function also enforces the "three second rule" to avoid abusing
    the NCBI servers: it sleeps as needed so that successive calls are at
    least three seconds apart.  The time of the last request is kept in
    the function attribute _open.previous.
    """
    # Enforce the minimum spacing between requests.
    delay = 3.0
    current = time.time()
    wait = _open.previous + delay - current
    if wait > 0:
        time.sleep(wait)
        _open.previous = current + wait
    else:
        _open.previous = current

    # Work on a private copy: the "tool" entry added below must not leak
    # into the caller's dictionary (or into a shared default argument).
    if params is None:
        params = {}
    else:
        params = dict(params)
    if "tool" not in params:
        params["tool"] = "biopython"

    # doseq=True expands sequence values into repeated key=value pairs.
    options = urllib.urlencode(params, doseq=True)
    handle = urllib.urlopen(cgi + "?" + options)

    # UndoHandle lets us peek at the start of the response and then put
    # the lines back.
    uhandle = File.UndoHandle(handle)

    lines = [uhandle.readline() for _ in range(5)]
    # Push the lines back in reverse order, as the original code did.
    for line in lines[::-1]:
        uhandle.saveline(line)
    data = ''.join(lines)

    if "500 Proxy Error" in data:
        raise IOError("500 Proxy Error (NCBI busy?)")
    elif "502 Proxy Error" in data:
        raise IOError("502 Proxy Error (NCBI busy?)")
    elif "WWW Error 500 Diagnostic" in data:
        raise IOError("WWW Error 500 Diagnostic (NCBI busy?)")
    elif data.startswith("Error:"):
        # Report the server's own error text.
        raise IOError(data.strip())
    elif data.startswith("The resource is temporarily unavailable"):
        raise IOError("The resource is temporarily unavailable")
    elif data.startswith("download dataset is empty"):
        raise IOError("download dataset is empty")
    elif data.startswith("ERROR"):
        raise IOError("ERROR, possibly because id not available?")

    return uhandle

# Time of the most recent request; 0 means the first call never waits.
_open.previous = 0
351