1 """This module contains code to access EZRetrieve (OBSOLETE).
2
3 This is a very simple interface to the EZRetrieve website described in:
4
5 Zhang, H., Ramanathan, Y., Soteropoulos, P., Recce, M., and Tolias, P.P. (2002).
6 EZ-Retrieve: A web-server for batch retrieval of coordinate-specified human
7 DNA sequences and underscoring putative transcription factor-binding sites.
8 Nucl. Acids. Res. 2002 30: e121.
9 http://dx.doi.org/10.1093/nar/gnf120
10
11 Functions:
12 retrieve_single Retrieve a single sequence from EZRetrieve.
13 parse_single Parse the results from EZRetrieve into FASTA format.
14
15 This module is now considered to be obsolete, and is likely to be deprecated
16 in a future release of Biopython, and later removed.
17 """
18
def retrieve_single(id, from_, to, retrieve_by=None, organism=None,
                    parse_results=1):
    """Retrieve a single sequence from the EZRetrieve web server.

    Arguments:
     - id            - identifier to look up; sent to the server as str(id).
     - from_, to     - the two coordinate values; each is sent as str(...).
     - retrieve_by   - kind of identifier: "GenBank" (the default),
                       "UniGene", "LocusLink" or "IMAGE"; matched
                       case-insensitively.
     - organism      - organism code: "Hs" (the default), "Mm" or "Rn".
     - parse_results - if true (the default), pass the response through
                       parse_single and return its FASTA-formatted string;
                       otherwise return the raw page exactly as read.

    Raises AssertionError if organism or retrieve_by is not one of the
    accepted values.  Errors from the network layer, and anything raised
    by parse_single, propagate unchanged.
    """
    # The urllib helpers moved between Python 2 and Python 3; import from
    # whichever layout is available.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    CGI = "http://siriusb.umdnj.edu:18080/EZRetrieve/single_r_run.jsp"

    # Form values expected by the CGI script for each accepted option.
    org2value = {"Hs": "0", "Mm": "1", "Rn": "2"}
    acctype2value = {"genbank": 0, "unigene": 1, "locuslink": 2, "image": 3}

    # Validate with explicit raises rather than `assert` statements so the
    # checks still run under `python -O`.  AssertionError is kept as the
    # exception type for backward compatibility.
    organism = organism or "Hs"
    if organism not in org2value:
        raise AssertionError("Unknown organism: %r" % (organism,))

    retrieve_by = (retrieve_by or "GenBank").lower()
    if retrieve_by not in acctype2value:
        raise AssertionError("Unknown retrieve_by: %r" % (retrieve_by,))

    params = {
        "input": str(id),
        "from": str(from_),
        "to": str(to),
        "org": org2value[organism],
        "AccType": acctype2value[retrieve_by],
    }
    options = urlencode(params)
    # Python 3's urlopen requires the POST body as bytes.  On Python 2 the
    # urlencoded str already is a bytes object, so this is skipped.
    if not isinstance(options, bytes):
        options = options.encode("ascii")

    handle = urlopen(CGI, options)
    try:
        if parse_results:
            return parse_single(handle)
        return handle.read()
    finally:
        # Release the connection whether or not reading/parsing succeeded.
        handle.close()
47
def parse_single(handle):
    """Return a FASTA-formatted string for the sequence read from handle.

    handle is a file-like object whose read() returns an EZRetrieve
    result page.  The sequence block is taken to start at the first
    "<b>>" and to end at the next "<br><br>" (both matched
    case-insensitively), or at the end of the page if that terminator is
    absent.  HTML tags are removed, every run of whitespace becomes a
    single newline, and the result ends with exactly one newline.

    Raises AssertionError if the page contains "Error: " (the message,
    up to the next "<br>" or the end of the page, becomes the exception
    text) or if no sequence block can be found.
    """
    import re

    results = handle.read()
    # Python 3 network handles yield bytes, which the str searches below
    # cannot handle.  On Python 2, bytes is str, so this never triggers.
    if isinstance(results, bytes) and not isinstance(results, str):
        results = results.decode("utf-8", "replace")
    lresults = results.lower()

    i = results.find("Error: ")
    if i >= 0:
        # Use find() rather than index(): an error message with no
        # trailing <br> must still surface as the documented
        # AssertionError, not as a ValueError.
        j = lresults.find("<br>", i)
        if j < 0:
            j = len(results)
        errmsg = results[i:j].strip()
        raise AssertionError(errmsg)

    i = lresults.find("<b>>")
    # Explicit raise (not an `assert` statement) so the check survives -O.
    if i < 0:
        raise AssertionError("Couldn't find sequence.")

    j = lresults.find("<br><br>", i)
    if j < 0:
        # Without this, slicing to -1 would silently drop the final
        # character of the sequence.
        j = len(results)

    seqdata = results[i:j]
    seqdata = re.sub(r"<[^>]*>", "", seqdata)   # strip HTML tags
    seqdata = re.sub(r"\s+", r"\n", seqdata)    # one token per line
    seqdata = seqdata.strip() + "\n"
    return seqdata
72