Package Bio :: Module EZRetrieve
[hide private]
[frames | no frames]

Source Code for Module Bio.EZRetrieve

 1  """This module contains code to access EZRetrieve (OBSOLETE). 
 2   
 3  This is a very simple interface to the EZRetrieve website described in: 
 4   
 5  Zhang, H., Ramanathan, Y., Soteropoulos, P., Recce, M., and Tolias, P.P. (2002). 
 6  EZ-Retrieve: A web-server for batch retrieval of coordinate-specified human 
 7  DNA sequences and underscoring putative transcription factor-binding sites. 
 8  Nucl. Acids. Res. 2002 30: e121. 
 9  http://dx.doi.org/10.1093/nar/gnf120 
10   
11  Functions: 
12  retrieve_single  Retrieve a single sequence from EZRetrieve. 
13  parse_single     Parse the results from EZRetrieve into FASTA format. 
14   
15  This module is now considered to be obsolete, and is likely to be deprecated 
16  in a future release of Biopython, and later removed. 
17  """ 
18   
def retrieve_single(id, from_, to, retrieve_by=None, organism=None,
                    parse_results=1):
    """Retrieve a single sequence from the EZRetrieve web server.

    Arguments:
    id             Identifier to look up; sent as str(id).
    from_, to      Coordinates of the region to fetch; sent as str(...).
    retrieve_by    Kind of identifier, matched case-insensitively against
                   "genbank", "unigene", "locuslink" and "image".
                   Defaults to "GenBank".
    organism       Organism code: "Hs", "Mm" or "Rn".  Defaults to "Hs".
    parse_results  If true (the default), the response is passed through
                   parse_single and the FASTA string is returned;
                   otherwise the raw response body is returned.

    Raises AssertionError if organism or retrieve_by is not one of the
    recognised values, or (via parse_single) if the server response
    could not be parsed.
    """
    import urllib

    CGI = "http://siriusb.umdnj.edu:18080/EZRetrieve/single_r_run.jsp"

    # Form values expected by the CGI script; kept as strings throughout
    # (urlencode stringifies them anyway, so the query is unchanged).
    org2value = {"Hs": "0", "Mm": "1", "Rn": "2"}
    acctype2value = {"genbank": 0, "unigene": 1, "locuslink": 2, "image": 3}

    # Validate with explicit raises rather than ``assert`` so the checks
    # survive ``python -O``; AssertionError is kept for compatibility
    # with existing callers.
    organism = organism or "Hs"
    if organism not in org2value:
        raise AssertionError(
            "Unknown organism %r (expected one of: %s)"
            % (organism, ", ".join(sorted(org2value))))

    retrieve_by = (retrieve_by or "GenBank").lower()
    if retrieve_by not in acctype2value:
        raise AssertionError(
            "Unknown retrieve_by %r (expected one of: %s)"
            % (retrieve_by, ", ".join(sorted(acctype2value))))

    params = {
        "input": str(id),
        "from": str(from_),
        "to": str(to),
        "org": org2value[organism],
        "AccType": acctype2value[retrieve_by],
    }
    options = urllib.urlencode(params)

    # Passing data makes this a POST request.
    handle = urllib.urlopen(CGI, options)
    try:
        if parse_results:
            return parse_single(handle)
        return handle.read()
    finally:
        # Always release the connection, even if parsing fails.
        handle.close()
47
def parse_single(handle):
    """Return a FASTA-formatted string for the sequence.

    handle is a file-like object whose read() returns the HTML page
    produced by EZRetrieve.

    The sequence record is taken from the first "<b>>" marker up to the
    next "<br><br>" (or to the end of the page if that terminator is
    missing).  HTML tags are removed and every run of whitespace is
    collapsed to a single newline; the result always ends with exactly
    one newline.

    Raises AssertionError if the page contains an "Error: " message
    (the message text is used as the exception argument) or if no
    sequence record could be found.
    """
    import re

    results = handle.read()
    # Tags are matched case-insensitively via a lower-cased copy, while
    # the text itself is sliced from the original to preserve its case.
    lresults = results.lower()

    i = results.find("Error: ")
    if i >= 0:
        # The message normally ends at the next <br>; if there is none,
        # report everything up to the end of the page instead of
        # failing with an unrelated ValueError.
        j = lresults.find("<br>", i)
        if j < 0:
            j = len(results)
        raise AssertionError(results[i:j].strip())

    i = lresults.find("<b>>")
    if i < 0:
        # Explicit raise (not ``assert``) so the check survives -O.
        raise AssertionError("Couldn't find sequence.")

    j = lresults.find("<br><br>", i)
    if j < 0:
        # Without this, the -1 from find() would silently chop the last
        # character off the sequence.
        j = len(results)

    seqdata = re.sub(r"<[^>]*>", "", results[i:j])
    seqdata = re.sub(r"\s+", "\n", seqdata)
    return seqdata.strip() + "\n"
72