Package Bio :: Package EUtils :: Module DBIdsClient
[hide private]
[frames | no frames]

Source Code for Module Bio.EUtils.DBIdsClient

  1  """Search and retrieve information given a set of database identifiers. 
  2   
  3  EUtils has two major modes.  One uses history while the other uses 
  4  database identifiers.  This is a high-level interface for working with 
  5  identifiers.  You should use this module to get information about a 
  6  set of known database identifiers. 
  7   
  8  See HistoryClient if you want to work with a large number of 
  9  identifiers or potentially large search results. 
 10   
 11  >>> from Bio import EUtils 
 12  >>> from Bio.EUtils import DBIdsClient 
 13  >>> client = DBIdsClient.DBIdsClient() 
 14  >>> result = client.search("dalke", retmax = 100) 
 15  >>> len(result) 
 16  30 
 17  >>> print result[0].efetch(retmode = "text", rettype = "abstract").read() 
 18   
 19  1: Pac Symp Biocomput  1997;:85-96 
 20   
 21  Using Tcl for molecular visualization and analysis. 
 22   
 23  Dalke A, Schulten K. 
 24   
 25  Beckman Institute, Urbana, IL 61801, USA. 
 26   
 27  Reading and manipulating molecular structure data is a standard task in every 
 28  molecular visualization and analysis program, but is rarely available in a form 
 29  readily accessible to the user. Instead, the development of new methods for 
 30  analysis, display, and interaction is often achieved by writing a new program, 
 31  rather than building on pre-existing software. We present the Tcl-based script 
 32  language used in our molecular modeling program, VMD, and show how it can access 
 33  information about the molecular structure, perform analysis, and graphically 
 34  display and animate the results. The commands are available to the user and make 
 35  VMD a useful environment for studying biomolecules. 
 36   
 37   
 38  PMID: 9390282 [PubMed - indexed for MEDLINE] 
 39   
 40  >>> 
 41   
 42   
 43  Find sequences similar to GI:4579714 which were published in 2002. 
 44   
 45  >>> protein = DBIdsClient.from_dbids(EUtils.DBIds("protein", "4579714")) 
 46  >>> neighbors = protein.neighbor_links("protein", 
 47  ...        daterange = EUtils.DateRange("2002/01/01", "2002/12/31", "pdat")) 
 48  >>> dbids = neighbors.linksetdbs["protein_protein"].dbids 
 49  >>> len(dbids) 
 50  28 
 51  >>> print dbids 
 52  DBIds(u'protein', [u'4579714', u'25298947', u'24158913', u'24158914', u'24158915', u'17942993', u'17942994', u'17942995', u'20150921', u'20150922', u'20151159', u'25298949', u'19716034', u'20663737', u'20663738', u'20663741', u'24987328', u'25533128', u'25298946', u'25298948', u'23008597', u'20219020', u'21218340', u'21218344', u'19075395', u'21218338', u'21218342', u'21311795']) 
 53  >>>  
 54  >>> print client.from_dbids(dbids[:5]).efetch(retmode="text", 
 55  ...                                           rettype="summary").read() 
 56   
 57  1: BAA75200 
 58  Bacteriorhodopsin [Halobacterium sp.] 
 59  gi|4579714|dbj|BAA75200.1|[4579714] 
 60   
 61   
 62  2: H84300 
 63  bacteriorhodopsin [imported] - Halobacterium sp. NRC-1 
 64  gi|25298947|pir||H84300[25298947] 
 65   
 66   
 67  3: 1M0KA 
 68  Chain A, Bacteriorhodopsin K Intermediate At 1.43 A Resolution 
 69  gi|24158913|pdb|1M0K|A[24158913] 
 70   
 71   
 72  4: 1M0LA 
 73  Chain A, BacteriorhodopsinLIPID COMPLEX AT 1.47 A RESOLUTION 
 74  gi|24158914|pdb|1M0L|A[24158914] 
 75   
 76   
 77  5: 1M0MA 
 78  Chain A, Bacteriorhodopsin M1 Intermediate At 1.43 A Resolution 
 79  gi|24158915|pdb|1M0M|A[24158915] 
 80   
 81  >>> 
 82   
 83  """ 
 84   
 85  import types 
 86  import parse, Mixins, Config, ThinClient, Datatypes 
 87   
class DBIdsLookup(object):
    """Look up information about a DBIds

    Holds a client object ('eutils') together with the identifiers it
    should be asked about ('records_dbids').

    The "dbids" attribute gives the list of dbids as interpreted by the
    server, obtained by fetching the server's "uilist".
    """

    def __init__(self, eutils, records_dbids):
        """Store the client and the identifiers; no request is made here"""
        self.records_dbids = records_dbids
        self.eutils = eutils

    def esummary(self, retmode='xml', rettype=None):
        """call esummary on this DBIds; returns the socket handle

        NOTE: 'retmode' and 'rettype' are accepted but are not passed
        on; only the identifiers are sent to
        eutils.esummary_using_dbids.
        """
        request = self.eutils.esummary_using_dbids
        return request(dbids=self.records_dbids)

    def summary(self):
        """get the summary for these DBIds, parsed into a Datatypes.Summary

        The handle from esummary("xml") is handed to
        parse.parse_summary_xml and the parsed result is returned.
        """
        handle = self.esummary("xml")
        return parse.parse_summary_xml(handle)

    # NOTE(review): the listing this code was taken from skips original
    # source lines 107-121 at this point; whatever they define is not
    # visible and is not reproduced here.

    def _get_dbids(self):
        # Relies on an 'efetch' method, which this class does not define
        # itself; it must be supplied by a subclass or mixin.
        uilist_handle = self.efetch(retmode="text", rettype="uilist")
        identifiers = parse.parse_fetch_identifiers(uilist_handle)
        return Datatypes.DBIds(self.records_dbids.db, identifiers)

    # Read-only attribute; each access calls _get_dbids again.
    dbids = property(
        fget=_get_dbids,
        doc="The DBIds for this results set, validated from the server's 'uilist'")
129 130
class DBIdsRecord(DBIdsLookup):
    """A single record on the server"""

    def summary(self):
        """Return the first entry of the parsed summary for this record

        DBIdsLookup.summary() produces an indexable result; only its
        element 0 is returned.
        """
        parsed = DBIdsLookup.summary(self)
        return parsed[0]
135
class SequenceDBIdsFetchMixin:
    """Support 'efetch' for sequence records

    Expects the class it is mixed into to provide the 'eutils' and
    'records_dbids' attributes.
    """

    def efetch(self, retmode='xml', rettype=None,
               seq_start=None, seq_stop=None, strand=None,
               complexity=None):
        """call efetch on these sequence identifiers

        All parameters are handed unchanged, together with
        'records_dbids', to eutils.efetch_using_dbids, whose result is
        returned.

        'strand' must be None, 1 (plus, default) or 2 (minus); any other
        value raises TypeError before any request is made.
        """
        if strand in (None, 1, 2):
            fetch = self.eutils.efetch_using_dbids
            return fetch(
                dbids=self.records_dbids,
                retmode=retmode,
                rettype=rettype,
                seq_start=seq_start,
                seq_stop=seq_stop,
                strand=strand,
                complexity=complexity)
        raise TypeError("Strand can only be 1 (plus, default) or 2 (minus)")
151
class SequenceDBIdsRecord(Mixins.SequenceFetchMixin,
                          SequenceDBIdsFetchMixin,
                          DBIdsRecord):
    """a single sequence record, referenced by database identifier

    Adds nothing of its own; all behaviour comes from the base classes.
    """
157
class PublicationDBIdsFetchMixin:
    """Support 'efetch' for publication records

    Expects the class it is mixed into to provide the 'eutils' and
    'records_dbids' attributes.
    """

    def efetch(self, retmode="xml", rettype=None):
        """call efetch on these publication identifiers

        'retmode' and 'rettype' are handed unchanged, together with
        'records_dbids', to eutils.efetch_using_dbids, whose result is
        returned.
        """
        fetch = self.eutils.efetch_using_dbids
        return fetch(dbids=self.records_dbids,
                     retmode=retmode,
                     rettype=rettype)
165
class PublicationDBIdsRecord(Mixins.PublicationFetchMixin,
                             PublicationDBIdsFetchMixin,
                             DBIdsRecord):
    """a single publication record, referenced by database identifier

    Adds nothing of its own; all behaviour comes from the base classes.
    """
171
class BaseDBIdsRecordSet(DBIdsLookup):
    """Base class for dealing with a set of records, referenced by identifier

    Subclasses must provide a '_record_class' attribute: the class used
    to wrap a single record when the set is indexed with a non-slice
    index.
    """

    def __init__(self, eutils, records_dbids, metadata = None):
        """Store the client, the identifiers and optional metadata

        'metadata' is kept as given on the 'metadata' attribute; it
        defaults to None.
        """
        DBIdsLookup.__init__(self, eutils, records_dbids)
        self.metadata = metadata

    def __len__(self):
        """Number of records referenced by this RecordSet"""
        return len(self.records_dbids)

    def __getitem__(self, i):
        """Return a subset of the records, or a single record

        A slice gives a new record set of the same class, built from the
        correspondingly sliced 'records_dbids'; metadata is not passed
        downwards, so the new set gets the default (None).

        Any other index gives a '_record_class' instance built from
        'self.records_dbids.item(i)'.
        """
        # Test against the builtin 'slice' rather than types.SliceType:
        # on Python 2 the two names refer to the same type, while on
        # Python 3 types.SliceType no longer exists.
        if isinstance(i, slice):
            # Metadata is not passed downwards
            if i.step is None:
                # No step given: use plain two-argument slicing.
                return self.__class__(
                    self.eutils,
                    self.records_dbids[i.start:i.stop])
            return self.__class__(
                self.eutils,
                self.records_dbids[i.start:i.stop:i.step])

        return self._record_class(self.eutils, self.records_dbids.item(i))
195
class SequenceDBIdsRecordSet(Mixins.SequenceFetchMixin,
                             SequenceDBIdsFetchMixin,
                             BaseDBIdsRecordSet):
    """a set of sequence records, referenced by database identifier"""

    # Class used to wrap a single element of this set.
    _record_class = SequenceDBIdsRecord
201
class PublicationDBIdsRecordSet(Mixins.PublicationFetchMixin,
                                PublicationDBIdsFetchMixin,
                                BaseDBIdsRecordSet):
    """a set of publication records, referenced by database identifier"""

    # Class used to wrap a single element of this set.
    _record_class = PublicationDBIdsRecord
207 208
def _get_recordset_constructor(db, dbtype):
    """get the right RecordSet class for a database

    The database type is resolved with Config.databases.gettype(db,
    dbtype).  A sequence type selects SequenceDBIdsRecordSet and a
    publication type selects PublicationDBIdsRecordSet; anything else
    raises TypeError.
    """
    resolved_type = Config.databases.gettype(db, dbtype)
    if resolved_type == Config.SEQUENCE_TYPE:
        return SequenceDBIdsRecordSet
    if resolved_type == Config.PUBLICATION_TYPE:
        return PublicationDBIdsRecordSet
    raise TypeError("Unknown database type: %r" % (resolved_type,))
218
def from_dbids(dbids, dbtype=None, eutils=None):
    """create a RecordSet interface for the set of database identifiers

    Convenience wrapper around DBIdsClient(eutils).from_dbids(dbids, dbtype).

    Parameters are:
      dbids -- a DBIds
      dbtype -- the dbtype to use (EUtils.Config.{SEQUENCE,PUBLICATION}_TYPE)
           in case dbids.db isn't in the list of known NCBI databases.
           Defaults to None.
      eutils -- the ThinClient to use, defaults to creating a new
           ThinClient.ThinClient()
    """
    client = DBIdsClient(eutils)
    return client.from_dbids(dbids, dbtype)
231
class DBIdsClient:
    """Create a RecordSet either from a search or a set of dbids

    An optional ThinClient may be given to the constructor; it is the
    object used for connecting to NCBI.
    """

    def __init__(self, eutils=None):
        """Remember 'eutils', creating a ThinClient.ThinClient() if None"""
        if eutils is not None:
            self.eutils = eutils
        else:
            self.eutils = ThinClient.ThinClient()

    def from_dbids(self, dbids, dbtype=None):
        """Return a RecordSet given the DBIds

        The RecordSet class is chosen from 'dbids.db' and 'dbtype'.  The
        result can be used to fetch data from NCBI related to the given
        DBIds; it carries no metadata (None).
        """
        recordset_class = _get_recordset_constructor(dbids.db, dbtype)
        return recordset_class(self.eutils, dbids, None)

    def search(self,
               term,
               db="pubmed",
               field=None,

               retstart=0,
               retmax=20,

               daterange=None,
               dbtype=None,
               ):
        """do an Entrez search

        The parameters are:
          'term' -- the query string in the Entrez query language; see
          http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
          'db' -- the database to search

          'field' -- the field to use for unqualified words
                  Eg, "dalke[au] AND gene" with field==None becomes
                    dalke[au] AND (genes[MeSH Terms] OR gene[Text Word]
                  and "dalke[au] AND gene" with field=="au" becomes
                    dalke[au] AND genes[Author]
                 (Yes, I think the first "au" should be "Author" too)

          'retstart' -- include identifiers in the output, starting with
                  position 'retstart' (normally starts with 0)
          'retmax' -- return at most 'retmax' identifiers in the output
                  (if not specified, NCBI returns 20 identifiers)
          'daterange' -- a date restriction; either WithinNDays or DateRange

          'dbtype' -- (optional) the database type (Config.PUBLICATION_TYPE
                  or SEQUENCE_TYPE).  Overrides the type based on the 'db'

        Returns a RecordSet for the identifiers found; the parsed
        search information is attached to it as its metadata.
        """
        # Resolve the RecordSet class first, so that an unknown database
        # type raises TypeError before any request is sent.
        recordset_class = _get_recordset_constructor(db, dbtype)
        response = self.eutils.esearch(
            term=term,
            db=db,
            field=field,
            retstart=retstart,
            retmax=retmax,
            daterange=daterange)
        # The [None] argument is passed as-is; its meaning is defined by
        # parse.parse_search.
        search_info = parse.parse_search(response, [None])

        found = Datatypes.DBIds(db, search_info.ids)
        return recordset_class(self.eutils, found, search_info)
298