1 """Search and retrieve information given a set of database identifiers.
2
3 EUtils has two major modes. One uses history while the other uses
4 database identifiers. This is a high-level interface for working with
5 identifiers. You should use this module to get information about a
6 set of known database identifiers.
7
8 See HistoryClient if you want to work with a large number of
9 identifiers or potentially large search results.
10
11 >>> from Bio import EUtils
12 >>> from Bio.EUtils import DBIdsClient
13 >>> client = DBIdsClient.DBIdsClient()
14 >>> result = client.search("dalke", retmax = 100)
15 >>> len(result)
16 30
17 >>> print result[0].efetch(retmode = "text", rettype = "abstract").read()
18
19 1: Pac Symp Biocomput 1997;:85-96
20
21 Using Tcl for molecular visualization and analysis.
22
23 Dalke A, Schulten K.
24
25 Beckman Institute, Urbana, IL 61801, USA.
26
27 Reading and manipulating molecular structure data is a standard task in every
28 molecular visualization and analysis program, but is rarely available in a form
29 readily accessible to the user. Instead, the development of new methods for
30 analysis, display, and interaction is often achieved by writing a new program,
31 rather than building on pre-existing software. We present the Tcl-based script
32 language used in our molecular modeling program, VMD, and show how it can access
33 information about the molecular structure, perform analysis, and graphically
34 display and animate the results. The commands are available to the user and make
35 VMD a useful environment for studying biomolecules.
36
37
38 PMID: 9390282 [PubMed - indexed for MEDLINE]
39
40 >>>
41
42
43 Find sequences similar to GI:4579714 which were published in 2002.
44
45 >>> protein = DBIdsClient.from_dbids(EUtils.DBIds("protein", "4579714"))
46 >>> neighbors = protein.neighbor_links("protein",
47 ... daterange = EUtils.DateRange("2002/01/01", "2002/12/31", "pdat"))
48 >>> dbids = neighbors.linksetdbs["protein_protein"].dbids
49 >>> len(dbids)
50 28
51 >>> print dbids
52 DBIds(u'protein', [u'4579714', u'25298947', u'24158913', u'24158914', u'24158915', u'17942993', u'17942994', u'17942995', u'20150921', u'20150922', u'20151159', u'25298949', u'19716034', u'20663737', u'20663738', u'20663741', u'24987328', u'25533128', u'25298946', u'25298948', u'23008597', u'20219020', u'21218340', u'21218344', u'19075395', u'21218338', u'21218342', u'21311795'])
53 >>>
54 >>> print client.from_dbids(dbids[:5]).efetch(retmode="text",
55 ... rettype="summary").read()
56
57 1: BAA75200
58 Bacteriorhodopsin [Halobacterium sp.]
59 gi|4579714|dbj|BAA75200.1|[4579714]
60
61
62 2: H84300
63 bacteriorhodopsin [imported] - Halobacterium sp. NRC-1
64 gi|25298947|pir||H84300[25298947]
65
66
67 3: 1M0KA
68 Chain A, Bacteriorhodopsin K Intermediate At 1.43 A Resolution
69 gi|24158913|pdb|1M0K|A[24158913]
70
71
72 4: 1M0LA
73 Chain A, BacteriorhodopsinLIPID COMPLEX AT 1.47 A RESOLUTION
74 gi|24158914|pdb|1M0L|A[24158914]
75
76
77 5: 1M0MA
78 Chain A, Bacteriorhodopsin M1 Intermediate At 1.43 A Resolution
79 gi|24158915|pdb|1M0M|A[24158915]
80
81 >>>
82
83 """
84
85 import types
86 import parse, Mixins, Config, ThinClient, Datatypes
87
89 """Look up information about a DBIds
90
91 To get the list of dbids, as interpreted by fetching the
92 server's "uilist", use the "dbids" attribute.
93 """
def __init__(self, eutils, records_dbids):
    """Remember the client and the identifiers this object works on.

    eutils -- the client object used by the other methods to talk to
              the server (stored as ``self.eutils``)
    records_dbids -- the identifiers to look up (stored as
              ``self.records_dbids``)
    """
    # Plain attribute storage; nothing is contacted or validated here.
    self.records_dbids = records_dbids
    self.eutils = eutils
97
def esummary(self, retmode='xml', rettype=None):
    """Call esummary for this object's identifiers.

    Delegates to ``self.eutils.esummary_using_dbids`` with
    ``self.records_dbids`` and returns that call's result (the socket
    handle, per the original description).

    NOTE(review): 'retmode' and 'rettype' are accepted but are not
    forwarded to the underlying call -- confirm whether the client
    supports them before wiring them through.
    """
    summarize = self.eutils.esummary_using_dbids
    return summarize(dbids=self.records_dbids)
102
106
def elink(self, db="pubmed", cmd="neighbor", term=None, field=None,
          daterange=None):
    """Call elink for this object's identifiers.

    All arguments are forwarded unchanged, by keyword, to
    ``self.eutils.elink_using_dbids``; its result (the socket handle,
    per the original description) is returned.

    NOTE(review): the identifiers sent are ``self.dbids`` rather than
    ``self.records_dbids`` -- confirm this difference from esummary is
    intentional.
    """
    # Look the bound method up first so attribute access happens in the
    # same order as a direct call expression.
    send_link_request = self.eutils.elink_using_dbids
    link_args = dict(
        dbids=self.dbids,
        db=db,
        cmd=cmd,
        daterange=daterange,
        term=term,
        field=field,
    )
    return send_link_request(**link_args)
122
# Read-only attribute: there is no setter or deleter, only the getter.
dbids = property(
    fget=_get_dbids,
    doc="The DBIds for this results set, validated from the server's 'uilist'",
)
129
130
132 """A single record on the server"""
135
137 """Support 'efetch' for sequence records"""
def efetch(self, retmode='xml', rettype=None,
           seq_start=None, seq_stop=None, strand=None,
           complexity=None):
    """Call efetch for these sequence identifiers.

    Every argument is forwarded by keyword, together with
    ``self.records_dbids``, to ``self.eutils.efetch_using_dbids`` and
    that call's result is returned.

    Raises TypeError when 'strand' is anything other than None, 1 or 2;
    the check happens before any request is made.
    """
    strand_is_valid = strand in (None, 1, 2)
    if not strand_is_valid:
        raise TypeError("Strand can only be 1 (plus, default) or 2 (minus)")

    fetch = self.eutils.efetch_using_dbids
    fetch_args = dict(
        dbids=self.records_dbids,
        retmode=retmode,
        rettype=rettype,
        seq_start=seq_start,
        seq_stop=seq_stop,
        strand=strand,
        complexity=complexity,
    )
    return fetch(**fetch_args)
151
class SequenceDBIdsRecord(
        Mixins.SequenceFetchMixin,
        SequenceDBIdsFetchMixin,
        DBIdsRecord):
    """One sequence record, addressed by its database identifier.

    Adds nothing of its own: all behaviour comes from the three base
    classes, in the order listed.
    """
157
159 """Support 'efetch' for publication records"""
def efetch(self, retmode="xml", rettype=None):
    """Call efetch for these publication identifiers.

    Forwards 'retmode' and 'rettype', together with
    ``self.records_dbids``, by keyword to
    ``self.eutils.efetch_using_dbids`` and returns that call's result.
    """
    fetch = self.eutils.efetch_using_dbids
    fetch_args = dict(
        dbids=self.records_dbids,
        retmode=retmode,
        rettype=rettype,
    )
    return fetch(**fetch_args)
165
169 """a single publication record, referenced by database identifier"""
170 pass
171
173 """Base class for dealing with a set of records, referenced by identifier"""
174 - def __init__(self, eutils, records_dbids, metadata = None):
177
179 """Number of records referenced by this RecordSet"""
180 return len(self.records_dbids)
181
183 """Return subset of the records"""
184 if isinstance(i, types.SliceType):
185
186 if i.step is None:
187 return self.__class__(
188 self.eutils,
189 self.records_dbids[i.start:i.stop])
190 return self.__class__(
191 self.eutils,
192 self.records_dbids[i.start:i.stop:i.step])
193
194 return self._record_class(self.eutils, self.records_dbids.item(i))
195
199 """a set of sequence records, referenced by database identifier"""
200 _record_class = SequenceDBIdsRecord
201
205 """a set of publication records, referenced by database identifier"""
206 _record_class = PublicationDBIdsRecord
207
208
218
def from_dbids(dbids, dbtype=None, eutils=None):
    """Create a RecordSet interface for a set of database identifiers.

    Convenience wrapper: builds a DBIdsClient around 'eutils' and
    returns the result of its from_dbids() method.

    Parameters are:
      dbids -- a DBIds
      dbtype -- the dbtype to use (EUtils.Config.{SEQUENCE,PUBLICATION}_TYPE)
          in case dbids.db isn't in the list of known NCBI databases.
          Defaults to None.
      eutils -- the ThinClient to use, defaults to creating a new
          ThinClient.ThinClient()
    """
    client = DBIdsClient(eutils)
    return client.from_dbids(dbids, dbtype)
231
233 """Create a RecordSet either from a search or a set of dbids
234
235 The constructor takes an optional ThinClient to use for
236 connecting to NCBI.
237 """
242
244 """Return a RecordSet given the DBIds
245
246 This RecordSet can be used to fetch data from NCBI
247 related to the given DBIds.
248 """
249 set_klass = _get_recordset_constructor(dbids.db, dbtype)
250 return set_klass(self.eutils, dbids, None)
251
def search(self, term, db="pubmed", field=None,
           retstart=0, retmax=20,
           daterange=None, dbtype=None):
    """Run an Entrez search and wrap the matching identifiers.

    The parameters are:
      'term' -- the query string in the Entrez query language; see
         http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
      'db' -- the database to search

      'field' -- the field to use for unqualified words
           Eg, "dalke[au] AND gene" with field==None becomes
             dalke[au] AND (genes[MeSH Terms] OR gene[Text Word])
           and "dalke[au] AND gene" with field=="au" becomes
             dalke[au] AND genes[Author]
          (Yes, I think the first "au" should be "Author" too)

      'retstart' -- include identifiers in the output, starting with
           position 'retstart' (normally starts with 0)
      'retmax' -- return at most 'retmax' identifiers in the output
           (if not specified, NCBI returns 20 identifiers)
      'daterange' -- a date restriction; either WithinNDays or DateRange

      'dbtype' -- (optional) the database type (Config.PUBLICATION_TYPE
           or SEQUENCE_TYPE).  Overrides the type based on the 'db'

    Returns an instance of the record-set class chosen for 'db' and
    'dbtype', built from this client's eutils, the DBIds found and the
    parsed search information.
    """
    # Pick the record-set class before contacting the server.
    recordset_factory = _get_recordset_constructor(db, dbtype)

    query = dict(
        term=term,
        db=db,
        field=field,
        retstart=retstart,
        retmax=retmax,
        daterange=daterange,
    )
    response = self.eutils.esearch(**query)
    hits = parse.parse_search(response, [None])

    # The parsed search information is also handed to the record set as
    # its third argument.
    found = Datatypes.DBIds(db, hits.ids)
    return recordset_factory(self.eutils, found, hits)
298