1 """Various EUtils datatypes."""
2
3 import re, types
4
class EUtilsError(Exception):
    """Base class for all EUtils-specific errors

    Contains a single error string -- use str(err) to get it.
    """
11
class EUtilsSearchError(EUtilsError):
    """Used when the ESearch XML says there is an ERROR

    The main error is in err.errmsg but more information
    may be available in err.errors or err.warnings.  Eg,
    the error message is often "Can't run executor" but
    you can get more information from the list of errors.
    """

    def __init__(self, errmsg, errors=None, warnings=None):
        """errmsg, errors = None, warnings = None

        'errmsg' -- the main error message
        'errors' -- list of additional errors (defaults to a new empty list)
        'warnings' -- list of warnings (defaults to a new empty list)
        """
        EUtilsError.__init__(self, errmsg)

        self.errmsg = errmsg
        # A fresh list per instance; never share a default between instances.
        self.errors = [] if errors is None else errors
        self.warnings = [] if warnings is None else warnings

    def __repr__(self):
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.errmsg, self.errors, self.warnings)

    def __str__(self):
        """errmsg followed by any errors and warnings, separated by '; '"""
        s = self.errmsg
        if self.errors:
            s = s + "; ERRORS: " + ", ".join(map(str, self.errors))
        if self.warnings:
            s = s + "; WARNINGS: " + ", ".join(map(str, self.warnings))
        # __str__ must return the native str type.  Only a Python 2 unicode
        # message needs encoding; encoding unconditionally yields bytes on
        # Python 3, which makes str(err) raise TypeError.
        if not isinstance(s, str):
            s = s.encode("latin1")
        return s
40
41
42
43
class DBIds(object):
    """Store a list of identifiers for a database

    This is used as input for the '*_using_dbids' functions.

    Constructed with the database name and list of identifier strings.
    """

    def __init__(self, db, ids):
        """db, ids

        'db' -- the database for those identifiers
        'ids' -- a list of identifiers for the given database
        """
        self.db = db
        self.ids = ids

    def __len__(self):
        """number of identifiers"""
        return len(self.ids)

    def __getitem__(self, i):
        """get an identifier or a subset of the DBIds

        An index returns the bare identifier; a slice returns a new
        object of the same class for the same database.
        """
        # The builtin 'slice' works on both Python 2 and 3, unlike
        # types.SliceType.  Passing the slice straight to the list also
        # keeps open-ended negative-step slices (dbids[::-1]) working.
        if isinstance(i, slice):
            return self.__class__(self.db, self.ids[i])
        return self.ids[i]

    def item(self, i):
        """Get a DBIds containing the item at position i

        Can't use dbids[i] since that returns only the identifier.
        This returns a DBIds, which can be used for another request.
        """
        return self.__class__(self.db, [self.ids[i]])

    def __iter__(self):
        """Iterate over the list of identifiers"""
        return iter(self.ids)

    def __repr__(self):
        return "DBIds(%r, %r)" % (self.db, self.ids)

    def __eq__(self, other):
        """does this DBIds equal the other?

        The database names must match, but the identifiers
        themselves can be in any order.
        """
        if self.db != other.db:
            return False
        # Cheap check first: same identifiers in the same order.
        if self.ids == other.ids:
            return True
        # Otherwise compare as sets: order and repeats are ignored.
        return set(self.ids) == set(other.ids)

    def __ne__(self, other):
        """check if this isn't equal to the other DBIds"""
        return not self == other

    def __sub__(self, other):
        """DBIds of the identifiers in this set which aren't in the other

        Raises TypeError if the two objects are for different databases.
        """
        if self.db != other.db:
            raise TypeError("Different databases: %r and %r" % (
                self.db, other.db))
        excluded = set(other.ids)
        # Keep the order in which the identifiers appear in self.ids
        new_ids = [x for x in self.ids if x not in excluded]
        return DBIds(self.db, new_ids)
127
class WithinNDays(object):
    """Restrict a search to matches in the last N days

    Eg, to see what's been published in PubMed about rabies
    in the last 20 days.

      client.search("rabies", daterange = WithinNDays(20, "pdat"))
    """

    def __init__(self, ndays, datetype=None):
        """ndays, datetype = None

        'ndays' -- within this many days of now (the 'reldate' field
                     of a search)
        'datetype' -- the date field to use (defaults to Entrez date,
                     which is "edat")
        """
        self.ndays = ndays
        self.datetype = datetype

    def get_query_params(self):
        """returns the fields to add to the EUtils query

        This is an internal implementation feature you can ignore.
        """
        return {"reldate": self.ndays,
                "datetype": self.datetype}
153
154
# Matches 'YYYY', 'YYYY/MM' or 'YYYY/MM/DD'.  Only the shape is checked,
# not whether the month and day values are in range.
_date_re_match = re.compile(r"\d{4}(/\d\d(/\d\d)?)?$").match


class DateRange(object):
    """Restrict a search to matches within a date range

    Some examples:
      matches between 1995 and 2000 -- DateRange("1995", "1999/12/31")
      matches before 1990 -- DateRange(maxdate = "1990/01/01")
      matches in 2002 or later -- DateRange(mindate = "2002/01/01")
      matches in June or July of 2001 -- DateRange("2001/06", "2001/07")
    """

    def __init__(self, mindate=None, maxdate=None, datetype=None):
        """mindate = None, maxdate = None, datetype = None

        'mindate' -- matches must be on or after this date
        'maxdate' -- matches must be on or before this date
        'datetype' -- the date field to use for the search (defaults
                        to Entrez date, which is "edat")

        At least one of mindate or maxdate must be specified.
        If mindate is omitted, all results on or before maxdate are returned.
        If maxdate is omitted, all results on or after mindate are returned.

        Dates must be formatted as 'YYYY/MM/DD', 'YYYY/MM', or 'YYYY'.

        Raises TypeError if both dates are omitted or if a given date
        is not in one of those formats.
        """
        if mindate is None and maxdate is None:
            raise TypeError("Must specify at least one of mindate or maxdate")

        # mindate is checked first, so it is the one reported when both
        # dates are malformed.
        for name, value in (("mindate", mindate), ("maxdate", maxdate)):
            if value is not None and _date_re_match(value) is None:
                raise TypeError(
                    "%s is not in YYYY/MM/DD format (month and "
                    "day are optional): %r" % (name, value))

        self.mindate = mindate
        self.maxdate = maxdate
        self.datetype = datetype

    def get_query_params(self):
        """returns the fields to add to the EUtils query

        This is an internal implementation feature you can ignore.

        An omitted bound is reported as None (like an omitted datetype)
        rather than as the literal string "None".
        """
        def as_text(value):
            if value is None:
                return None
            return str(value)

        return {"mindate": as_text(self.mindate),
                "maxdate": as_text(self.maxdate),
                "datetype": self.datetype}
205
206
207
class Expression(object):
    """Base class for the Expression given in the eSearch output

    NCBI does some processing on the request.  They return the
    translated expression as part of the search results.  To get the
    expression as an Entrez string, use str(expression).

    iter(expression) traverses the expression tree in postfix order.
    """

    def __and__(self, other):
        """intersection of two expressions"""
        return And(self, other)

    def __or__(self, other):
        """union of two expressions"""
        return Or(self, other)

    def __iter__(self):
        """Traverse the tree in postfix order"""
        # Must be provided by each concrete node type
        raise NotImplementedError
226
class Term(Expression):
    """Information about an Expression Term, which is the leaf node

    The fields are:
      term -- a word from the search term
      field -- the field searched by this term
      count -- the number of records matching this word
      explode -- stored as given; its meaning is not documented here
    """

    def __init__(self, term, field, count, explode):
        self.term = term
        self.field = field
        self.count = count
        self.explode = explode

    def __str__(self):
        # NOTE(review): restored; the listing dropped the lines between
        # __init__ and __iter__.  The enclosing And/Or/Not nodes format
        # their children with %s, so a leaf is assumed to render as its
        # term text -- confirm against the upstream module.
        return self.term

    def __iter__(self):
        """Traverse the tree in postfix order"""
        # A leaf has no children, so the traversal is just the leaf itself
        yield self
246
class BinaryOp(Expression):
    """Base class for binary expressions.  Has a left and a right child"""

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def __iter__(self):
        """Traverse the tree in postfix order"""
        # Postfix order: everything under the left child, then everything
        # under the right child, then this node.
        for x in self.left:
            yield x
        for x in self.right:
            yield x
        yield self
259
260
261
262
class And(BinaryOp):
    """intersection of two subexpressions"""

    def __str__(self):
        """the Entrez form, "(left AND right)" """
        return "(%s AND %s)" % (self.left, self.right)
267
class Or(BinaryOp):
    """union of two subexpressions"""

    def __str__(self):
        """the Entrez form, "(left OR right)" """
        return "(%s OR %s)" % (self.left, self.right)
272
273
class Not(BinaryOp):
    """the set of the left child without elements from the right child

    This is used for something like "poliovirus NOT polio"
    """

    def __str__(self):
        """the Entrez form, "(left NOT right)" """
        return "(%s NOT %s)" % (self.left, self.right)
281
class Range(BinaryOp):
    """Used to store a date range"""

    def __init__(self, left, right):
        """left, right

        Both children must have 'term' and 'field' attributes.
        Raises TypeError if the two fields differ.
        """
        if left.field != right.field:
            raise TypeError("dates must have the same field: %r and %r" %
                            (left.field, right.field))
        BinaryOp.__init__(self, left, right)

    def __str__(self):
        """the range as "left:right[field]" """
        def without_tag(term):
            # Drop everything from the last "[" onwards; a term with no
            # "[" is used whole.
            i = term.rfind("[")
            if i == -1:
                return term
            return term[:i]

        return "%s:%s[%s]" % (without_tag(self.left.term),
                              without_tag(self.right.term),
                              self.left.field)
302
303
304
class SearchResult(object):
    """Store results from a database search

    Attributes are:
      count -- total number of matches to the query
      retmax -- total number of identifiers requested
      retstart -- a search can return a portion of the total
           number of results.  retstart is the offset into this list
      ids -- matching identifiers (may be a subset of the full list)
      translation_set -- dict mapping an input name to the canonical
           form preferred by NCBI
      expression -- the full query as understood by NCBI
      webenv -- the WebEnv string (if use_history is set)
      query_key -- the query_key (if use_history is set)
      errors -- list of Problems in the ErrorList
      warnings -- list of Problems in the WarningList
      timestamp -- timestamp (from time.time()) when this record
           was received from the server.

    Returns a list of identifiers instead of a DBIds because the output
    from NCBI's eSearch doesn't include the database name.
    """

    def __init__(self,
                 count, retmax, retstart, ids,
                 translation_set, expression,
                 webenv, query_key, errors,
                 warnings, timestamp):
        # Plain record: every argument is stored under its own name.
        self.count = count
        self.retmax = retmax
        self.retstart = retstart
        self.ids = ids
        self.translation_set = translation_set
        self.expression = expression
        self.webenv = webenv
        self.query_key = query_key
        self.errors = errors
        self.warnings = warnings
        self.timestamp = timestamp
343
class PostResult(object):
    """Store the results of a Post

    Attributes are:
      webenv -- the WebEnv string
      query_key -- the query_key
      invalid_ids -- stored as passed to the constructor
      timestamp -- timestamp (from time.time()) when this record
           was received from the server.
    """

    def __init__(self, webenv, query_key, invalid_ids, timestamp):
        self.webenv = webenv
        self.query_key = query_key
        self.invalid_ids = invalid_ids
        self.timestamp = timestamp
358
class Summary(object):
    """Store information from calling eSummary

    Attributes are:
      id -- the identifier string for this record
      dataitems -- an OrderedDictList containing the parsed Item
           elements for this Summary.
    """

    def __init__(self, id, dataitems):
        self.id = id
        self.dataitems = dataitems

    def __repr__(self):
        return "Summary(%r, %r)" % (self.id, self.dataitems)

    def __str__(self):
        return "<Summary id=%s, %s>" % (self.id, self.dataitems)
374
375
class Date(object):
    """Allow simple Date storage

    Parameters and attributes are 'year', 'month', and 'day'
    """

    def __init__(self, year, month, day):
        self.year = year
        self.month = month
        self.day = day

    def __repr__(self):
        return "%s(%r, %r, %r)" % (self.__class__.__name__,
                                   self.year, self.month, self.day)

    def __str__(self):
        """the date as YYYY/MM/DD"""
        return "%4d/%02d/%02d" % (self.year, self.month, self.day)

    def timetuple(self):
        """Return the 9-tuple needed by various time functions"""
        # Time of day and the weekday/yearday slots are zero; the last
        # slot is -1.
        return (self.year, self.month, self.day, 0, 0, 0, 0, 0, -1)

    def __eq__(self, other):
        """Are these two times equal?"""
        return (self.year == other.year and
                self.month == other.month and
                self.day == other.day)

    def __ne__(self, other):
        """Are these two times dissimilar?"""
        return not self == other
403
404
405
406
407
408
409
410
class Problem(object):
    """Base class for Search Errors or Warnings

    A problem has:
      text -- the text of the problem
      severity -- either Problem.ERROR or Problem.WARNING
      category -- how NCBI categorizes this problem

    Only 'text' is set by this class; 'severity' and 'category' are
    expected to be supplied by subclasses.
    """
    ERROR = "ERROR"
    WARNING = "WARNING"

    # NOTE(review): __init__ and __eq__ were lost from the listing and are
    # reconstructed from __repr__ (built from self.text), __ne__ (defined
    # via ==) and the attribute list above -- confirm against upstream.
    def __init__(self, text):
        self.text = text

    def __eq__(self, other):
        return (self.text == other.text and
                self.severity == other.severity and
                self.category == other.category)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.text)

    def __str__(self):
        return str(self.text)
433
436
439
442
446
449
452
455
def _build_problem_mapping():
    """Internal: make a map from category name (in XML) to the right class"""
    mapping = {}
    # Scan a snapshot of the module namespace for Problem subclasses
    # that declare a 'category'.
    for v in list(globals().values()):
        try:
            is_problem = issubclass(v, Problem)
        except TypeError:
            # issubclass() rejects anything that is not a class
            continue
        if is_problem and hasattr(v, "category"):
            mapping[v.category] = v
    return mapping

# Built once at import time, so it only sees classes defined above this line.
problem_category_mapping = _build_problem_mapping()
468
469
470
class Link(object):
    """Store neighbor Link information for a given record

    Attributes are:
      id -- the identifier used as the input for the neighbor request
      score -- the amount of similarity, high numbers are better
    """

    # NOTE(review): __init__ and __eq__ were lost from the listing and are
    # reconstructed from __repr__ and __ne__; the None default for 'score'
    # is an assumption -- confirm against upstream.
    def __init__(self, id, score=None):
        self.id = id
        self.score = score

    def __eq__(self, other):
        return self.id == other.id and self.score == other.score

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "Link(%r, %r)" % (self.id, self.score)
487
class IdCheck(object):
    """Store results from an lcheck link

    Attributes are:
      id -- the id of the requested record
      has_linkout -- boolean, either it does or doesn't
      has_neighbor -- boolean, either it does or doesn't
    """

    def __init__(self, id, has_linkout=0, has_neighbor=0):
        self.id = id
        self.has_linkout = has_linkout
        self.has_neighbor = has_neighbor

    def __eq__(self, other):
        return (self.id == other.id and
                self.has_linkout == other.has_linkout and
                self.has_neighbor == other.has_neighbor)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "IdCheck(%r, %r, %r)" % (self.id, self.has_linkout,
                                        self.has_neighbor)
508
class LinkSetDb(object):
    """Used in eLink with cmd == neighbor

    Attributes are:
      dbto -- the links are TO this database name
      linkname -- the name for this set (eg, "pubmed_protein")
      links -- list of Links, one per matching record (includes score)
          List order is the same as the XML, which is ordered from
          most likely to least.  The identifier is from 'dbto'
      info -- ignored; this is only used as a warning when there is
          an empty list

    You can also use
      dbids -- get a DBIds of dbto and the identifiers in each Link
    """

    def __init__(self, dbto, linkname, links=None, info=None):
        """dbto, linkname, links = None, info = None

        Raises TypeError if neither 'links' nor 'info' is given.
        'info' is not stored.
        """
        if links is None:
            if info is None:
                raise TypeError("At least one of 'links' and 'info' must be set")
            links = []
        self.dbto = dbto
        self.linkname = linkname
        self.links = links

    def _get_dbids(self):
        # NOTE(review): restored; this method was lost from the listing.
        # It follows the 'dbids' description in the class docstring.
        return DBIds(self.dbto, [link.id for link in self.links])
    dbids = property(_get_dbids)

    def __eq__(self, other):
        return (self.dbto == other.dbto and
                self.linkname == other.linkname and
                self.links == other.links)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "LinkSetDb(%r, %r, %r)" % (self.dbto, self.linkname, self.links)
545
class NeighborLinkSet(object):
    """Results from an eLink neighbor search

    Attributes are:
      dbids -- the DBIds of the *REQUESTED* identifiers
      linksetdbs -- an OrderedMultiDict of LinkSetDb objects
    """

    def __init__(self, dbids, linksetdbs):
        self.dbids = dbids
        self.linksetdbs = linksetdbs

    def __eq__(self, other):
        return (self.dbids == other.dbids and
                self.linksetdbs == other.linksetdbs)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "NeighborLinkSet(%r, %r)" % (self.dbids, self.linksetdbs)
565
566
class CheckLinkSet(object):
    """Results from 'ncheck' and 'lcheck' searches

    This is used to check if a set of records has neighbors
    or links.

    Attributes are:
      dbfrom -- the database containing those records
      idchecks -- list of IdCheck objects, one per id

      dbids -- the DBIds made from dbfrom and the idchecks
    """

    def __init__(self, dbfrom, idchecks):
        self.dbfrom = dbfrom
        self.idchecks = idchecks

    def _get_dbids(self):
        # Computed on each access from the current idchecks
        return DBIds(self.dbfrom, [idcheck.id for idcheck in self.idchecks])
    dbids = property(_get_dbids)

    def __eq__(self, other):
        return (self.dbfrom == other.dbfrom and
                self.idchecks == other.idchecks)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "CheckLinkSet(%r, %r)" % (self.dbfrom, self.idchecks)
594
595
596
class Provider(object):
    """The Provider, as listed in 'llinks' (LinkOut)

    Attributes are:
      name -- name of the provider
      name_abbr -- an abbreviated name for the provider
      id -- a unique id for the provider
      url -- where to go for more information about the provider
      icon_url -- a small image to use for the provider
    """

    def __init__(self, name, name_abbr, id,
                 url=None, icon_url=None):
        self.name = name
        self.name_abbr = name_abbr
        self.id = id
        self.url = url
        self.icon_url = icon_url

    def __eq__(self, other):
        return (self.name == other.name and
                self.name_abbr == other.name_abbr and
                self.id == other.id and
                self.url == other.url and
                self.icon_url == other.icon_url)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "Provider(%r, %r, %r, %r, %r)" % (
            self.name, self.name_abbr, self.id, self.url, self.icon_url)
626
627
class ObjUrl(object):
    """The ObjUrl containing LinkOut information for a record

    Attributes are:
      subject_types -- list of strings describing this link (0 or more)
      provider -- a Provider instance
      linkname -- a name used to categorize this link (optional)
      attributes -- list of attributes (text strings), (0 or more)
      url -- URL of the link (optional)

    No icon URL is stored on the ObjUrl itself.
    """

    def __init__(self, subject_types, provider,
                 linkname=None, url=None, attributes=None):
        """subject_types, provider, linkname = None, url = None,
        attributes = None

        'subject_types' must be a list.  'attributes' defaults to a
        new empty list.
        """
        assert isinstance(subject_types, list)
        self.subject_types = subject_types
        self.provider = provider
        self.linkname = linkname
        self.url = url
        # A fresh list per instance rather than a shared default
        self.attributes = [] if attributes is None else attributes

    def __eq__(self, other):
        return (self.linkname == other.linkname and
                self.subject_types == other.subject_types and
                self.url == other.url and
                self.attributes == other.attributes and
                self.provider == other.provider)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "ObjUrl(%r, %r, %r, %r, %r)" % (
            self.subject_types, self.provider, self.linkname,
            self.url, self.attributes)
661
class IdUrlSet(object):
    """Set of ObjUrls for the record with the given 'id'"""

    def __init__(self, id, objurls):
        self.id = id
        self.objurls = objurls

    def __eq__(self, other):
        return (self.id == other.id and
                self.objurls == other.objurls)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "IdUrlSet(%r, %r)" % (self.id, self.objurls)
674
class LinksLinkSet(object):
    """Results of an 'llink' (LinkOut) search

    Finds links from records in a given database to external
    resources.

    Fields are:
      dbfrom -- the database in which search started
      idurlset -- a list of IdUrlSet, one for each identifier
    """

    def __init__(self, dbfrom, idurlset):
        self.dbfrom = dbfrom
        self.idurlset = idurlset

    def __eq__(self, other):
        return (self.dbfrom == other.dbfrom and
                self.idurlset == other.idurlset)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "LinksLinkSet(%r, %r)" % (self.dbfrom, self.idurlset)
696