1 """Useful utilities for helping in parsing GenBank files.
2 """
3
4 import string
5
# NOTE(review): the ``class``/``def`` header lines were missing from the
# reviewed text; the class name and the ``clean_value`` method name below are
# reconstructed from the bodies -- confirm them against the original file.
class FeatureValueCleaner:
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like:

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    you'll end up with a value with \012s and spaces in it like:
        "MED\012 YDPWNL..."

    which you probably don't want.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).

    To support a new key, add a method named ``_clean_<key>`` taking the raw
    value and returning the cleaned one, and include ``<key>`` in the list
    passed to the initializer.
    """
    # Keys cleaned by default; each needs a matching ``_clean_<key>`` method.
    keys_to_process = ["translation"]

    def __init__(self, to_process=keys_to_process):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - collection of feature key names whose values should
           be cleaned. The object is stored as given (not copied).
        """
        self._to_process = to_process

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the value is not specified to be dealt with, the original value
        will be returned.

        Arguments:
         - key_name - name of the feature key the value belongs to.
         - value - the raw value to clean.

        Raises AssertionError if key_name is registered for processing but
        no ``_clean_<key_name>`` method exists on this object.
        """
        if key_name not in self._to_process:
            return value

        # Look the cleaner up separately from calling it, so that an
        # AttributeError raised *inside* a cleaner propagates unchanged
        # instead of being misreported as a missing cleaner.
        cleaner = getattr(self, "_clean_%s" % key_name, None)
        if cleaner is None:
            raise AssertionError("No function to clean key: %s"
                                 % key_name)
        return cleaner(value)

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string.

        All whitespace (spaces, tabs, newlines) is removed.
        """
        # str.join works on both Python 2 and 3; string.join() is gone in 3.
        return ''.join(value.split())
52