1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
""" FeatureSet module

    Provides:

    o FeatureSet - container for Feature objects

    For drawing capabilities, this module uses reportlab to draw and write
    the diagram:

    http://www.reportlab.com

    For dealing with biological information, the package expects Biopython
    objects:

    http://www.biopython.org
"""
32
33
34
35
36
37 from reportlab.pdfbase import _fontdata
38 from reportlab.lib import colors
39
40
41 from _Feature import Feature
42
43
44 import re
45
46
47
48
49
50
51
class FeatureSet(object):
    """ FeatureSet

        Container for the Feature objects that belong to one set.

        Provides:

        Methods:

        o __init__(self, set_id=None, name=None, parent=None)
                    Called on instantiation

        o add_feature(self, feature, **kwargs)
                    Wrap an object in a Feature, add it to the set and
                    return the wrapper

        o del_feature(self, feature_id)
                    Remove a feature from the set, by id

        o set_all_features(self, attr, value)
                    Set the passed attribute to the passed value in all
                    features in the set that already have that attribute

        o get_features(self, attribute=None, value=None, comparator=None)
                    Returns a list of Features from the set, optionally
                    filtered on one attribute

        o get_ids(self)
                    Returns a list of unique ids for features in the set

        o range(self)
                    Returns the range of bases covered by features in
                    the set

        o to_string(self, verbose=0)
                    Returns a string describing the set

        o __len__(self)
                    Returns the number of features in the set

        o __getitem__(self, key)
                    Returns a feature from the set, keyed by id

        o __str__(self)
                    Returns a string describing the set

        Attributes:

        o id        Unique id for the set

        o name      String describing the set

        o parent    Object passed as `parent` on instantiation; only
                    stored by this class

        o features  Dictionary of Feature wrappers, keyed by feature id

        o next_id   Integer id that the next added feature will receive
    """

    def __init__(self, set_id=None, name=None, parent=None):
        """ __init__(self, set_id=None, name=None, parent=None)

            o set_id    Unique id for the set

            o name      String identifying the feature set

            o parent    Object owning this set; stored as self.parent
        """
        self.parent = parent
        # Store the caller's id.  The previous code assigned the builtin
        # id() function here, so the set never knew its own id.
        self.id = set_id
        self.next_id = 0        # id handed to the next added feature
        self.features = {}      # feature id -> Feature wrapper
        self.name = name

    def add_feature(self, feature, **kwargs):
        """ add_feature(self, feature, **kwargs) -> Feature

            o feature       Bio.SeqFeature object

            o **kwargs      Keyword arguments for Feature.  Named attributes
                            of the Feature

            Add a Bio.SeqFeature object to the diagram (it is stored
            internally in a Feature wrapper).  The wrapper is returned so
            that the caller can adjust it further.
        """
        feature_id = self.next_id
        wrapper = Feature(self, feature_id, feature)
        self.features[feature_id] = wrapper
        for key, setting in kwargs.items():
            if key in ("colour", "color"):
                # Both spellings go through Feature.set_color() instead of
                # a plain setattr.  NOTE(review): presumably set_color
                # normalises the value - confirm in _Feature.
                wrapper.set_color(setting)
                continue
            setattr(wrapper, key, setting)
        self.next_id += 1
        return wrapper

    def del_feature(self, feature_id):
        """ del_feature(self, feature_id)

            o feature_id        Unique id of the feature to delete

            Remove a feature from the set, indicated by its id.  Raises
            KeyError if the set holds no feature with that id.
        """
        del self.features[feature_id]

    def set_all_features(self, attr, value):
        """ set_all_features(self, attr, value)

            o attr      An attribute of the Feature class

            o value     The value to set that attribute

            Set the passed attribute of all features in the set to the
            passed value.  Features that do not already have the attribute
            are left untouched.
        """
        for feature in self.features.values():
            # Only touch features that have the attribute and whose current
            # value differs from the requested one.
            if hasattr(feature, attr) and getattr(feature, attr) != value:
                setattr(feature, attr, value)

    def get_features(self, attribute=None, value=None, comparator=None):
        """ get_features(self, attribute=None, value=None, comparator=None) ->
                              [Feature, Feature, ...]

            o attribute     String, attribute of a Feature object

            o value         The value desired of the attribute

            o comparator    String, how to compare the Feature attribute to the
                            passed value

            If no attribute or value is given, return a list of all features
            in the feature set.  If both an attribute and value are given,
            then depending on the comparator, a list of all features in the
            FeatureSet matching (or not) the passed value will be returned.
            Allowed comparators are: 'startswith', 'not', 'like'; with no
            comparator the attribute must equal the value.  'like' treats the
            value as a regular expression and uses re.search.  Any other
            comparator gives an empty list.

            The user is expected to make a responsible decision about which
            feature attributes to use with which passed values and comparator
            settings.
        """
        # Take a real list, not a dictionary view: the docstring promises a
        # list, and a snapshot stays valid if the set is modified later.
        candidates = list(self.features.values())

        # No filter requested: hand back everything
        if attribute is None or value is None:
            return candidates

        # Exact match
        if comparator is None:
            return [feature for feature in candidates
                    if getattr(feature, attribute) == value]

        # Everything except an exact match
        if comparator == 'not':
            return [feature for feature in candidates
                    if getattr(feature, attribute) != value]

        # String prefix match
        if comparator == 'startswith':
            return [feature for feature in candidates
                    if getattr(feature, attribute).startswith(value)]

        # Regular expression match
        if comparator == 'like':
            return [feature for feature in candidates
                    if re.search(value, getattr(feature, attribute))]

        # Unrecognised comparator
        return []

    def get_ids(self):
        """ get_ids(self) -> [int, int, ...]

            Return a list of all ids for the feature set
        """
        # list() so that a list is returned whichever Python version is used
        return list(self.features)

    def range(self):
        """ range(self) -> (int, int)

            Returns the lowest and highest base (or mark) numbers as a tuple,
            taken over every (start, end) pair in the locations of every
            feature.  Returns (0, 0) if there are no locations at all.
        """
        spans = [span for feature in self.features.values()
                 for span in feature.locations]
        if not spans:
            return 0, 0
        lowest = min(start for start, end in spans)
        highest = max(end for start, end in spans)
        return (lowest, highest)

    def to_string(self, verbose=0):
        """ to_string(self, verbose=0) -> ""

            o verbose       Boolean indicating whether a short or complete
                            account of the set is required

            Returns a formatted string with information about the set.  The
            short form is the same as str(self); the complete form adds one
            line per feature.
        """
        if not verbose:
            return str(self)
        lines = ["\n<%s: %s>" % (self.__class__, self.name),
                 "%d features" % len(self.features)]
        for key in self.features:
            lines.append("feature: %s" % self.features[key])
        return "\n".join(lines)

    def __len__(self):
        """ __len__(self) -> int

            Return the number of features in the set
        """
        return len(self.features)

    def __getitem__(self, key):
        """ __getitem__(self, key) -> Feature

            Return a feature, keyed by id.  Raises KeyError for an unknown id.
        """
        return self.features[key]

    def __str__(self):
        """ __str__(self) -> ""

            Returns a formatted string with information about the feature set
        """
        return "\n<%s: %s %d features>" % (self.__class__, self.name,
                                           len(self.features))
276
277
278
279
280
if __name__ == '__main__':
    # Ad-hoc smoke test: build a FeatureSet from the CDS features of one
    # GenBank record.  NOTE: the path below is specific to the original
    # developer's machine.
    from Bio import GenBank
    from Bio.SeqFeature import SeqFeature

    parser = GenBank.FeatureParser()
    # Open the file in a with-block so the handle is closed even if the
    # parser raises part-way through the record.
    with open('/Users/lpritc/Documents/Genomes/Bacteria/Nanoarchaeum_equitans/NC_005213.gbk', 'r') as fhandle:
        genbank_entry = parser.parse(fhandle)

    # Collect only the coding sequences into the feature set
    gdfs = FeatureSet(0, 'Nanoarchaeum equitans CDS')
    for feature in genbank_entry.features:
        if feature.type == 'CDS':
            gdfs.add_feature(feature)
296
297
298
299
300
301
302
303
304
305