1
# Interpreters without a built-in set type get the equivalent class from
# the legacy ``sets`` module bound to the same name.
try:
    set = set
except NameError:
    import sets
    set = sets.Set
    del sets
6
7 from Bio import Alphabet
8 from Bio.Alphabet import IUPAC
9 from Bio.Data import IUPACData
10
# Registries filled in as tables are registered: each kind of table is
# reachable both by name and by numeric id.
unambiguous_dna_by_name, unambiguous_dna_by_id = {}, {}
unambiguous_rna_by_name, unambiguous_rna_by_id = {}, {}
generic_by_name, generic_by_id = {}, {}
ambiguous_generic_by_name, ambiguous_generic_by_id = {}, {}

# Replaced by the DNA / RNA table objects when the table with id 1 is
# registered.
standard_dna_table = standard_rna_table = None
23
24
25
26
27
30
107
118
119
131
132
135
138
139
140
def register_ncbi_table(name, alt_name, id, table, start_codons, stop_codons):
    """Create the DNA, RNA and generic tables for one genetic code.

    name         -- one or more table names separated by "; "
    alt_name     -- extra alias for the table, or None
    id           -- numeric table identifier
    table        -- dict mapping DNA codons (using T) to amino acid letters
    start_codons -- list of DNA start codons
    stop_codons  -- list of DNA stop codons

    The three table objects are stored in the module-level ``*_by_id`` and
    ``*_by_name`` registries.  The table with id 1 is additionally stored as
    ``standard_dna_table`` / ``standard_rna_table``.
    """
    global standard_dna_table, standard_rna_table

    names = name.split("; ")

    # Each table object receives its own names list.  The alias is appended
    # as given, so that list ends with None when there is no alias.
    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    # The RNA table uses U in place of T; the generic table accepts both
    # spellings of every codon.
    generic_table = {}
    rna_table = {}
    for dna_codon, amino in table.items():
        rna_codon = dna_codon.replace("T", "U")
        generic_table[dna_codon] = amino
        generic_table[rna_codon] = amino
        rna_table[rna_codon] = amino

    # The generic lists hold the DNA spelling followed by the RNA spelling
    # of every codon, even when the two spellings are identical.
    rna_start_codons, generic_start_codons = [], []
    for dna_codon in start_codons:
        rna_codon = dna_codon.replace("T", "U")
        generic_start_codons += [dna_codon, rna_codon]
        rna_start_codons.append(rna_codon)

    rna_stop_codons, generic_stop_codons = [], []
    for dna_codon in stop_codons:
        rna_codon = dna_codon.replace("T", "U")
        generic_stop_codons += [dna_codon, rna_codon]
        rna_stop_codons.append(rna_codon)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)
    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic

    # Only a real alias becomes a lookup key in the by-name registries.
    if alt_name is not None:
        names.append(alt_name)

    for table_name in names:
        unambiguous_dna_by_name[table_name] = dna
        unambiguous_rna_by_name[table_name] = rna
        generic_by_name[table_name] = generic
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# Table 1: 'Standard' (alias 'SGC0').  Stop codons are listed separately
# and have no entry in the codon -> amino acid mapping.
register_ncbi_table(
    name='Standard',
    alt_name='SGC0',
    id=1,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
        'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
        'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
        'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
        'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
        'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
        'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['TTG', 'CTG', 'ATG']
)
# Table 2: 'Vertebrate Mitochondrial' (alias 'SGC1').
register_ncbi_table(
    name='Vertebrate Mitochondrial',
    alt_name='SGC1',
    id=2,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V',
        'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A',
        'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
        'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'AGA', 'AGG'],
    start_codons=['ATT', 'ATC', 'ATA', 'ATG', 'GTG']
)
# Table 3: 'Yeast Mitochondrial' (alias 'SGC2').
register_ncbi_table(
    name='Yeast Mitochondrial',
    alt_name='SGC2',
    id=3,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T',
        'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG']
)
# Table 4 (alias 'SGC3'): one table registered under five "; "-separated
# names.
register_ncbi_table(
    name='Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma',
    alt_name='SGC3',
    id=4,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['TTA', 'TTG', 'CTG', 'ATT', 'ATC',
                  'ATA', 'ATG', 'GTG']
)
# Table 5: 'Invertebrate Mitochondrial' (alias 'SGC4').
register_ncbi_table(
    name='Invertebrate Mitochondrial',
    alt_name='SGC4',
    id=5,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
        'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['TTG', 'ATT', 'ATC', 'ATA', 'ATG',
                  'GTG']
)
# Table 6 (alias 'SGC5'): registered under three "; "-separated names.
# TGA is the only stop codon; TAA and TAG map to 'Q' here.
register_ncbi_table(
    name='Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear',
    alt_name='SGC5',
    id=6,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
        'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
        'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
        'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
        'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
        'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
        'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
        'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TGA'],
    start_codons=['ATG']
)
# Table 9: 'Echinoderm Mitochondrial' (alias 'SGC8').
register_ncbi_table(
    name='Echinoderm Mitochondrial',
    alt_name='SGC8',
    id=9,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
        'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG']
)
# Table 10: 'Euplotid Nuclear' (alias 'SGC9').
register_ncbi_table(
    name='Euplotid Nuclear',
    alt_name='SGC9',
    id=10,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG']
)
# Table 11: 'Bacterial' (no alias).
register_ncbi_table(
    name='Bacterial',
    alt_name=None,
    id=11,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
        'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
        'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
        'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
        'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
        'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
        'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['TTG', 'CTG', 'ATT', 'ATC', 'ATA',
                  'ATG', 'GTG']
)
# Table 12: 'Alternative Yeast Nuclear' (no alias).
register_ncbi_table(
    name='Alternative Yeast Nuclear',
    alt_name=None,
    id=12,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
        'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
        'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
        'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
        'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
        'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
        'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['CTG', 'ATG']
)
# Table 13: 'Ascidian Mitochondrial' (no alias).
register_ncbi_table(
    name='Ascidian Mitochondrial',
    alt_name=None,
    id=13,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G',
        'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG']
)
# Table 14: 'Flatworm Mitochondrial' (no alias).  TAG is the only stop
# codon; TAA maps to 'Y' here.
register_ncbi_table(
    name='Flatworm Mitochondrial',
    alt_name=None,
    id=14,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W',
        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
        'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
        'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
        'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
        'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
        'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
        'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
        'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAG'],
    start_codons=['ATG']
)
# Table 15: 'Blepharisma Macronuclear' (no alias).  TAG maps to 'Q' here.
register_ncbi_table(
    name='Blepharisma Macronuclear',
    alt_name=None,
    id=15,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TGA'],
    start_codons=['ATG']
)
515
516
517
class AmbiguousCodonTable(CodonTable):
    """Codon table that accepts ambiguous nucleotide letters.

    Wraps an existing (unambiguous) codon table.  Forward lookups go through
    an AmbiguousForwardTable, the wrapped table's back table is reused, and
    the start and stop codon lists are extended with the ambiguous codons
    produced by list_ambiguous_codons.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        forward_table = AmbiguousForwardTable(codon_table.forward_table,
                                              ambiguous_nucleotide_values,
                                              ambiguous_protein_values)
        start_codons = list_ambiguous_codons(codon_table.start_codons,
                                             ambiguous_nucleotide_values)
        stop_codons = list_ambiguous_codons(codon_table.stop_codons,
                                            ambiguous_nucleotide_values)
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            forward_table,
                            codon_table.back_table,
                            start_codons,
                            stop_codons)
        self._codon_table = codon_table

    def __getattr__(self, name):
        # Anything not found on this object is looked up on the wrapped
        # table instead.
        return getattr(self._codon_table, name)
545
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids an (ambiguous) codon could code for.

    codon -- three-letter codon string; each letter must be a key of
             ambiguous_nucleotide_values
    forward_table -- mapping from unambiguous codons to amino acid letters;
                     codons missing from it are treated as stop codons
    ambiguous_nucleotide_values -- mapping from each nucleotide letter to
                                   the unambiguous letters it stands for

    Returns a list of distinct amino acid letters, in the order they are
    first encountered while expanding the codon.

    Raises KeyError if every expansion of the codon is missing from
    forward_table, and TranslationError if only some of them are.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]

    # A real list is returned (rather than dict.keys()) so that callers can
    # index the result on every Python version.
    possible = []
    found_stop = False
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                try:
                    amino = forward_table[y1 + y2 + y3]
                except KeyError:
                    # Not in the forward table: counted as a stop codon.
                    found_stop = True
                else:
                    if amino not in possible:
                        possible.append(amino)

    if found_stop:
        if possible:
            raise TranslationError(
                "ambiguous codon '%s' codes for both proteins and stop codons"
                % codon)
        # Every expansion was a stop codon.
        raise KeyError(codon)
    return possible
568
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extends a codon list to include all possible ambiguous codons.

    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
    Thus only two more codons are added in the following:

    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    codons -- list of three-letter codon strings
    ambiguous_nucleotide_values -- mapping from each nucleotide letter to
                                   the unambiguous letters it stands for

    Returns a new (longer) list of codon strings: a copy of the input
    followed by the added ambiguous codons in alphabetical order of their
    letters, so the result does not depend on set or dict iteration order.
    """
    known = set(codons)

    def letters_covering(position):
        # Letters whose every meaning occurs at this position in some
        # input codon.  Sorted so that the output order is deterministic.
        seen = set([codon[position] for codon in codons])
        letters = [letter for (letter, meanings)
                   in ambiguous_nucleotide_values.items()
                   if seen.issuperset(set(meanings))]
        letters.sort()
        return letters

    c1_list = letters_covering(0)
    c2_list = letters_covering(1)
    c3_list = letters_covering(2)

    answer = codons[:]  # copy, the caller's list is left untouched
    for c1 in c1_list:
        for c2 in c2_list:
            for c3 in c3_list:
                ambig_codon = c1 + c2 + c3
                if ambig_codon in known:
                    continue
                # e.g. 'TRR' expands to 'TAA', 'TAG', 'TGA' and 'TGG'.  The
                # candidate is kept only when every expansion is already in
                # the input list.
                for codon in [n1 + n2 + n3
                              for n1 in ambiguous_nucleotide_values[c1]
                              for n2 in ambiguous_nucleotide_values[c2]
                              for n3 in ambiguous_nucleotide_values[c3]]:
                    if codon not in known:
                        break
                else:
                    answer.append(ambig_codon)
    return answer
# Import-time checks of list_ambiguous_codons: each entry is
# (input codons, ambiguity mapping, expected result).
for _codons, _values, _expected in [
        (['TGA', 'TAA'], IUPACData.ambiguous_dna_values,
         ['TGA', 'TAA', 'TRA']),
        (['TAG', 'TGA'], IUPACData.ambiguous_dna_values,
         ['TAG', 'TGA']),
        (['TAG', 'TAA'], IUPACData.ambiguous_dna_values,
         ['TAG', 'TAA', 'TAR']),
        (['UAG', 'UAA'], IUPACData.ambiguous_rna_values,
         ['UAG', 'UAA', 'UAR']),
        (['TGA', 'TAA', 'TAG'], IUPACData.ambiguous_dna_values,
         ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']),
        ]:
    assert list_ambiguous_codons(_codons, _values) == _expected
del _codons, _values, _expected
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
class AmbiguousForwardTable:
    """Forward (codon -> amino acid) lookup that accepts ambiguous codons.

    Wraps an unambiguous forward table.  A codon found there is returned
    directly.  Otherwise the codon is expanded with list_possible_proteins
    and, when several amino acids are possible, the ambiguous protein letter
    with the fewest meanings that covers all of them is returned.  Every
    outcome, including failures, is cached per codon.
    """

    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Store the tables and pre-compute the inverted protein mapping.

        forward_table -- mapping from unambiguous codons to amino acids
        ambiguous_nucleotide -- mapping from nucleotide letters to the
                                unambiguous letters they stand for
        ambiguous_protein -- mapping from protein letters to the amino
                             acid letters they stand for
        """
        self.forward_table = forward_table

        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Invert ambiguous_protein: amino acid -> list of the protein
        # letters that can stand for it (each letter listed once).
        inverted = {}
        for name, val in ambiguous_protein.items():
            for c in val:
                terms = inverted.setdefault(c, [])
                if name not in terms:
                    terms.append(name)
        self._inverted = inverted

        # codon -> result, or the exception class to raise for that codon.
        self._cache = {}

    def get(self, codon, failobj = None):
        """Return the translation of codon, or failobj on KeyError."""
        try:
            return self.__getitem__(codon)
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Translate a possibly ambiguous codon.

        Raises KeyError when list_possible_proteins does (the codon can only
        be a stop codon) and TranslationError when it does, or when no
        ambiguous protein letter covers all the possible amino acids.
        """
        try:
            cached = self._cache[codon]
        except KeyError:
            pass
        else:
            # Failures are cached as the exception class itself.
            if cached is TranslationError:
                raise TranslationError(codon)
            if cached is KeyError:
                raise KeyError(codon)
            return cached

        try:
            amino = self.forward_table[codon]
        except KeyError:
            pass
        else:
            self._cache[codon] = amino
            return amino

        # Not a plain codon: work out everything it could code for.
        try:
            possible = list(list_possible_proteins(codon,
                                                   self.forward_table,
                                                   self.ambiguous_nucleotide))
        except KeyError:
            self._cache[codon] = KeyError
            raise KeyError(codon)
        except TranslationError:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)
        assert len(possible) > 0, "unambiguous codons must code"

        # Only one amino acid is possible, so there is no ambiguity.
        if len(possible) == 1:
            self._cache[codon] = possible[0]
            return possible[0]

        # Count, for each ambiguous protein letter, how many of the
        # possible amino acids it can stand for.
        ambiguous_possible = {}
        for amino in possible:
            for term in self._inverted[amino]:
                ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1

        # Keep the letters that cover every possible amino acid.
        n = len(possible)
        candidates = [term for term, count in ambiguous_possible.items()
                      if count == n]

        if len(candidates) == 0:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)

        # Choose the letter with the fewest meanings, ties broken
        # alphabetically.  min() over (size, letter) pairs gives the same
        # winner as sorting with a cmp function, and works on Python 3.
        table = self.ambiguous_protein
        best = min([(len(table[term]), term) for term in candidates])[1]
        self._cache[codon] = best
        return best
730
731
# Wrap every registered unambiguous DNA / RNA table in an
# AmbiguousCodonTable, keyed the same way as the source registry.
ambiguous_dna_by_name = {}
ambiguous_dna_by_id = {}
ambiguous_rna_by_name = {}
ambiguous_rna_by_id = {}
for _plain, _ambiguous, _alphabet, _values in [
        (unambiguous_dna_by_name, ambiguous_dna_by_name,
         IUPAC.ambiguous_dna, IUPACData.ambiguous_dna_values),
        (unambiguous_dna_by_id, ambiguous_dna_by_id,
         IUPAC.ambiguous_dna, IUPACData.ambiguous_dna_values),
        (unambiguous_rna_by_name, ambiguous_rna_by_name,
         IUPAC.ambiguous_rna, IUPACData.ambiguous_rna_values),
        (unambiguous_rna_by_id, ambiguous_rna_by_id,
         IUPAC.ambiguous_rna, IUPACData.ambiguous_rna_values),
        ]:
    for key, val in _plain.items():
        _ambiguous[key] = AmbiguousCodonTable(
            val,
            _alphabet,
            _values,
            IUPAC.extended_protein,
            IUPACData.extended_protein_values)
del _plain, _ambiguous, _alphabet, _values

# The generic tables accept both T and U: take a copy of the RNA ambiguity
# values and add "T" as a letter standing for "U".
_merged_values = dict(IUPACData.ambiguous_rna_values)
_merged_values["T"] = "U"

for _plain, _ambiguous in [
        (generic_by_name, ambiguous_generic_by_name),
        (generic_by_id, ambiguous_generic_by_id),
        ]:
    for key, val in _plain.items():
        # Each table gets its own NucleotideAlphabet instance.
        _ambiguous[key] = AmbiguousCodonTable(
            val,
            Alphabet.NucleotideAlphabet(),
            _merged_values,
            IUPAC.extended_protein,
            IUPACData.extended_protein_values)
del _plain, _ambiguous
del _merged_values
del key, val
781
782
# Import-time sanity checks over every registered table id.
for n in ambiguous_generic_by_id.keys():
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"
    # When both UAA and UGA are stop codons, URA / TRA can only be a stop:
    # it must not translate, and it must be listed as a stop codon.
    if "UAA" in unambiguous_rna_by_id[n].stop_codons \
    and "UGA" in unambiguous_rna_by_id[n].stop_codons:
        try:
            ambiguous_dna_by_id[n].forward_table["TRA"]
        except KeyError:
            pass
        else:
            # The lookup succeeded although it should have failed.  No
            # print statement is used, so the module parses on Python 3
            # and writes nothing to stdout at import.
            assert False, "Should be a stop only"
        assert "URA" in ambiguous_generic_by_id[n].stop_codons
        assert "URA" in ambiguous_rna_by_id[n].stop_codons
        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
del n
# The same table must be reachable by id, by name and by alias.
assert ambiguous_generic_by_id[1].stop_codons == ambiguous_generic_by_name["Standard"].stop_codons
assert ambiguous_generic_by_id[4].stop_codons == ambiguous_generic_by_name["SGC3"].stop_codons
assert ambiguous_generic_by_id[15].stop_codons == ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons
803