1
2
3
4
5
6
7
8
9
10
""" Notes about the various classes of the restriction enzyme implementation.

        RestrictionType is the type of all restriction enzymes.
    ----------------------------------------------------------------------------
        AbstractCut implements some methods that are common to all enzymes.
    ----------------------------------------------------------------------------
        NoCut, OneCut, TwoCuts  represent the number of double strand cuts
                                produced by the enzyme.
                                They correspond to the 4th field of the rebase
                                record emboss_e.NNN.
                0->NoCut    : the enzyme is not characterised.
                2->OneCut   : the enzyme produces one double strand cut.
                4->TwoCuts  : the enzyme produces two double strand cuts.
    ----------------------------------------------------------------------------
        Meth_Dep, Meth_Undep    represent the methylation susceptibility of
                                the enzyme.
                                Not implemented yet.
    ----------------------------------------------------------------------------
        Palindromic,            whether the site is palindromic or not.
        NotPalindromic          Allows some optimisations of the code:
                                there is no need to check the reverse strand
                                with palindromic sites.
    ----------------------------------------------------------------------------
        Unknown, Blunt,         represent the overhang.
        Ov5, Ov3                Unknown is here for symmetry reasons and
                                corresponds to enzymes that are not
                                characterised in rebase.
    ----------------------------------------------------------------------------
        Defined, Ambiguous,     represent the sequence of the overhang.
        NotDefined
                                NotDefined is for enzymes not characterised in
                                rebase.

                                Defined corresponds to enzymes that display a
                                constant overhang whatever the sequence.
                                ex : EcoRI. G^AATTC -> overhang :AATT
                                            CTTAA^G

                                Ambiguous : the overhang varies with the
                                sequence restricted.
                                Typically enzymes which cut outside their
                                restriction site or (but not always)
                                inside an ambiguous site.
                                ex :
                                AcuI CTGAAG(22/20)  -> overhang : NN
                                AasI GACNNN^NNNGTC  -> overhang : NN
                                     CTGN^NNNNNCAG

            note : these 3 classes refer to the overhang, not to the site.
               So the enzyme ApoI (RAATTY) is Defined even if its restriction
               site is ambiguous.

                    ApoI R^AATTY -> overhang : AATT -> Defined
                         YTTAA^R
               Accordingly, blunt enzymes are always Defined, even
               when they cut outside their restriction site.
    ----------------------------------------------------------------------------
        Not_available,          as found in the rebase emboss_r.NNN files.
        Commercially_available
                                Allow the selection of the enzymes according
                                to their suppliers, to reduce the quantity
                                of results.
                                They will also allow the implementation of
                                buffer compatibility tables. Not implemented
                                yet.

                                The list of suppliers is extracted from
                                emboss_s.NNN.
    ----------------------------------------------------------------------------
        """
80
81 import re
82 import itertools
83
84
85 try:
86 set = set
87 except NameError:
88 from sets import Set as set
89
90 from Bio.Seq import Seq, MutableSeq
91 from Bio.Alphabet import IUPAC
92
93 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict,\
94 typedict, suppliers as suppliers_dict
95 from Bio.Restriction.RanaConfig import *
96 from Bio.Restriction.PrintFormat import PrintFormat
97 from Bio.Restriction.DNAUtils import check_bases
98
99
100
# Lookup table keyed by the one-letter nucleotide codes (A, C, G, T and the
# ambiguity codes R, Y, W, S, M, K, H, B, V, D, N). Each value is the string
# of codes associated with that key.
# NOTE(review): presumably "codes that can match this base" - confirm against
# the code that reads this table (not visible in this part of the file).
matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
            'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
            'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
            'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
            'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
            'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}

# Module-level alias for Bio.Seq.Seq.
DNA = Seq
109
207
208
210 """RestrictionType. Type from which derives all enzyme classes.
211
212 Implement the operator methods."""
213
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Initialise a freshly created enzyme class.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    After the regular ``type`` initialisation, the ``compsite`` class
    attribute is replaced by its compiled form (``re.compile``), so the
    class being created must define ``compsite``; otherwise AttributeError
    is raised.
    """
    # The super() proxy is already bound to ``cls``. Passing ``cls`` a second
    # time handed type.__init__ four arguments, which interpreters that
    # validate the argument count reject ("takes 1 or 3 arguments").
    # ``dct`` is only read here, so the shared default dict is never mutated.
    super(RestrictionType, cls).__init__(name, bases, dct)
    cls.compsite = re.compile(cls.compsite)
223
def __add__(cls, other):
    """RE.__add__(other) -> RestrictionBatch().

    If other is an enzyme, return a batch of the two enzymes.
    If other is already a RestrictionBatch, add the enzyme to it (through
    ``other.add_nocheck``) and return the result of that call.

    Raises TypeError for any other operand."""
    if isinstance(other, RestrictionType):
        return RestrictionBatch([cls, other])
    if isinstance(other, RestrictionBatch):
        return other.add_nocheck(cls)
    raise TypeError(
        'expected RestrictionType or RestrictionBatch, got %s instead'
        % type(other))
235
def __div__(cls, other):
    """RE.__div__(other) -> list.

    RE/other
    returns RE.search(other)."""
    return cls.search(other)

def __rdiv__(cls, other):
    """RE.__rdiv__(other) -> list.

    other/RE
    returns RE.search(other)."""
    return cls.search(other)

def __truediv__(cls, other):
    """RE.__truediv__(other) -> list.

    RE/other (true division)
    returns RE.search(other)."""
    return cls.search(other)

def __rtruediv__(cls, other):
    """RE.__rtruediv__(other) -> list.

    other/RE (true division)
    returns RE.search(other)."""
    return cls.search(other)

def __floordiv__(cls, other):
    """RE.__floordiv__(other) -> tuple.

    RE//other
    returns RE.catalyse(other)."""
    return cls.catalyse(other)

def __rfloordiv__(cls, other):
    """RE.__rfloordiv__(other) -> tuple.

    other//RE
    returns RE.catalyse(other)."""
    return cls.catalyse(other)
277
def __str__(cls):
    """RE.__str__() -> str.

    Return the name of the enzyme (the class ``__name__``)."""
    return cls.__name__
283
def __repr__(cls):
    """RE.__repr__() -> str.

    Return the enzyme name; evaluating that name in a namespace where the
    enzyme is defined gives the enzyme back."""
    return "%s" % cls.__name__
289
def __len__(cls):
    """RE.__len__() -> int.

    Length of the recognition site (the ``size`` class attribute)."""
    return cls.size
295
def __eq__(cls, other):
    """RE == other -> bool

    True if RE and other are the same enzyme (identity test)."""
    return other is cls

def __hash__(cls):
    """hash(RE) -> int.

    Keep the default, identity-based hash of ``type``. Defining __eq__
    without __hash__ makes the enzyme classes unhashable on Python 3, and
    an identity hash is consistent with the identity-based __eq__ above."""
    return type.__hash__(cls)
301
def __ne__(cls, other):
    """RE != other -> bool.

    False only when other is an enzyme with the same ``charac`` as RE
    (strict isoschizomer: same recognition site, same restriction).
    True in every other case, including when other is not an enzyme.

    Note that this is deliberately not the negation of ``==``, which tests
    identity."""
    if not isinstance(other, RestrictionType):
        return True
    return not (cls.charac == other.charac)
312
def __rshift__(cls, other):
    """RE >> other -> bool.

    neoschizomer : same recognition site, different restriction. -> True
    all the others (including non-enzyme operands) : -> False"""
    if not isinstance(other, RestrictionType):
        return False
    if cls.site == other.site and cls.charac != other.charac:
        return True
    return False
324
def __mod__(cls, other):
    """a % b -> bool.

    Test compatibility of the overhang of a and b.
    True if a and b have compatible overhang; the decision is delegated to
    ``a._mod1(b)``.

    Raises TypeError if b is not a RestrictionType."""
    if isinstance(other, RestrictionType):
        return cls._mod1(other)
    raise TypeError(
        'expected RestrictionType, got %s instead' % type(other))
334
def __ge__(cls, other):
    """a >= b -> bool.

    sorting order :
                1. size of the recognition site.
                2. if equal size, alphabetical order of the names.

    Raises NotImplementedError if b is not a RestrictionType."""
    if not isinstance(other, RestrictionType):
        raise NotImplementedError
    # Tuples compare element by element: site length first, name second.
    return (len(cls), cls.__name__) >= (len(other), other.__name__)

def __gt__(cls, other):
    """a > b -> bool.

    sorting order :
                1. size of the recognition site.
                2. if equal size, alphabetical order of the names.

    Raises NotImplementedError if b is not a RestrictionType."""
    if not isinstance(other, RestrictionType):
        raise NotImplementedError
    return (len(cls), cls.__name__) > (len(other), other.__name__)

def __le__(cls, other):
    """a <= b -> bool.

    sorting order :
                1. size of the recognition site.
                2. if equal size, alphabetical order of the names.

    Raises NotImplementedError if b is not a RestrictionType."""
    if not isinstance(other, RestrictionType):
        raise NotImplementedError
    return (len(cls), cls.__name__) <= (len(other), other.__name__)

def __lt__(cls, other):
    """a < b -> bool.

    sorting order :
                1. size of the recognition site.
                2. if equal size, alphabetical order of the names.

    Raises NotImplementedError if b is not a RestrictionType."""
    if not isinstance(other, RestrictionType):
        raise NotImplementedError
    return (len(cls), cls.__name__) < (len(other), other.__name__)
394
395
397 """Implement the methods that are common to all restriction enzymes.
398
399 All the methods are classmethod.
400
401 For internal use only. Not meant to be instantiate."""
402
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    return a list of all the site of RE in dna. Compensate for circular
    sequences and so on.

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
    A FormattedSeq is used as is (linear is then ignored); anything else
    is wrapped in FormattedSeq(dna, linear).

    if linear is False, the restriction sites that span over the
    boundaries will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    Side effect: the sequence searched is stored in ``RE.dna``."""
    if not isinstance(dna, FormattedSeq):
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
search = classmethod(search)
430
def all_suppliers(self):
    """RE.all_suppliers() -> None.

    Print the first field of every entry of the suppliers dictionary,
    sorted, one per line (separated by a comma and a newline)."""
    supply = [x[0] for x in suppliers_dict.values()]
    supply.sort()
    # Parenthesised single argument: valid both as a Python 2 print
    # statement and as a Python 3 print() call.
    print(",\n".join(supply))
    return
all_suppliers = classmethod(all_suppliers)
438
def is_equischizomer(self, other):
    """RE.is_equischizomer(other) -> bool.

    True if other is an equischizomer of RE.
    False else.

    equischizomer <=> same site, same position of restriction."""
    return not self != other
is_equischizomer = classmethod(is_equischizomer)
448
def is_neoschizomer(self, other):
    """RE.is_neoschizomer(other) -> bool.

    True if other is a neoschizomer of RE.
    False else.

    neoschizomer <=> same site, different position of restriction."""
    return self >> other
is_neoschizomer = classmethod(is_neoschizomer)
458
def is_isoschizomer(self, other):
    """RE.is_isoschizomer(other) -> bool.

    True if other is an isoschizomer of RE, i.e. either an equischizomer
    or a neoschizomer.
    False else.

    isoschizomer <=> same site."""
    return (not self != other) or self >> other
is_isoschizomer = classmethod(is_isoschizomer)
468
def equischizomers(self, batch=None):
    """RE.equischizomers([batch]) -> list.

    return a sorted list of all the equischizomers of RE, RE itself
    excluded.
    if batch is supplied it is used instead of the default AllEnzymes.

    equischizomer <=> same site, same position of restriction."""
    if not batch:
        batch = AllEnzymes
    # Exclude RE while filtering. Removing it afterwards with
    # r.index(self) raised ValueError whenever the supplied batch did not
    # contain RE.
    r = [x for x in batch if x is not self and not self != x]
    r.sort()
    return r
equischizomers = classmethod(equischizomers)
483
def neoschizomers(self, batch=None):
    """RE.neoschizomers([batch]) -> list.

    return a sorted list of all the neoschizomers of RE.
    if batch is supplied it is used instead of the default AllEnzymes.

    neoschizomer <=> same site, different position of restriction."""
    if not batch:
        batch = AllEnzymes
    r = [x for x in batch if self >> x]
    r.sort()
    return r
neoschizomers = classmethod(neoschizomers)
496
def isoschizomers(self, batch=None):
    """RE.isoschizomers([batch]) -> list.

    return a sorted list of all the equischizomers and neoschizomers of
    RE, RE itself excluded.
    if batch is supplied it is used instead of the default AllEnzymes."""
    if not batch:
        batch = AllEnzymes
    # Exclude RE while filtering. Removing it afterwards with
    # r.index(self) raised ValueError whenever the supplied batch did not
    # contain RE.
    r = [x for x in batch
         if x is not self and ((self >> x) or (not self != x))]
    r.sort()
    return r
isoschizomers = classmethod(isoschizomers)
509
def frequency(self):
    """RE.frequency() -> int.

    frequency of the site (the ``freq`` class attribute)."""
    return self.freq
frequency = classmethod(frequency)
516
517
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Their catalyse() method raises an error.

    Unknown and NotDefined are also part of the base classes of these
    enzymes.

    Internal use only. Not meant to be instantiated."""

    def cut_once(self):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False here."""
        return False
    cut_once = classmethod(cut_once)

    def cut_twice(self):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False here."""
        return False
    cut_twice = classmethod(cut_twice)

    def _modify(self, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        As the cut position of these enzymes is not known, the location is
        yielded unchanged."""
        yield location
    _modify = classmethod(_modify)

    def _rev_modify(self, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic: the location is yielded unchanged."""
        yield location
    _rev_modify = classmethod(_rev_modify)

    def characteristic(self):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        For these enzymes the four cut positions are always None."""
        return None, None, None, None, self.site
    characteristic = classmethod(characteristic)
598
600 """Implement the methods specific to the enzymes that cut the DNA only once
601
602 Correspond to ncuts values of 2 in emboss_e.###
603
604 Internal use only. Not meant to be instantiated."""
605
def cut_once(self):
    """RE.cut_once() -> bool.

    True if the enzyme cut the sequence one time on each strand.
    Always True here."""
    return True
cut_once = classmethod(cut_once)
612
def cut_twice(self):
    """RE.cut_twice() -> bool.

    True if the enzyme cut the sequence twice on each strand.
    Always False here."""
    return False
cut_twice = classmethod(cut_twice)
619
def _modify(self, location):
    """RE._modify(location) -> generator of int.

    for internal use only.

    location is an integer corresponding to the location of the match for
    the enzyme pattern in the sequence.
    _modify yields the real place where the enzyme will cut, that is
    location + RE.fst5.

    example :
        EcoRI pattern : GAATTC
        EcoRI will cut after the G.
        so in the sequence :
                 ______
        GAATACACGGAATTCGA
                 |
                 10
        dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
        EcoRI cut after the G so :
        EcoRI._modify(10) -> 11.
    """
    yield location + self.fst5
_modify = classmethod(_modify)
646
def _rev_modify(self, location):
    """RE._rev_modify(location) -> generator of int.

    for internal use only.

    as _modify for site situated on the antiparallel strand when the
    enzyme is not palindromic: yields location - RE.fst3.
    """
    yield location - self.fst3
_rev_modify = classmethod(_rev_modify)
657
def characteristic(self):
    """RE.characteristic() -> tuple.

    the tuple contains the attributes :
        fst5 -> first 5' cut (current strand) or None
        fst3 -> first 3' cut (complementary strand) or None
        scd5 -> second 5' cut (current strand) or None
        scd3 -> second 3' cut (complementary strand) or None
        site -> recognition site.

    The two "second cut" entries are always None here."""
    return self.fst5, self.fst3, None, None, self.site
characteristic = classmethod(characteristic)
669
670
672 """Implement the methods specific to the enzymes that cut the DNA twice
673
674 Correspond to ncuts values of 4 in emboss_e.###
675
676 Internal use only. Not meant to be instantiated."""
677
def cut_once(self):
    """RE.cut_once() -> bool.

    True if the enzyme cut the sequence one time on each strand.
    Always False here."""
    return False
cut_once = classmethod(cut_once)
684
def cut_twice(self):
    """RE.cut_twice() -> bool.

    True if the enzyme cut the sequence twice on each strand.
    Always True here."""
    return True
cut_twice = classmethod(cut_twice)
691
def _modify(self, location):
    """RE._modify(location) -> generator of int.

    for internal use only.

    location is an integer corresponding to the location of the match for
    the enzyme pattern in the sequence.
    _modify yields the real places where the enzyme will cut. As the
    enzyme cuts twice, two integers are yielded: location + RE.fst5, then
    location + RE.scd5.

    example (single cut) :
        EcoRI pattern : GAATTC
        EcoRI will cut after the G.
        so in the sequence :
                 ______
        GAATACACGGAATTCGA
                 |
                 10
        dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
        EcoRI cut after the G so :
        EcoRI._modify(10) -> 11.
    """
    yield location + self.fst5
    yield location + self.scd5
_modify = classmethod(_modify)
719
def _rev_modify(self, location):
    """RE._rev_modify(location) -> generator of int.

    for internal use only.

    as _modify for site situated on the antiparallel strand when the
    enzyme is not palindromic: yields location - RE.fst3, then
    location - RE.scd3.
    """
    yield location - self.fst3
    yield location - self.scd3
_rev_modify = classmethod(_rev_modify)
731
def characteristic(self):
    """RE.characteristic() -> tuple.

    the tuple contains the attributes :
        fst5 -> first 5' cut (current strand) or None
        fst3 -> first 3' cut (complementary strand) or None
        scd5 -> second 5' cut (current strand) or None
        scd3 -> second 3' cut (complementary strand) or None
        site -> recognition site."""
    return self.fst5, self.fst3, self.scd5, self.scd3, self.site
characteristic = classmethod(characteristic)
743
744
746 """Implement the information about methylation.
747
748 Enzymes of this class possess a site which is methylable."""
749
def is_methylable(self):
    """RE.is_methylable() -> bool.

    True if the recognition site is methylable. Always True here."""
    return True
is_methylable = classmethod(is_methylable)
756
758 """Implement informations about methylation sensitibility.
759
760 Enzymes of this class are not sensible to methylation."""
761
def is_methylable(self):
    """RE.is_methylable() -> bool.

    True if the recognition site is methylable. Always False here."""
    return False
is_methylable = classmethod(is_methylable)
768
770 """Implement the methods specific to the enzymes which are palindromic
771
772 palindromic means : the recognition site and its reverse complement are
773 identical.
774 Remarks : an enzyme with a site CGNNCG is palindromic even if some
775 of the sites that it will recognise are not.
776 for example here : CGAACG
777
778 Internal use only. Not meant to be instantiated."""
779
def _search(self):
    """RE._search() -> list.

    for internal use only.

    implement the search method for palindromic enzymes.

    Every match reported by ``RE.dna.finditer(RE.compsite, RE.size)`` is
    turned into its cut position(s) with _modify. The list is stored in
    ``RE.results``, passed through _drop() when not empty, and returned.
    """
    hits = self.dna.finditer(self.compsite, self.size)
    cuts = []
    for start, _group in hits:
        cuts.extend(self._modify(start))
    self.results = cuts
    if cuts:
        self._drop()
    # _drop() may rebind RE.results, so read the attribute again.
    return self.results
_search = classmethod(_search)
792
def is_palindromic(self):
    """RE.is_palindromic() -> bool.

    True if the recognition site is a palindrome. Always True here."""
    return True
is_palindromic = classmethod(is_palindromic)
799
800
802 """Implement the methods specific to the enzymes which are not palindromic
803
804 palindromic means : the recognition site and its reverse complement are
805 identical.
806
807 Internal use only. Not meant to be instantiated."""
808
def _search(self):
    """RE._search() -> list.

    for internal use only.

    implement the search method for non palindromic enzymes.

    Each match comes with a ``group`` callable. When ``group(str(RE))`` is
    true the cut positions are computed with _modify, otherwise with
    _rev_modify and also recorded in ``RE.on_minus``. The merged list is
    sorted, stored in ``RE.results``, passed through _drop() when not
    empty, and returned.
    """
    hits = self.dna.finditer(self.compsite, self.size)
    self.results = plus = []
    name = str(self)
    self.on_minus = minus = []
    for start, group in hits:
        if group(name):
            plus.extend(self._modify(start))
        else:
            minus.extend(self._rev_modify(start))
    plus.extend(minus)
    if plus:
        plus.sort()
        self._drop()
    # _drop() may rebind RE.results, so read the attribute again.
    return self.results
_search = classmethod(_search)
833
def is_palindromic(self):
    """RE.is_palindromic() -> bool.

    True if the recognition site is a palindrome. Always False here."""
    return False
is_palindromic = classmethod(is_palindromic)
840
842 """Implement the methods specific to the enzymes for which the overhang
843 is unknown.
844
845 These enzymes are also NotDefined and NoCut.
846
847 Internal use only. Not meant to be instantiated."""
848
def catalyse(self, dna, linear=True):
    """RE.catalyse(dna, linear=True) -> tuple of DNA.
    RE.catalyze(dna, linear=True) -> tuple of DNA.

    For the other enzymes this returns the fragments produced by
    restricting dna with RE. The way these enzymes cut is unknown, so
    NotImplementedError is always raised here; the arguments are accepted
    only to keep the signature uniform."""
    raise NotImplementedError('%s restriction is unknown.'
                              % self.__name__)
catalyze = catalyse = classmethod(catalyse)
863
def is_blunt(self):
    """RE.is_blunt() -> bool.

    True if the enzyme produces blunt end. Always False here.

    see also :
        RE.is_3overhang()
        RE.is_5overhang()
        RE.is_unknown()"""
    return False
is_blunt = classmethod(is_blunt)
875
def is_5overhang(self):
    """RE.is_5overhang() -> bool.

    True if the enzyme produces 5' overhang sticky end. Always False here.

    see also :
        RE.is_3overhang()
        RE.is_blunt()
        RE.is_unknown()"""
    return False
is_5overhang = classmethod(is_5overhang)
887
def is_3overhang(self):
    """RE.is_3overhang() -> bool.

    True if the enzyme produces 3' overhang sticky end. Always False here.

    see also :
        RE.is_5overhang()
        RE.is_blunt()
        RE.is_unknown()"""
    return False
is_3overhang = classmethod(is_3overhang)
899
def overhang(self):
    """RE.overhang() -> str. type of overhang of the enzyme.

    can be "3' overhang", "5' overhang", "blunt", "unknown".
    Always 'unknown' here."""
    return 'unknown'
overhang = classmethod(overhang)
906
def compatible_end(self, batch=None):
    """RE.compatible_end() -> list.

    list of all the enzymes that share compatible end with RE.
    Always empty here; batch is accepted only so that the signature matches
    the compatible_end methods that read a batch."""
    return []
compatible_end = classmethod(compatible_end)
913
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    for internal use only

    Overhang compatibility test between RE and other. The answer is
    always False here, whatever other is."""
    return False
_mod1 = classmethod(_mod1)
922
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict
        the dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.

        When RE has no site in dna, a one-element tuple holding the whole
        sequence is returned."""
        cuts = self.search(dna, linear)
        seq = self.dna
        if not cuts:
            return (seq[1:],)
        # Fragments lying between two consecutive cut positions.
        inner = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # Linear: head fragment, inner fragments, tail fragment.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # Circular: the tail and the head are one single fragment.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always True here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'blunt' here."""
        return 'blunt'
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE,
        i.e. all the blunt enzymes.
        if batch is supplied it is used instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Scan the selected batch. AllEnzymes used to be scanned
        # unconditionally, which silently ignored a caller-supplied batch.
        r = [x for x in iter(batch) if x.is_blunt()]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True when other is a Blunt enzyme too."""
        return issubclass(other, Blunt)
    _mod1 = staticmethod(_mod1)
1041
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict
        the dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.

        When RE has no site in dna, a one-element tuple holding the whole
        sequence is returned."""
        cuts = self.search(dna, linear)
        seq = self.dna
        if not cuts:
            return (seq[1:],)
        # Fragments lying between two consecutive cut positions.
        inner = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # Linear: head fragment, inner fragments, tail fragment.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # Circular: the tail and the head are one single fragment.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always True here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always "5' overhang" here."""
        return "5' overhang"
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE:
        the 5' overhang enzymes x for which ``x % RE`` is true.
        if batch is supplied it is used instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Scan the selected batch. AllEnzymes used to be scanned
        # unconditionally, which silently ignored a caller-supplied batch.
        r = [x for x in iter(batch) if x.is_5overhang() and x % self]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov5 enzyme can be compatible; the decision is then
        delegated to RE._mod2(other)."""
        if issubclass(other, Ov5):
            return self._mod2(other)
        return False
    _mod1 = classmethod(_mod1)
1159
1160
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict
        the dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.

        When RE has no site in dna, a one-element tuple holding the whole
        sequence is returned."""
        cuts = self.search(dna, linear)
        seq = self.dna
        if not cuts:
            return (seq[1:],)
        # Fragments lying between two consecutive cut positions.
        inner = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # Linear: head fragment, inner fragments, tail fragment.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # Circular: the tail and the head are one single fragment.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always True here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_3overhang = classmethod(is_3overhang)

    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always "3' overhang" here."""
        return "3' overhang"
    overhang = classmethod(overhang)

    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE:
        the 3' overhang enzymes x for which ``x % RE`` is true.
        if batch is supplied it is used instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Scan the selected batch. AllEnzymes used to be scanned
        # unconditionally, which silently ignored a caller-supplied batch.
        r = [x for x in iter(batch) if x.is_3overhang() and x % self]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov3 enzyme can be compatible; the decision is then
        delegated to RE._mod2(other)."""
        if issubclass(other, Ov3):
            return self._mod2(other)
        return False
    _mod1 = classmethod(_mod1)
1281
1282
1284 """Implement the methods specific to the enzymes for which the overhang
1285 and the cut are not variable.
1286
1287 Typical example : EcoRI -> G^AATT_C
1288 The overhang will always be AATT
1289 Notes :
1290 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1291 There overhang is always the same : blunt!
1292
1293 Internal use only. Not meant to be instantiated."""
1294
def _drop(self):
    """RE._drop() -> None.

    for internal use only.

    Works on ``RE.results`` and relies on positions being 1-based.

    linear sequence : drop the leading positions smaller than 1, then keep
    positions only up to the first one that is not smaller than
    len(RE.dna).
    circular sequence : shift the leading positions smaller than 1 up by
    the sequence length, and the trailing positions greater than the
    sequence length down by it. The list is modified in place."""
    length = len(self.dna)
    if self.dna.is_linear():
        inside = itertools.dropwhile(lambda x: x < 1, self.results)
        self.results = list(
            itertools.takewhile(lambda x: x < length, inside))
        return
    hits = self.results
    for index, location in enumerate(hits):
        if location >= 1:
            break
        hits[index] += length
    # Walk a reversed copy; only the original list is updated.
    for index, location in enumerate(hits[::-1]):
        if location <= length:
            break
        hits[-(index + 1)] -= length
    return
_drop = classmethod(_drop)
1329
def is_defined(self):
    """RE.is_defined() -> bool.

    True if the sequence recognised and cut is constant,
    i.e. the recognition site is not degenerated AND the enzyme cut inside
    the site. Always True here.

    see also :
        RE.is_ambiguous()
        RE.is_unknown()"""
    return True
is_defined = classmethod(is_defined)
1342
1344 """RE.is_ambiguous() -> bool.
1345
1346 True if the sequence recognised and cut is ambiguous,
1347 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1348 the site.
1349
1350 see also :
1351 RE.is_defined()
1352 RE.is_unknown()"""
1353 return False
1354 is_ambiguous = classmethod(is_ambiguous)
1355
1357 """RE.is_unknown() -> bool.
1358
1359 True if the sequence is unknown,
1360 i.e. the recognition site has not been characterised yet.
1361
1362 see also :
1363 RE.is_defined()
1364 RE.is_ambiguous()"""
1365 return False
1366 is_unknown = classmethod(is_unknown)
1367
1369 """RE.elucidate() -> str
1370
1371 return a representation of the site with the cut on the (+) strand
1372 represented as '^' and the cut on the (-) strand as '_'.
1373 ie :
1374 >>> EcoRI.elucidate() # 5' overhang
1375 'G^AATT_C'
1376 >>> KpnI.elucidate() # 3' overhang
1377 'G_GTAC^C'
1378 >>> EcoRV.elucidate() # blunt
1379 'GAT^_ATC'
1380 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1381 '? GTATAC ?'
1382 >>>
1383 """
1384 f5 = self.fst5
1385 f3 = self.fst3
1386 site = self.site
1387 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1388 elif self.is_5overhang() :
1389 if f5 == f3 == 0 : re = 'N^'+ self.site + '_N'
1390 elif f3 == 0 : re = site[:f5] + '^' + site[f5:] + '_N'
1391 else : re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1392 elif self.is_blunt() :
1393 re = site[:f5] + '^_' + site[f5:]
1394 else :
1395 if f5 == f3 == 0 : re = 'N_'+ site + '^N'
1396 else : re = site[:f3] + '_' + site[f3:f5] +'^'+ site[f5:]
1397 return re
1398 elucidate = classmethod(elucidate)
1399
1400 - def _mod2(self, other) :
1401 """RE._mod2(other) -> bool.
1402
1403 for internal use only
1404
1405 test for the compatibility of restriction ending of RE and other."""
1406
1407
1408
1409 if other.ovhgseq == self.ovhgseq :
1410 return True
1411 elif issubclass(other, Ambiguous) :
1412 return other._mod2(self)
1413 else:
1414 return False
1415 _mod2 = classmethod(_mod2)
1416
1417
1419 """Implement the methods specific to the enzymes for which the overhang
1420 is variable.
1421
1422 Typical example : BstXI -> CCAN_NNNN^NTGG
1423 The overhang can be any sequence of 4 bases.
1424 Notes :
1425 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1426 There overhang is always the same : blunt!
1427
1428 Internal use only. Not meant to be instantiated."""
1429
1431 """RE._drop() -> list.
1432
1433 for internal use only.
1434
1435 drop the site that are situated outside the sequence in linear sequence.
1436 modify the index for site in circular sequences."""
1437 length = len(self.dna)
1438 drop = itertools.dropwhile
1439 take = itertools.takewhile
1440 if self.dna.is_linear() :
1441 self.results = [x for x in drop(lambda x : x < 1, self.results)]
1442 self.results = [x for x in take(lambda x : x <length, self.results)]
1443 else :
1444 for index, location in enumerate(self.results) :
1445 if location < 1 :
1446 self.results[index] += length
1447 else :
1448 break
1449 for index, location in enumerate(self.results[::-1]) :
1450 if location > length :
1451 self.results[-(index+1)] -= length
1452 else :
1453 break
1454 return
1455 _drop = classmethod(_drop)
1456
1458 """RE.is_defined() -> bool.
1459
1460 True if the sequence recognised and cut is constant,
1461 i.e. the recognition site is not degenerated AND the enzyme cut inside
1462 the site.
1463
1464 see also :
1465 RE.is_ambiguous()
1466 RE.is_unknown()"""
1467 return False
1468 is_defined = classmethod(is_defined)
1469
1471 """RE.is_ambiguous() -> bool.
1472
1473 True if the sequence recognised and cut is ambiguous,
1474 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1475 the site.
1476
1477
1478 see also :
1479 RE.is_defined()
1480 RE.is_unknown()"""
1481 return True
1482 is_ambiguous = classmethod(is_ambiguous)
1483
1485 """RE.is_unknown() -> bool.
1486
1487 True if the sequence is unknown,
1488 i.e. the recognition site has not been characterised yet.
1489
1490 see also :
1491 RE.is_defined()
1492 RE.is_ambiguous()"""
1493 return False
1494 is_unknown = classmethod(is_unknown)
1495
1496 - def _mod2(self, other) :
1497 """RE._mod2(other) -> bool.
1498
1499 for internal use only
1500
1501 test for the compatibility of restriction ending of RE and other."""
1502
1503
1504
1505 if len(self.ovhgseq) != len(other.ovhgseq) :
1506 return False
1507 else :
1508 se = self.ovhgseq
1509 for base in se :
1510 if base in 'ATCG' :
1511 pass
1512 if base in 'N' :
1513 se = '.'.join(se.split('N'))
1514 if base in 'RYWMSKHDBV':
1515 expand = '['+ matching[base] + ']'
1516 se = expand.join(se.split(base))
1517 if re.match(se, other.ovhgseq) :
1518 return True
1519 else :
1520 return False
1521 _mod2 = classmethod(_mod2)
1522
1524 """RE.elucidate() -> str
1525
1526 return a representation of the site with the cut on the (+) strand
1527 represented as '^' and the cut on the (-) strand as '_'.
1528 ie :
1529 >>> EcoRI.elucidate() # 5' overhang
1530 'G^AATT_C'
1531 >>> KpnI.elucidate() # 3' overhang
1532 'G_GTAC^C'
1533 >>> EcoRV.elucidate() # blunt
1534 'GAT^_ATC'
1535 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1536 '? GTATAC ?'
1537 >>>
1538 """
1539 f5 = self.fst5
1540 f3 = self.fst3
1541 length = len(self)
1542 site = self.site
1543 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1544 elif self.is_5overhang() :
1545 if f3 == f5 == 0 :
1546 re = 'N^' + site +'_N'
1547 elif 0 <= f5 <= length and 0 <= f3+length <= length :
1548 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1549 elif 0 <= f5 <= length :
1550 re = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
1551 elif 0 <= f3+length <= length :
1552 re = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
1553 elif f3+length < 0 :
1554 re = 'N^'*abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
1555 elif f5 > length :
1556 re = site + (f5-length)*'N'+'^'+(length+f3-f5)*'N'+'_N'
1557 else :
1558 re = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
1559 elif self.is_blunt() :
1560 if f5 < 0 :
1561 re = 'N^_' + abs(f5)*'N' + site
1562 elif f5 > length :
1563 re = site + (f5-length)*'N' + '^_N'
1564 else :
1565 raise ValueError('%s.easyrepr() : error f5=%i' \
1566 % (self.name,f5))
1567 else :
1568 if f3 == 0 :
1569 if f5 == 0 : re = 'N_' + site + '^N'
1570 else : re = site + '_' + (f5-length)*'N' + '^N'
1571 elif 0 < f3+length <= length and 0 <= f5 <= length :
1572 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1573 elif 0 < f3+length <= length :
1574 re = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
1575 elif 0 <= f5 <= length:
1576 re = 'N_' +'N'*(f3+length) + site[:f5] + '^' + site[f5:]
1577 elif f3 > 0 :
1578 re = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
1579 elif f5 < 0 :
1580 re = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
1581 else :
1582 re = 'N_' + abs(f3+length)*'N' + site + (f5-length)*'N' + '^N'
1583 return re
1584 elucidate = classmethod(elucidate)
1585
1586
1588 """Implement the methods specific to the enzymes for which the overhang
1589 is not characterised.
1590
1591 Correspond to NoCut and Unknown.
1592
1593 Internal use only. Not meant to be instantiated."""
1594
1596 """RE._drop() -> list.
1597
1598 for internal use only.
1599
1600 drop the site that are situated outside the sequence in linear sequence.
1601 modify the index for site in circular sequences."""
1602 if self.dna.is_linear() :
1603 return
1604 else :
1605 length = len(self.dna)
1606 for index, location in enumerate(self.results) :
1607 if location < 1 :
1608 self.results[index] += length
1609 else :
1610 break
1611 for index, location in enumerate(self.results[:-1]) :
1612 if location > length :
1613 self.results[-(index+1)] -= length
1614 else :
1615 break
1616 return
1617 _drop = classmethod(_drop)
1618
1620 """RE.is_defined() -> bool.
1621
1622 True if the sequence recognised and cut is constant,
1623 i.e. the recognition site is not degenerated AND the enzyme cut inside
1624 the site.
1625
1626 see also :
1627 RE.is_ambiguous()
1628 RE.is_unknown()"""
1629 return False
1630 is_defined = classmethod(is_defined)
1631
1633 """RE.is_ambiguous() -> bool.
1634
1635 True if the sequence recognised and cut is ambiguous,
1636 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1637 the site.
1638
1639
1640 see also :
1641 RE.is_defined()
1642 RE.is_unknown()"""
1643 return False
1644 is_ambiguous = classmethod(is_ambiguous)
1645
1647 """RE.is_unknown() -> bool.
1648
1649 True if the sequence is unknown,
1650 i.e. the recognition site has not been characterised yet.
1651
1652 see also :
1653 RE.is_defined()
1654 RE.is_ambiguous()"""
1655 return True
1656 is_unknown = classmethod(is_unknown)
1657
1658 - def _mod2(self, other) :
1659 """RE._mod2(other) -> bool.
1660
1661 for internal use only
1662
1663 test for the compatibility of restriction ending of RE and other."""
1664
1665
1666
1667
1668
1669
1670 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!" \
1671 % (str(self), str(other), str(self)))
1672 _mod2 = classmethod(_mod2)
1673
1675 """RE.elucidate() -> str
1676
1677 return a representation of the site with the cut on the (+) strand
1678 represented as '^' and the cut on the (-) strand as '_'.
1679 ie :
1680 >>> EcoRI.elucidate() # 5' overhang
1681 'G^AATT_C'
1682 >>> KpnI.elucidate() # 3' overhang
1683 'G_GTAC^C'
1684 >>> EcoRV.elucidate() # blunt
1685 'GAT^_ATC'
1686 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1687 '? GTATAC ?'
1688 >>>
1689 """
1690 return '? %s ?' % self.site
1691 elucidate = classmethod(elucidate)
1692
1693
1695
1696
1697
1698
1699 """Implement the methods specific to the enzymes which are commercially
1700 available.
1701
1702 Internal use only. Not meant to be instantiated."""
1703
1705 """RE.suppliers() -> print the suppliers of RE."""
1706 supply = suppliers_dict.items()
1707 for k,v in supply :
1708 if k in self.suppl :
1709 print v[0]+','
1710 return
1711 suppliers = classmethod(suppliers)
1712
1714 """RE.supplier_list() -> list.
1715
1716 list of the supplier names for RE."""
1717 return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
1718 supplier_list = classmethod(supplier_list)
1719
1721 """RE.buffers(supplier) -> string.
1722
1723 not implemented yet."""
1724 return
1725 buffers = classmethod(buffers)
1726
1728 """RE.iscomm() -> bool.
1729
1730 True if RE has suppliers."""
1731 return True
1732 is_comm = classmethod(is_comm)
1733
1734
1736 """Implement the methods specific to the enzymes which are not commercially
1737 available.
1738
1739 Internal use only. Not meant to be instantiated."""
1740
1742 """RE.suppliers() -> print the suppliers of RE."""
1743 return None
1744 suppliers = staticmethod(suppliers)
1745
1747 """RE.supplier_list() -> list.
1748
1749 list of the supplier names for RE."""
1750 return []
1751 supplier_list = classmethod(supplier_list)
1752
1754 """RE.buffers(supplier) -> string.
1755
1756 not implemented yet."""
1757 raise TypeError("Enzyme not commercially available.")
1758 buffers = classmethod(buffers)
1759
1761 """RE.iscomm() -> bool.
1762
1763 True if RE has suppliers."""
1764 return False
1765 is_comm = classmethod(is_comm)
1766
1767
1768
1769
1770
1771
1772
1773
1774
1776
def __init__(self, first=(), suppliers=()):
    """RestrictionBatch([sequence[, suppliers]]) -> new RestrictionBatch.

    first     -- iterable of enzymes; every element is passed through
                 self.format() before being stored in the batch.
    suppliers -- iterable of supplier codes (keys of suppliers_dict); the
                 enzymes listed under each code are added to the batch.

    The codes given in suppliers are remembered in self.suppliers so that
    add_supplier() and current_suppliers() have a list to work on."""
    first = [self.format(x) for x in first]
    # NOTE(review): eval() resolves enzyme names coming from suppliers_dict
    # into objects; this is only safe as long as that table is trusted data.
    first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
    set.__init__(self, first)
    self.mapping = dict.fromkeys(self)
    self.already_mapped = DNA('')
    # add_supplier() appends to self.suppliers and current_suppliers()
    # iterates over it, so the attribute has to exist on every instance.
    self.suppliers = list(suppliers)
1784
1786 if len(self) < 5 :
1787 return '+'.join(self.elements())
1788 else :
1789 return '...'.join(('+'.join(self.elements()[:2]),\
1790 '+'.join(self.elements()[-2:])))
1791
1793 return 'RestrictionBatch(%s)' % self.elements()
1794
1801
1803 return self.search(other)
1804
1806 return self.search(other)
1807
1809 """B.get(enzyme[, add]) -> enzyme class.
1810
1811 if add is True and enzyme is not in B add enzyme to B.
1812 if add is False (which is the default) only return enzyme.
1813 if enzyme is not a RestrictionType or can not be evaluated to
1814 a RestrictionType, raise a ValueError."""
1815 e = self.format(enzyme)
1816 if e in self :
1817 return e
1818 elif add :
1819 self.add(e)
1820 return e
1821 else :
1822 raise ValueError('enzyme %s is not in RestrictionBatch' \
1823 % e.__name__)
1824
1826 """B.lambdasplit(func) -> RestrictionBatch .
1827
1828 the new batch will contains only the enzymes for which
1829 func return True."""
1830 d = [x for x in itertools.ifilter(func, self)]
1831 new = RestrictionBatch()
1832 new._data = dict(map(None, d, [True]*len(d)))
1833 return new
1834
1836 """B.add_supplier(letter) -> add a new set of enzyme to B.
1837
1838 letter represents the suppliers as defined in the dictionary
1839 RestrictionDictionary.suppliers
1840 return None.
1841 raise a KeyError if letter is not a supplier code."""
1842 supplier = suppliers_dict[letter]
1843 self.suppliers.append(letter)
1844 for x in supplier[1] :
1845 self.add_nocheck(eval(x))
1846 return
1847
1849 """B.current_suppliers() -> add a new set of enzyme to B.
1850
1851 return a sorted list of the suppliers which have been used to
1852 create the batch."""
1853 suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
1854 suppl_list.sort()
1855 return suppl_list
1856
1858 """ b += other -> add other to b, check the type of other."""
1859 self.add(other)
1860 return self
1861
1863 """ b + other -> new RestrictionBatch."""
1864 new = self.__class__(self)
1865 new.add(other)
1866 return new
1867
1869 """B.remove(other) -> remove other from B if other is a RestrictionType.
1870
1871 Safe set.remove method. Verify that other is a RestrictionType or can be
1872 evaluated to a RestrictionType.
1873 raise a ValueError if other can not be evaluated to a RestrictionType.
1874 raise a KeyError if other is not in B."""
1875 return set.remove(self, self.format(other))
1876
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Checked counterpart of set.add: other is first run through
    self.format(), and the value it returns is what gets stored.
    raise a ValueError if other can not be evaluated to a RestrictionType.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
1885
1887 """B.add_nocheck(other) -> add other to B. don't check type of other.
1888 """
1889 return set.add(self, other)
1890
1908
1909
1911 """B.is_restriction(y) -> bool.
1912
1913 True is y or eval(y) is a RestrictionType."""
1914 return isinstance(y, RestrictionType) or \
1915 isinstance(eval(str(y)), RestrictionType)
1916
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Return a new batch holding the enzymes of B that satisfy every class
    given in classes. By default an enzyme must be a subclass of the class;
    passing <class name>=False as a keyword reverses the condition for that
    class (the enzyme must NOT be a subclass of it).

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions."""
    def splittest(element):
        # An element is kept only if it agrees with the wanted membership
        # of every class listed.
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            if issubclass(element, klass):
                if not wanted:
                    return False
            elif wanted:
                return False
        return True
    # Fill the new batch through its own API: writing a private _data
    # attribute leaves the batch empty when it is backed by the builtin set.
    new = RestrictionBatch()
    for enzyme in self:
        if splittest(enzyme):
            # Elements come from an existing batch, no need to re-check them.
            new.add_nocheck(enzyme)
    return new
1939
1941 """B.elements() -> tuple.
1942
1943 give all the names of the enzymes in B sorted alphabetically."""
1944 l = [str(e) for e in self]
1945 l.sort()
1946 return l
1947
1949 """B.as_string() -> list.
1950
1951 return a list of the name of the elements of B."""
1952 return [str(e) for e in self]
1953
1955 """B.suppl_codes() -> dict
1956
1957 letter code for the suppliers"""
1958 supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
1959 return supply
1960 suppl_codes = classmethod(suppl_codes)
1961
1963 "B.show_codes() -> letter codes for the suppliers"""
1964 supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
1965 print '\n'.join(supply)
1966 return
1967 show_codes = classmethod(show_codes)
1968
1970 """B.search(dna) -> dict."""
1971
1972
1973
1974
1975 if isinstance(dna, DNA) :
1976 if (dna, linear) == self.already_mapped :
1977 return self.mapping
1978 else :
1979 self.already_mapped = dna, linear
1980 fseq = FormattedSeq(dna, linear)
1981 self.mapping = dict([(x, x.search(fseq)) for x in self])
1982 return self.mapping
1983 elif isinstance(dna, FormattedSeq) :
1984 if (dna, dna.linear) == self.already_mapped :
1985 return self.mapping
1986 else :
1987 self.already_mapped = dna, dna.linear
1988 self.mapping = dict([(x, x.search(dna)) for x in self])
1989 return self.mapping
1990 raise TypeError("Expected Seq or MutableSeq instance, got %s instead"\
1991 %type(dna))
1992
1993
1994
1995
1996
1997
1998
1999 -class Analysis(RestrictionBatch, PrintFormat) :
2000
2003 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2004
2005 For most of the method of this class if a dictionary is given it will
2006 be used as the base to calculate the results.
2007 If no dictionary is given a new analysis using the Restriction Batch
2008 which has been given when the Analysis class has been instantiated."""
2009 RestrictionBatch.__init__(self, restrictionbatch)
2010 self.rb = restrictionbatch
2011 self.sequence = sequence
2012 self.linear = linear
2013 if self.sequence :
2014 self.search(self.sequence, self.linear)
2015
2017 return 'Analysis(%s,%s,%s)'%\
2018 (repr(self.rb),repr(self.sequence),self.linear)
2019
2021 """A._sub_set(other_set) -> dict.
2022
2023 Internal use only.
2024
2025 screen the results through wanted set.
2026 Keep only the results for which the enzymes is in wanted set.
2027 """
2028 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2029
2031 """A._boundaries(start, end) -> tuple.
2032
2033 Format the boundaries for use with the methods that limit the
2034 search to only part of the sequence given to analyse.
2035 """
2036 if not isinstance(start, int) :
2037 raise TypeError('expected int, got %s instead' % type(start))
2038 if not isinstance(end, int) :
2039 raise TypeError('expected int, got %s instead' % type(end))
2040 if start < 1 :
2041 start += len(self.sequence)
2042 if end < 1 :
2043 end += len(self.sequence)
2044 if start < end :
2045 pass
2046 else :
2047 start, end == end, start
2048 if start < 1 :
2049 start == 1
2050 if start < end :
2051 return start, end, self._test_normal
2052 else :
2053 return start, end, self._test_reverse
2054
2056 """A._test_normal(start, end, site) -> bool.
2057
2058 Internal use only
2059 Test if site is in between start and end.
2060 """
2061 return start <= site < end
2062
2064 """A._test_reverse(start, end, site) -> bool.
2065
2066 Internal use only
2067 Test if site is in between end and start (for circular sequences).
2068 """
2069 return start <= site <= len(self.sequence) or 1 <= site < end
2070
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    When dct is missing or empty, self.mapping is printed instead.
    The formatting itself is done by PrintFormat.print_that, whose return
    value is passed back to the caller.
    """
    results = dct or self.mapping
    print
    return PrintFormat.print_that(self, results, title, s1)
2080
2082 """A.change(**attribute_name) -> Change attribute of Analysis.
2083
2084 It is possible to change the width of the shell by setting
2085 self.ConsoleWidth to what you want.
2086 self.NameWidth refer to the maximal length of the enzyme name.
2087
2088 Changing one of these parameters here might not give the results
2089 you expect. In which case, you can settle back to a 80 columns shell
2090 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
2091 you get it right."""
2092 for k,v in what.iteritems() :
2093 if k in ('NameWidth', 'ConsoleWidth') :
2094 setattr(self, k, v)
2095 self.Cmodulo = self.ConsoleWidth % self.NameWidth
2096 self.PrefWidth = self.ConsoleWidth - self.Cmodulo
2097 elif k is 'sequence' :
2098 setattr(self, 'sequence', v)
2099 self.search(self.sequence, self.linear)
2100 elif k is 'rb' :
2101 self = Analysis.__init__(self, v, self.sequence, self.linear)
2102 elif k is 'linear' :
2103 setattr(self, 'linear', v)
2104 self.search(self.sequence, v)
2105 elif k in ('Indent', 'Maxsize') :
2106 setattr(self, k, v)
2107 elif k in ('Cmodulo', 'PrefWidth') :
2108 raise AttributeError( \
2109 'To change %s, change NameWidth and/or ConsoleWidth' \
2110 % name)
2111 else :
2112 raise AttributeError( \
2113 'Analysis has no attribute %s' % name)
2114 return
2115
2117 """A.full() -> dict.
2118
2119 Full Restriction Map of the sequence."""
2120 return self.mapping
2121
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which have a blunt restriction site.

    dct -- dictionary {enzyme: sites} to filter. When it is not given
           (None) self.mapping is used. An empty dictionary is a valid
           input and gives an empty result."""
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_blunt()])
2129
2131 """A.overhang5([dct]) -> dict.
2132
2133 Only the enzymes which have a 5' overhang restriction site."""
2134 if not dct :
2135 dct = self.mapping
2136 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2137
2138
2140 """A.Overhang3([dct]) -> dict.
2141
2142 Only the enzymes which have a 3'overhang restriction site."""
2143 if not dct :
2144 dct = self.mapping
2145 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2146
2147
2149 """A.defined([dct]) -> dict.
2150
2151 Only the enzymes that have a defined restriction site in Rebase."""
2152 if not dct :
2153 dct = self.mapping
2154 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2155
2157 """A.with_sites([dct]) -> dict.
2158
2159 Enzymes which have at least one site in the sequence."""
2160 if not dct :
2161 dct = self.mapping
2162 return dict([(k,v) for k,v in dct.iteritems() if v])
2163
2165 """A.without_site([dct]) -> dict.
2166
2167 Enzymes which have no site in the sequence."""
2168 if not dct :
2169 dct = self.mapping
2170 return dict([(k,v) for k,v in dct.iteritems() if not v])
2171
2173 """A.With_N_Sites(N [, dct]) -> dict.
2174
2175 Enzymes which cut N times the sequence."""
2176 if not dct :
2177 dct = self.mapping
2178 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2179
2181 if not dct :
2182 dct = self.mapping
2183 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2184
2186 """A.with_name(list_of_names [, dct]) ->
2187
2188 Limit the search to the enzymes named in list_of_names."""
2189 for i, enzyme in enumerate(names) :
2190 if not enzyme in AllEnzymes :
2191 print "no datas for the enzyme:", str(name)
2192 del names[i]
2193 if not dct :
2194 return RestrictionBatch(names).search(self.sequence)
2195 return dict([(n, dct[n]) for n in names if n in dct])
2196
2198 """A.with_site_size(site_size [, dct]) ->
2199
2200 Limit the search to the enzymes whose site is of size <site_size>."""
2201 sites = [name for name in self if name.size == site_size]
2202 if not dct :
2203 return RestrictionBatch(sites).search(self.sequence)
2204 return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
2205
2207 """A.only_between(start, end[, dct]) -> dict.
2208
2209 Enzymes that cut the sequence only in between start and end."""
2210 start, end, test = self._boundaries(start, end)
2211 if not dct :
2212 dct = self.mapping
2213 d = dict(dct)
2214 for key, sites in dct.iteritems() :
2215 if not sites :
2216 del d[key]
2217 continue
2218 for site in sites:
2219 if test(start, end, site) :
2220 continue
2221 else :
2222 del d[key]
2223 break
2224 return d
2225
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    start, end -- boundaries, normalised by self._boundaries(), which also
                  supplies the test used to decide if a site is inside.
    dct        -- dictionary {enzyme: sites} to filter. When it is not
                  given (None) self.mapping is used. An empty dictionary
                  is a valid input and gives an empty result."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if test(start, end, site):
                # One site inside the region is enough to keep the enzyme,
                # with its complete list of sites.
                d[key] = sites
                break
    return d
2242
2244 """A.show_only_between(start, end [, dct]) -> dict.
2245
2246 Enzymes that cut the sequence outside of the region
2247 in between start and end but do not cut inside."""
2248 d = []
2249 if start <= end :
2250 d = [(k, [vv for vv in v if start<=vv<=end])
2251 for v in self.between(start, end, dct)]
2252 else :
2253 d = [(k, [vv for vv in v if start<=vv or vv <= end])
2254 for v in self.between(start, end, dct)]
2255 return dict(d)
2256
2258 """A.only_outside(start, end [, dct]) -> dict.
2259
2260 Enzymes that cut the sequence outside of the region
2261 in between start and end but do not cut inside."""
2262 start, end, test = self._boundaries(start, end)
2263 if not dct : dct = self.mapping
2264 d = dict(dct)
2265 for key, sites in dct.iteritems() :
2266 if not sites :
2267 del d[key]
2268 continue
2269 for site in sites :
2270 if test(start, end, site) :
2271 del d[key]
2272 break
2273 else :
2274 continue
2275 return d
2276
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    start, end -- boundaries, normalised by self._boundaries(), which also
                  supplies the test used to decide if a site is inside.
    dct        -- dictionary {enzyme: sites} to filter. When it is not
                  given (None) self.mapping is used. An empty dictionary
                  is a valid input and gives an empty result."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if not test(start, end, site):
                # One site outside the region is enough to keep the enzyme,
                # with its complete list of sites.
                d[key] = sites
                break
    return d
2294
2295
2297 """A.do_not_cut(start, end [, dct]) -> dict.
2298
2299 Enzymes that do not cut the region in between start and end."""
2300 if not dct :
2301 dct = self.mapping
2302 d = self.without_site()
2303 d.update(self.only_outside(start, end, dct))
2304 return d
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
# Module-level construction of the enzyme classes and of the three batches
# CommOnly, NonComm and AllEnzymes. Statement order matters here: the batches
# must exist before the loop fills them, and the final `del` relies on the
# names bound by the loops and comprehensions above it.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
for TYPE, (bases, enzymes) in typedict.iteritems() :
    # Each value of typedict is a pair (bases, enzymes). `bases` holds names
    # which are evaluated into the objects used as base classes below.
    bases = tuple([eval(x) for x in bases])
    # Build, for this combination of bases, the object T that is then called
    # to create every enzyme listed in `enzymes`.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes :
        # k is the enzyme name; enzymedict[k] is passed as the third
        # argument of the class creation call.
        newenz = T(k, bases, enzymedict[k])
        # Sort the new enzyme into one of the two batches according to
        # is_comm(). add_nocheck stores the object as it is.
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
# AllEnzymes is the union of the two batches.
AllEnzymes = CommOnly | NonComm
# Bind every enzyme under its own name (str(enzyme)) in this namespace and
# export those names through __all__.
# NOTE(review): map(None, a, b) is the Python 2 way of pairing two sequences.
names = [str(x) for x in AllEnzymes]
locals().update(dict(map(None, names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
# Remove the temporaries left behind by the loops above.
# NOTE(review): `x` only exists here because list comprehensions leak their
# variable in Python 2 - confirm before running under another version.
del k, x, enzymes, TYPE, bases, names
2379