1
2
3
4
5
6
7
8
9
10
""" Notes about the various classes of the restriction enzyme implementation.

        RestrictionType is the type of all restriction enzymes.
    ----------------------------------------------------------------------------
        AbstractCut implements some methods that are common to all enzymes.
    ----------------------------------------------------------------------------
        NoCut, OneCut, TwoCuts  represent the number of double strand cuts
                                produced by the enzyme.
                                They correspond to the 4th field of the rebase
                                record emboss_e.NNN.
                0->NoCut    : the enzyme is not characterised.
                2->OneCut   : the enzyme produces one double strand cut.
                4->TwoCuts  : two double strand cuts.
    ----------------------------------------------------------------------------
        Meth_Dep, Meth_Undep    represent the methylation susceptibility to
                                the enzyme.
                                Not implemented yet.
    ----------------------------------------------------------------------------
        Palindromic,            if the site is palindromic or not.
        NotPalindromic          allow some optimisations of the code.
                                No need to check the reverse strand
                                with palindromic sites.
    ----------------------------------------------------------------------------
        Unknown, Blunt,         represent the overhang.
        Ov5, Ov3                Unknown is here for symmetry reasons and
                                corresponds to enzymes that are not
                                characterised in rebase.
    ----------------------------------------------------------------------------
        Defined, Ambiguous,     represent the sequence of the overhang.
        NotDefined
                                NotDefined is for enzymes not characterised in
                                rebase.

                                Defined corresponds to enzymes that display a
                                constant overhang whatever the sequence.
                                ex : EcoRI. G^AATTC -> overhang :AATT
                                            CTTAA^G

                                Ambiguous : the overhang varies with the
                                sequence restricted.
                                Typically enzymes which cut outside their
                                restriction site or (but not always)
                                inside an ambiguous site.
                                ex :
                                AcuI CTGAAG(22/20)  -> overhang : NN
                                AasI GACNNN^NNNGTC  -> overhang : NN
                                     CTGN^NNNNNCAG

            note : these 3 classes refer to the overhang, not the site.
               So the enzyme ApoI (RAATTY) is defined even if its restriction
               site is ambiguous.

                    ApoI R^AATTY -> overhang : AATT -> Defined
                         YTTAA^R
               Accordingly, blunt enzymes are always Defined even
               when they cut outside their restriction site.
    ----------------------------------------------------------------------------
        Not_available,          as found in rebase file emboss_r.NNN files.
        Commercially_available
                                allow the selection of the enzymes according to
                                their suppliers to reduce the quantity
                                of results.
                                Also will allow the implementation of buffer
                                compatibility tables. Not implemented yet.

                                The list of suppliers is extracted from
                                emboss_s.NNN
    ----------------------------------------------------------------------------
"""
80
81 import re
82 import itertools
83
84 from Bio.Seq import Seq, MutableSeq
85 from Bio.Alphabet import IUPAC
86
87 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict,\
88 typedict, suppliers as suppliers_dict
89 from Bio.Restriction.RanaConfig import *
90 from Bio.Restriction.PrintFormat import PrintFormat
91 from Bio.Restriction.DNAUtils import check_bases
92
93
94
# Lookup table keyed by the single-letter nucleotide codes (the four bases
# plus the ambiguity codes); each value is the string of codes associated
# with that key. NOTE(review): presumably "codes that can match the key"
# -- confirm against the code that consumes this table.
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}
101
102 DNA = Seq
103
201
202
204 """RestrictionType. Type from which derives all enzyme classes.
205
206 Implement the operator methods."""
207
def __init__(cls, name='', bases=(), dct=None):
    """RE(name, bases, dct) -> RestrictionType instance.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    name  -- name of the class being initialised.
    bases -- tuple of its base classes.
    dct   -- its namespace dictionary; a fresh dict is used when omitted.

    Once the type machinery has run, the `compsite` attribute of the new
    class (a regular expression pattern) is replaced by its compiled form.
    A class without a `compsite` attribute raises AttributeError here.
    """
    if dct is None:
        # A literal {} default would be a single dict shared by all calls.
        dct = {}
    # The bound super() call already supplies cls. Passing cls again hands
    # type.__init__ four arguments, which it rejects with a TypeError.
    super(RestrictionType, cls).__init__(name, bases, dct)
    cls.compsite = re.compile(cls.compsite)
217
219 """RE.__add__(other) -> RestrictionBatch().
220
221 if other is an enzyme returns a batch of the two enzymes.
222 if other is already a RestrictionBatch add enzyme to it."""
223 if isinstance(other, RestrictionType) :
224 return RestrictionBatch([cls, other])
225 elif isinstance(other, RestrictionBatch) :
226 return other.add_nocheck(cls)
227 else :
228 raise TypeError
229
231 """RE.__div__(other) -> list.
232
233 RE/other
234 returns RE.search(other)."""
235 return cls.search(other)
236
238 """RE.__rdiv__(other) -> list.
239
240 other/RE
241 returns RE.search(other)."""
242 return cls.search(other)
243
245 """RE.__truediv__(other) -> list.
246
247 RE/other
248 returns RE.search(other)."""
249 return cls.search(other)
250
252 """RE.__rtruediv__(other) -> list.
253
254 other/RE
255 returns RE.search(other)."""
256 return cls.search(other)
257
259 """RE.__floordiv__(other) -> list.
260
261 RE//other
262 returns RE.catalyse(other)."""
263 return cls.catalyse(other)
264
266 """RE.__rfloordiv__(other) -> list.
267
268 other//RE
269 returns RE.catalyse(other)."""
270 return cls.catalyse(other)
271
273 """RE.__str__() -> str.
274
275 return the name of the enzyme."""
276 return cls.__name__
277
279 """RE.__repr__() -> str.
280
281 used with eval or exec will instantiate the enzyme."""
282 return "%s" % cls.__name__
283
285 """RE.__len__() -> int.
286
287 length of the recognition site."""
288 return cls.size
289
291 """RE == other -> bool
292
293 True if RE and other are the same enzyme."""
294 return other is cls
295
297 """RE != other -> bool.
298 isoschizomer strict, same recognition site, same restriction -> False
299 all the other-> True"""
300 if not isinstance(other, RestrictionType) :
301 return True
302 elif cls.charac == other.charac :
303 return False
304 else :
305 return True
306
308 """RE >> other -> bool.
309
310 neoschizomer : same recognition site, different restriction. -> True
311 all the others : -> False"""
312 if not isinstance(other, RestrictionType) :
313 return False
314 elif cls.site == other.site and cls.charac != other.charac :
315 return True
316 else :
317 return False
318
320 """a % b -> bool.
321
322 Test compatibility of the overhang of a and b.
323 True if a and b have compatible overhang."""
324 if not isinstance(other, RestrictionType) :
325 raise TypeError( \
326 'expected RestrictionType, got %s instead' % type(other))
327 return cls._mod1(other)
328
330 """a >= b -> bool.
331
332 a is greater or equal than b if the a site is longer than b site.
333 if their site have the same length sort by alphabetical order of their
334 names."""
335 if not isinstance(other, RestrictionType) :
336 raise NotImplementedError
337 if len(cls) > len(other) :
338 return True
339 elif cls.size == len(other) and cls.__name__ >= other.__name__ :
340 return True
341 else :
342 return False
343
345 """a > b -> bool.
346
347 sorting order :
348 1. size of the recognition site.
349 2. if equal size, alphabetical order of the names."""
350 if not isinstance(other, RestrictionType) :
351 raise NotImplementedError
352 if len(cls) > len(other) :
353 return True
354 elif cls.size == len(other) and cls.__name__ > other.__name__ :
355 return True
356 else :
357 return False
358
360 """a <= b -> bool.
361
362 sorting order :
363 1. size of the recognition site.
364 2. if equal size, alphabetical order of the names."""
365 if not isinstance(other, RestrictionType) :
366 raise NotImplementedError
367 elif len(cls) < len(other) :
368 return True
369 elif len(cls) == len(other) and cls.__name__ <= other.__name__ :
370 return True
371 else :
372 return False
373
375 """a < b -> bool.
376
377 sorting order :
378 1. size of the recognition site.
379 2. if equal size, alphabetical order of the names."""
380 if not isinstance(other, RestrictionType) :
381 raise NotImplementedError
382 elif len(cls) < len(other) :
383 return True
384 elif len(cls) == len(other) and cls.__name__ < other.__name__ :
385 return True
386 else :
387 return False
388
389
391 """Implement the methods that are common to all restriction enzymes.
392
393 All the methods are classmethod.
394
395 For internal use only. Not meant to be instantiate."""
396
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return the list of all the sites of RE found in dna, as computed by
    RE._search().

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance;
    a FormattedSeq instance is accepted as well and is used as it is.

    if linear is False, the restriction sites that span over the boundaries
    will be included. linear is only used to build the FormattedSeq: it is
    ignored when dna already is one.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    Side effect: the (wrapped) sequence is stored in cls.dna before the
    actual search is delegated to cls._search()."""
    if not isinstance(dna, FormattedSeq):
        # Raw sequence: wrap it so that _search() always sees the same type.
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
search = classmethod(search)
424
426 """RE.all_suppliers -> print all the suppliers of R"""
427 supply = [x[0] for x in suppliers_dict.itervalues()]
428 supply.sort()
429 print ",\n".join(supply)
430 return
431 all_suppliers = classmethod(all_suppliers)
432
434 """RE.is_equischizomers(other) -> bool.
435
436 True if other is an isoschizomer of RE.
437 False else.
438
439 equischizomer <=> same site, same position of restriction."""
440 return not self != other
441 is_equischizomer = classmethod(is_equischizomer)
442
444 """RE.is_neoschizomers(other) -> bool.
445
446 True if other is an isoschizomer of RE.
447 False else.
448
449 neoschizomer <=> same site, different position of restriction."""
450 return self >> other
451 is_neoschizomer = classmethod(is_neoschizomer)
452
454 """RE.is_isoschizomers(other) -> bool.
455
456 True if other is an isoschizomer of RE.
457 False else.
458
459 isoschizomer <=> same site."""
460 return (not self != other) or self >> other
461 is_isoschizomer = classmethod(is_isoschizomer)
462
464 """RE.equischizomers([batch]) -> list.
465
466 return a tuple of all the isoschizomers of RE.
467 if batch is supplied it is used instead of the default AllEnzymes.
468
469 equischizomer <=> same site, same position of restriction."""
470 if not batch : batch = AllEnzymes
471 r = [x for x in batch if not self != x]
472 i = r.index(self)
473 del r[i]
474 r.sort()
475 return r
476 equischizomers = classmethod(equischizomers)
477
479 """RE.neoschizomers([batch]) -> list.
480
481 return a tuple of all the neoschizomers of RE.
482 if batch is supplied it is used instead of the default AllEnzymes.
483
484 neoschizomer <=> same site, different position of restriction."""
485 if not batch : batch = AllEnzymes
486 r = [x for x in batch if self >> x]
487 r.sort()
488 return r
489 neoschizomers = classmethod(neoschizomers)
490
492 """RE.isoschizomers([batch]) -> list.
493
494 return a tuple of all the equischizomers and neoschizomers of RE.
495 if batch is supplied it is used instead of the default AllEnzymes."""
496 if not batch : batch = AllEnzymes
497 r = [x for x in batch if (self >> x) or (not self != x)]
498 i = r.index(self)
499 del r[i]
500 r.sort()
501 return r
502 isoschizomers = classmethod(isoschizomers)
503
505 """RE.frequency() -> int.
506
507 frequency of the site."""
508 return self.freq
509 frequency = classmethod(frequency)
510
511
class NoCut(AbstractCut):
    """Behaviour shared by the enzymes for which no cut is recorded.

    These enzymes are generally enzymes that have been only partially
    characterised, so that the way they cut the DNA is unknown, or enzymes
    for which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site: _modify() and _rev_modify() hand the
    location of the match back unchanged.

    Unknown and NotDefined are also part of the base classes of these
    enzymes; the catalyse() method they get from Unknown raises
    NotImplementedError.

    Internal use only. Not meant to be instantiated."""

    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False for this class."""
        return False
    cut_once = classmethod(cut_once)

    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False for this class."""
        return False
    cut_twice = classmethod(cut_twice)

    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        _modify yields the real place(s) where the enzyme will cut.

        example with an enzyme which does cut :
            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence :
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so :
            EcoRI._modify(10) -> 11.

        if the enzyme cuts twice _modify yields two integers, one for each
        cutting site.

        No cut position is recorded for the enzymes of this class, so the
        location of the match itself is yielded."""
        yield location
    _modify = classmethod(_modify)

    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for sites situated on the antiparallel strand when the
        enzyme is not palindromic. Here too the location is yielded
        unchanged."""
        yield location
    _rev_modify = classmethod(_rev_modify)

    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        For this class the four cut positions are always None."""
        return None, None, None, None, cls.site
    characteristic = classmethod(characteristic)
592
594 """Implement the methods specific to the enzymes that cut the DNA only once
595
596 Correspond to ncuts values of 2 in emboss_e.###
597
598 Internal use only. Not meant to be instantiated."""
599
601 """RE.cut_once() -> bool.
602
603 True if the enzyme cut the sequence one time on each strand."""
604 return True
605 cut_once = classmethod(cut_once)
606
608 """RE.cut_twice() -> bool.
609
610 True if the enzyme cut the sequence twice on each strand."""
611 return False
612 cut_twice = classmethod(cut_twice)
613
615 """RE._modify(location) -> int.
616
617 for internal use only.
618
619 location is an integer corresponding to the location of the match for
620 the enzyme pattern in the sequence.
621 _modify returns the real place where the enzyme will cut.
622
623 example :
624 EcoRI pattern : GAATTC
625 EcoRI will cut after the G.
626 so in the sequence :
627 ______
628 GAATACACGGAATTCGA
629 |
630 10
631 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
632 EcoRI cut after the G so :
633 EcoRI._modify(10) -> 11.
634
635 if the enzyme cut twice _modify will returns two integer corresponding
636 to each cutting site.
637 """
638 yield location + self.fst5
639 _modify = classmethod(_modify)
640
642 """RE._rev_modify(location) -> generator of int.
643
644 for internal use only.
645
646 as _modify for site situated on the antiparallel strand when the
647 enzyme is not palindromic
648 """
649 yield location - self.fst3
650 _rev_modify = classmethod(_rev_modify)
651
653 """RE.characteristic() -> tuple.
654
655 the tuple contains the attributes :
656 fst5 -> first 5' cut ((current strand) or None
657 fst3 -> first 3' cut (complementary strand) or None
658 scd5 -> second 5' cut (current strand) or None
659 scd5 -> second 3' cut (complementary strand) or None
660 site -> recognition site."""
661 return self.fst5, self.fst3, None, None, self.site
662 characteristic = classmethod(characteristic)
663
664
666 """Implement the methods specific to the enzymes that cut the DNA twice
667
668 Correspond to ncuts values of 4 in emboss_e.###
669
670 Internal use only. Not meant to be instantiated."""
671
673 """RE.cut_once() -> bool.
674
675 True if the enzyme cut the sequence one time on each strand."""
676 return False
677 cut_once = classmethod(cut_once)
678
680 """RE.cut_twice() -> bool.
681
682 True if the enzyme cut the sequence twice on each strand."""
683 return True
684 cut_twice = classmethod(cut_twice)
685
687 """RE._modify(location) -> int.
688
689 for internal use only.
690
691 location is an integer corresponding to the location of the match for
692 the enzyme pattern in the sequence.
693 _modify returns the real place where the enzyme will cut.
694
695 example :
696 EcoRI pattern : GAATTC
697 EcoRI will cut after the G.
698 so in the sequence :
699 ______
700 GAATACACGGAATTCGA
701 |
702 10
703 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
704 EcoRI cut after the G so :
705 EcoRI._modify(10) -> 11.
706
707 if the enzyme cut twice _modify will returns two integer corresponding
708 to each cutting site.
709 """
710 yield location + self.fst5
711 yield location + self.scd5
712 _modify = classmethod(_modify)
713
715 """RE._rev_modify(location) -> generator of int.
716
717 for internal use only.
718
719 as _modify for site situated on the antiparallel strand when the
720 enzyme is not palindromic
721 """
722 yield location - self.fst3
723 yield location - self.scd3
724 _rev_modify = classmethod(_rev_modify)
725
727 """RE.characteristic() -> tuple.
728
729 the tuple contains the attributes :
730 fst5 -> first 5' cut ((current strand) or None
731 fst3 -> first 3' cut (complementary strand) or None
732 scd5 -> second 5' cut (current strand) or None
733 scd5 -> second 3' cut (complementary strand) or None
734 site -> recognition site."""
735 return self.fst5, self.fst3, self.scd5, self.scd3, self.site
736 characteristic = classmethod(characteristic)
737
738
740 """Implement the information about methylation.
741
742 Enzymes of this class possess a site which is methylable."""
743
745 """RE.is_methylable() -> bool.
746
747 True if the recognition site is a methylable."""
748 return True
749 is_methylable = classmethod(is_methylable)
750
752 """Implement informations about methylation sensitibility.
753
754 Enzymes of this class are not sensible to methylation."""
755
757 """RE.is_methylable() -> bool.
758
759 True if the recognition site is a methylable."""
760 return False
761 is_methylable = classmethod(is_methylable)
762
764 """Implement the methods specific to the enzymes which are palindromic
765
766 palindromic means : the recognition site and its reverse complement are
767 identical.
768 Remarks : an enzyme with a site CGNNCG is palindromic even if some
769 of the sites that it will recognise are not.
770 for example here : CGAACG
771
772 Internal use only. Not meant to be instantiated."""
773
775 """RE._search() -> list.
776
777 for internal use only.
778
779 implement the search method for palindromic and non palindromic enzyme.
780 """
781 siteloc = self.dna.finditer(self.compsite,self.size)
782 self.results = [r for s,g in siteloc for r in self._modify(s)]
783 if self.results : self._drop()
784 return self.results
785 _search = classmethod(_search)
786
788 """RE.is_palindromic() -> bool.
789
790 True if the recognition site is a palindrom."""
791 return True
792 is_palindromic = classmethod(is_palindromic)
793
794
796 """Implement the methods specific to the enzymes which are not palindromic
797
798 palindromic means : the recognition site and its reverse complement are
799 identical.
800
801 Internal use only. Not meant to be instantiated."""
802
804 """RE._search() -> list.
805
806 for internal use only.
807
808 implement the search method for palindromic and non palindromic enzyme.
809 """
810 iterator = self.dna.finditer(self.compsite, self.size)
811 self.results = []
812 modif = self._modify
813 revmodif = self._rev_modify
814 s = str(self)
815 self.on_minus = []
816 for start, group in iterator :
817 if group(s) :
818 self.results += [r for r in modif(start)]
819 else :
820 self.on_minus += [r for r in revmodif(start)]
821 self.results += self.on_minus
822 if self.results :
823 self.results.sort()
824 self._drop()
825 return self.results
826 _search = classmethod(_search)
827
829 """RE.is_palindromic() -> bool.
830
831 True if the recognition site is a palindrom."""
832 return False
833 is_palindromic = classmethod(is_palindromic)
834
836 """Implement the methods specific to the enzymes for which the overhang
837 is unknown.
838
839 These enzymes are also NotDefined and NoCut.
840
841 Internal use only. Not meant to be instantiated."""
842
844 """RE.catalyse(dna, linear=True) -> tuple of DNA.
845 RE.catalyze(dna, linear=True) -> tuple of DNA.
846
847 return a tuple of dna as will be produced by using RE to restrict the
848 dna.
849
850 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
851
852 if linear is False, the sequence is considered to be circular and the
853 output will be modified accordingly."""
854 raise NotImplementedError('%s restriction is unknown.' \
855 % self.__name__)
856 catalyze = catalyse = classmethod(catalyse)
857
859 """RE.is_blunt() -> bool.
860
861 True if the enzyme produces blunt end.
862
863 see also :
864 RE.is_3overhang()
865 RE.is_5overhang()
866 RE.is_unknown()"""
867 return False
868 is_blunt = classmethod(is_blunt)
869
871 """RE.is_5overhang() -> bool.
872
873 True if the enzyme produces 5' overhang sticky end.
874
875 see also :
876 RE.is_3overhang()
877 RE.is_blunt()
878 RE.is_unknown()"""
879 return False
880 is_5overhang = classmethod(is_5overhang)
881
883 """RE.is_3overhang() -> bool.
884
885 True if the enzyme produces 3' overhang sticky end.
886
887 see also :
888 RE.is_5overhang()
889 RE.is_blunt()
890 RE.is_unknown()"""
891 return False
892 is_3overhang = classmethod(is_3overhang)
893
895 """RE.overhang() -> str. type of overhang of the enzyme.,
896
897 can be "3' overhang", "5' overhang", "blunt", "unknown" """
898 return 'unknown'
899 overhang = classmethod(overhang)
900
902 """RE.compatible_end() -> list.
903
904 list of all the enzymes that share compatible end with RE."""
905 return []
906 compatible_end = classmethod(compatible_end)
907
def _mod1(cls, other):
    """RE._mod1(other) -> bool.

    for internal use only

    test for the compatibility of the restriction ends of RE and other.
    This implementation never reports compatibility: it returns False
    whatever other is."""
    return False
_mod1 = classmethod(_mod1)
916
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna a 1-tuple holding the whole sequence is
        returned."""
        cuts = cls.search(dna, linear)
        # Read cls.dna only after search(): the sequence is taken from there
        # rather than from the dna argument.
        seq = cls.dna
        # Slices start at 1 as in the rest of this module.
        # NOTE(review): presumably cls.dna uses 1-based positions -- confirm.
        if not cuts:
            return (seq[1:],)
        # Fragments delimited by two consecutive cuts (none with one cut).
        inner = [seq[start:stop] for start, stop in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # Linear: leading fragment, inner fragments, trailing fragment.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # Circular: the piece after the last cut is joined to the piece
            # before the first cut, and that fragment is reported first.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always True for this class.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end. Always False for
        this class.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end. Always False for
        this class.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'blunt' for this class."""
        return 'blunt'
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible ends
        with RE, i.e. the enzymes that produce blunt ends.
        if batch is not supplied (or is empty) AllEnzymes is used instead."""
        if not batch:
            batch = AllEnzymes
        # Filter the batch that was asked for; the previous code always
        # scanned AllEnzymes, which made the batch argument useless.
        r = [x for x in iter(batch) if x.is_blunt()]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of the restriction ends of RE and other:
        True if other is a subclass of Blunt. other must be a class,
        issubclass() raises TypeError otherwise."""
        return issubclass(other, Blunt)
    _mod1 = staticmethod(_mod1)
1035
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3' (the enzymes reported as "5' overhang").

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna a 1-tuple holding the whole sequence is
        returned."""
        cuts = cls.search(dna, linear)
        # Read cls.dna only after search(): the sequence is taken from there
        # rather than from the dna argument.
        seq = cls.dna
        # Slices start at 1 as in the rest of this module.
        # NOTE(review): presumably cls.dna uses 1-based positions -- confirm.
        if not cuts:
            return (seq[1:],)
        # Fragments delimited by two consecutive cuts (none with one cut).
        inner = [seq[start:stop] for start, stop in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # Linear: leading fragment, inner fragments, trailing fragment.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # Circular: the piece after the last cut is joined to the piece
            # before the first cut, and that fragment is reported first.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False for this class.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end. Always True for
        this class.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end. Always False for
        this class.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always "5' overhang" for this class."""
        return "5' overhang"
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible ends
        with RE: those with a 5' overhang for which `x % RE` is true.
        if batch is not supplied (or is empty) AllEnzymes is used instead."""
        if not batch:
            batch = AllEnzymes
        # Filter the batch that was asked for; the previous code always
        # scanned AllEnzymes, which made the batch argument useless.
        r = [x for x in iter(batch) if x.is_5overhang() and x % cls]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of the restriction ends of RE and other.
        Only another Ov5 enzyme can be compatible; the comparison itself is
        then delegated to RE._mod2(other)."""
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
    _mod1 = classmethod(_mod1)
1153
1154
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5' (the enzymes reported as "3' overhang").

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna a 1-tuple holding the whole sequence is
        returned."""
        cuts = cls.search(dna, linear)
        # Read cls.dna only after search(): the sequence is taken from there
        # rather than from the dna argument.
        seq = cls.dna
        # Slices start at 1 as in the rest of this module.
        # NOTE(review): presumably cls.dna uses 1-based positions -- confirm.
        if not cuts:
            return (seq[1:],)
        # Fragments delimited by two consecutive cuts (none with one cut).
        inner = [seq[start:stop] for start, stop in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # Linear: leading fragment, inner fragments, trailing fragment.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # Circular: the piece after the last cut is joined to the piece
            # before the first cut, and that fragment is reported first.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False for this class.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end. Always False for
        this class.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end. Always True for
        this class.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always "3' overhang" for this class."""
        return "3' overhang"
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible ends
        with RE: those with a 3' overhang for which `x % RE` is true.
        if batch is not supplied (or is empty) AllEnzymes is used instead."""
        if not batch:
            batch = AllEnzymes
        # Filter the batch that was asked for; the previous code always
        # scanned AllEnzymes, which made the batch argument useless.
        r = [x for x in iter(batch) if x.is_3overhang() and x % cls]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of the restriction ends of RE and other.
        Only another Ov3 enzyme can be compatible; the comparison itself is
        then delegated to RE._mod2(other)."""
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
    _mod1 = classmethod(_mod1)
1275
1276
1278 """Implement the methods specific to the enzymes for which the overhang
1279 and the cut are not variable.
1280
1281 Typical example : EcoRI -> G^AATT_C
1282 The overhang will always be AATT
1283 Notes :
1284 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1285 There overhang is always the same : blunt!
1286
1287 Internal use only. Not meant to be instantiated."""
1288
1290 """RE._drop() -> list.
1291
1292 for internal use only.
1293
1294 drop the site that are situated outside the sequence in linear sequence.
1295 modify the index for site in circular sequences."""
1296
1297
1298
1299
1300
1301
1302
1303
1304 length = len(self.dna)
1305 drop = itertools.dropwhile
1306 take = itertools.takewhile
1307 if self.dna.is_linear() :
1308 self.results = [x for x in drop(lambda x:x<1, self.results)]
1309 self.results = [x for x in take(lambda x:x<length, self.results)]
1310 else :
1311 for index, location in enumerate(self.results) :
1312 if location < 1 :
1313 self.results[index] += length
1314 else :
1315 break
1316 for index, location in enumerate(self.results[::-1]) :
1317 if location > length :
1318 self.results[-(index+1)] -= length
1319 else :
1320 break
1321 return
1322 _drop = classmethod(_drop)
1323
1325 """RE.is_defined() -> bool.
1326
1327 True if the sequence recognised and cut is constant,
1328 i.e. the recognition site is not degenerated AND the enzyme cut inside
1329 the site.
1330
1331 see also :
1332 RE.is_ambiguous()
1333 RE.is_unknown()"""
1334 return True
1335 is_defined = classmethod(is_defined)
1336
1338 """RE.is_ambiguous() -> bool.
1339
1340 True if the sequence recognised and cut is ambiguous,
1341 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1342 the site.
1343
1344 see also :
1345 RE.is_defined()
1346 RE.is_unknown()"""
1347 return False
1348 is_ambiguous = classmethod(is_ambiguous)
1349
1351 """RE.is_unknown() -> bool.
1352
1353 True if the sequence is unknown,
1354 i.e. the recognition site has not been characterised yet.
1355
1356 see also :
1357 RE.is_defined()
1358 RE.is_ambiguous()"""
1359 return False
1360 is_unknown = classmethod(is_unknown)
1361
1363 """RE.elucidate() -> str
1364
1365 return a representation of the site with the cut on the (+) strand
1366 represented as '^' and the cut on the (-) strand as '_'.
1367 ie :
1368 >>> EcoRI.elucidate() # 5' overhang
1369 'G^AATT_C'
1370 >>> KpnI.elucidate() # 3' overhang
1371 'G_GTAC^C'
1372 >>> EcoRV.elucidate() # blunt
1373 'GAT^_ATC'
1374 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1375 '? GTATAC ?'
1376 >>>
1377 """
1378 f5 = self.fst5
1379 f3 = self.fst3
1380 site = self.site
1381 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1382 elif self.is_5overhang() :
1383 if f5 == f3 == 0 : re = 'N^'+ self.site + '_N'
1384 elif f3 == 0 : re = site[:f5] + '^' + site[f5:] + '_N'
1385 else : re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1386 elif self.is_blunt() :
1387 re = site[:f5] + '^_' + site[f5:]
1388 else :
1389 if f5 == f3 == 0 : re = 'N_'+ site + '^N'
1390 else : re = site[:f3] + '_' + site[f3:f5] +'^'+ site[f5:]
1391 return re
1392 elucidate = classmethod(elucidate)
1393
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    for internal use only

    Second step of the end-compatibility test for an enzyme with a
    constant overhang: identical overhang sequences are compatible;
    otherwise an Ambiguous partner is asked to decide, and anything
    else is incompatible."""
    if other.ovhgseq == self.ovhgseq:
        return True
    if issubclass(other, Ambiguous):
        # The ambiguous enzyme knows how to expand its own degenerate bases.
        return other._mod2(self)
    return False
_mod2 = classmethod(_mod2)
1410
1411
1413 """Implement the methods specific to the enzymes for which the overhang
1414 is variable.
1415
1416 Typical example : BstXI -> CCAN_NNNN^NTGG
1417 The overhang can be any sequence of 4 bases.
1418 Notes :
1419 Blunt enzymes are always defined, even if their site is GGATCCNNN^_N.
1420 Their overhang is always the same : blunt!
1421
1422 Internal use only. Not meant to be instantiated."""
1423
1425 """RE._drop() -> list.
1426
1427 for internal use only.
1428
1429 drop the site that are situated outside the sequence in linear sequence.
1430 modify the index for site in circular sequences."""
1431 length = len(self.dna)
1432 drop = itertools.dropwhile
1433 take = itertools.takewhile
1434 if self.dna.is_linear() :
1435 self.results = [x for x in drop(lambda x : x < 1, self.results)]
1436 self.results = [x for x in take(lambda x : x <length, self.results)]
1437 else :
1438 for index, location in enumerate(self.results) :
1439 if location < 1 :
1440 self.results[index] += length
1441 else :
1442 break
1443 for index, location in enumerate(self.results[::-1]) :
1444 if location > length :
1445 self.results[-(index+1)] -= length
1446 else :
1447 break
1448 return
1449 _drop = classmethod(_drop)
1450
1452 """RE.is_defined() -> bool.
1453
1454 True if the sequence recognised and cut is constant,
1455 i.e. the recognition site is not degenerated AND the enzyme cut inside
1456 the site.
1457
1458 see also :
1459 RE.is_ambiguous()
1460 RE.is_unknown()"""
1461 return False
1462 is_defined = classmethod(is_defined)
1463
1465 """RE.is_ambiguous() -> bool.
1466
1467 True if the sequence recognised and cut is ambiguous,
1468 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1469 the site.
1470
1471
1472 see also :
1473 RE.is_defined()
1474 RE.is_unknown()"""
1475 return True
1476 is_ambiguous = classmethod(is_ambiguous)
1477
1479 """RE.is_unknown() -> bool.
1480
1481 True if the sequence is unknown,
1482 i.e. the recognition site has not been characterised yet.
1483
1484 see also :
1485 RE.is_defined()
1486 RE.is_ambiguous()"""
1487 return False
1488 is_unknown = classmethod(is_unknown)
1489
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    for internal use only

    Test the compatibility of the (variable) overhang of RE with the
    overhang of other.

    The overhang of RE is turned into a regular expression, one element
    per base: 'N' becomes '.', a degenerate code among 'RYWMSKHDBV'
    becomes the character class '[' + matching[code] + ']', any other
    character is kept as is. The result is matched against
    other.ovhgseq. Overhangs of different lengths never match."""
    own = self.ovhgseq
    theirs = other.ovhgseq
    if len(own) != len(theirs):
        return False
    # Build the pattern in a single pass. The former implementation
    # re-ran split/join on the partially expanded pattern for every base,
    # so a later code (e.g. 'S') could be substituted *inside* a class
    # inserted for an earlier one (e.g. the expansion of 'R'), producing
    # a corrupt expression and a wrong answer.
    pattern = []
    for base in own:
        if base == 'N':
            pattern.append('.')
        elif base in 'RYWMSKHDBV':
            pattern.append('[' + matching[base] + ']')
        else:
            pattern.append(base)
    if re.match(''.join(pattern), theirs):
        return True
    return False
_mod2 = classmethod(_mod2)
1516
def elucidate(self):
    """RE.elucidate() -> str

    return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    ie :
    >>> EcoRI.elucidate()   # 5' overhang
    'G^AATT_C'
    >>> KpnI.elucidate()    # 3' overhang
    'G_GTAC^C'
    >>> EcoRV.elucidate()   # blunt
    'GAT^_ATC'
    >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
    '? GTATAC ?'
    >>>

    Cuts falling outside the recognition site are padded with 'N'.
    Enzymes for which cut_twice() is true are not supported and give an
    explanatory string instead.
    Raise ValueError for a blunt cutter whose cut lies inside the site
    (0 <= fst5 <= len(RE)), a case this variant does not handle."""
    f5 = self.fst5
    f3 = self.fst3
    length = len(self)
    site = self.site
    if self.cut_twice():
        rep = 'cut twice, not yet implemented sorry.'
    elif self.is_5overhang():
        if f3 == f5 == 0:
            rep = 'N^' + site + '_N'
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
        elif 0 <= f3 + length <= length:
            rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
        elif f3 + length < 0:
            # Was "'N^'*abs(f5)*'N'", i.e. str * str -> TypeError. The
            # prefix now follows the same 'N^' + abs(f5)*'N' form as the
            # neighbouring branches.
            rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
        elif f5 > length:
            rep = (site + (f5 - length) * 'N' + '^'
                   + (length + f3 - f5) * 'N' + '_N')
        else:
            rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
    elif self.is_blunt():
        if f5 < 0:
            rep = 'N^_' + abs(f5) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^_N'
        else:
            raise ValueError('%s.easyrepr() : error f5=%i'
                             % (self.name, f5))
    else:
        # Neither 5' overhang nor blunt: treated as a 3' overhang.
        if f3 == 0:
            if f5 == 0:
                rep = 'N_' + site + '^N'
            else:
                rep = site + '_' + (f5 - length) * 'N' + '^N'
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
        elif 0 <= f5 <= length:
            rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
        elif f3 > 0:
            rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
        elif f5 < 0:
            rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^'
                   + abs(f5) * 'N' + site)
        else:
            rep = ('N_' + abs(f3 + length) * 'N' + site
                   + (f5 - length) * 'N' + '^N')
    return rep
elucidate = classmethod(elucidate)
1579
1580
1582 """Implement the methods specific to the enzymes for which the overhang
1583 is not characterised.
1584
1585 Correspond to NoCut and Unknown.
1586
1587 Internal use only. Not meant to be instantiated."""
1588
def _drop(self):
    """RE._drop() -> None.

    for internal use only.

    Linear sequence : self.results is left untouched.
    Circular sequence : positions lower than 1 at the head of
    self.results are shifted up by len(self.dna), and positions greater
    than len(self.dna) at the tail are shifted down by the same amount.
    Each scan stops at the first position that is already in range."""
    if self.dna.is_linear():
        return
    length = len(self.dna)
    for index, location in enumerate(self.results):
        if location < 1:
            self.results[index] += length
        else:
            break
    # Walk the list from its end: the write below addresses
    # results[-(index+1)], so the scan has to be over the reversed list
    # ([::-1], as in the sibling _drop implementations), not over [:-1].
    for index, location in enumerate(self.results[::-1]):
        if location > length:
            self.results[-(index + 1)] -= length
        else:
            break
    return
_drop = classmethod(_drop)
1612
1614 """RE.is_defined() -> bool.
1615
1616 True if the sequence recognised and cut is constant,
1617 i.e. the recognition site is not degenerated AND the enzyme cut inside
1618 the site.
1619
1620 see also :
1621 RE.is_ambiguous()
1622 RE.is_unknown()"""
1623 return False
1624 is_defined = classmethod(is_defined)
1625
1627 """RE.is_ambiguous() -> bool.
1628
1629 True if the sequence recognised and cut is ambiguous,
1630 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1631 the site.
1632
1633
1634 see also :
1635 RE.is_defined()
1636 RE.is_unknown()"""
1637 return False
1638 is_ambiguous = classmethod(is_ambiguous)
1639
1641 """RE.is_unknown() -> bool.
1642
1643 True if the sequence is unknown,
1644 i.e. the recognition site has not been characterised yet.
1645
1646 see also :
1647 RE.is_defined()
1648 RE.is_ambiguous()"""
1649 return True
1650 is_unknown = classmethod(is_unknown)
1651
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    for internal use only

    The overhang of RE is not characterised, so no compatibility test
    is possible: always raise ValueError."""
    me = str(self)
    template = "%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
    raise ValueError(template % (me, str(other), me))
_mod2 = classmethod(_mod2)
1667
1669 """RE.elucidate() -> str
1670
1671 return a representation of the site with the cut on the (+) strand
1672 represented as '^' and the cut on the (-) strand as '_'.
1673 ie :
1674 >>> EcoRI.elucidate() # 5' overhang
1675 'G^AATT_C'
1676 >>> KpnI.elucidate() # 3' overhang
1677 'G_GTAC^C'
1678 >>> EcoRV.elucidate() # blunt
1679 'GAT^_ATC'
1680 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1681 '? GTATAC ?'
1682 >>>
1683 """
1684 return '? %s ?' % self.site
1685 elucidate = classmethod(elucidate)
1686
1687
1689
1690
1691
1692
1693 """Implement the methods specific to the enzymes which are commercially
1694 available.
1695
1696 Internal use only. Not meant to be instantiated."""
1697
1699 """RE.suppliers() -> print the suppliers of RE."""
1700 supply = suppliers_dict.items()
1701 for k,v in supply :
1702 if k in self.suppl :
1703 print v[0]+','
1704 return
1705 suppliers = classmethod(suppliers)
1706
1708 """RE.supplier_list() -> list.
1709
1710 list of the supplier names for RE."""
1711 return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
1712 supplier_list = classmethod(supplier_list)
1713
1715 """RE.buffers(supplier) -> string.
1716
1717 not implemented yet."""
1718 return
1719 buffers = classmethod(buffers)
1720
1722 """RE.iscomm() -> bool.
1723
1724 True if RE has suppliers."""
1725 return True
1726 is_comm = classmethod(is_comm)
1727
1728
1730 """Implement the methods specific to the enzymes which are not commercially
1731 available.
1732
1733 Internal use only. Not meant to be instantiated."""
1734
1736 """RE.suppliers() -> print the suppliers of RE."""
1737 return None
1738 suppliers = staticmethod(suppliers)
1739
1741 """RE.supplier_list() -> list.
1742
1743 list of the supplier names for RE."""
1744 return []
1745 supplier_list = classmethod(supplier_list)
1746
1748 """RE.buffers(supplier) -> string.
1749
1750 not implemented yet."""
1751 raise TypeError("Enzyme not commercially available.")
1752 buffers = classmethod(buffers)
1753
1755 """RE.iscomm() -> bool.
1756
1757 True if RE has suppliers."""
1758 return False
1759 is_comm = classmethod(is_comm)
1760
1761
1762
1763
1764
1765
1766
1767
1768
1770
def __init__(self, first=(), suppliers=()):
    """RestrictionBatch([sequence[, suppliers]]) -> new RestrictionBatch.

    first     : iterable of enzymes; each item goes through self.format.
    suppliers : iterable of supplier codes (keys of suppliers_dict); the
                enzymes listed for each code are added to the batch.
                Raise KeyError for an unknown code.

    The batch starts with an empty result cache (mapping holds None for
    every enzyme, already_mapped is None) and remembers the supplier
    codes it was built from in self.suppliers."""
    first = [self.format(x) for x in first]
    # NOTE(review): eval() turns the enzyme names stored in suppliers_dict
    # into objects; this is only safe while that dictionary is trusted,
    # generated data.
    first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
    set.__init__(self, first)
    self.mapping = dict.fromkeys(self)
    self.already_mapped = None
    # add_supplier() appends to, and current_suppliers() reads,
    # self.suppliers: it has to exist on every new batch.
    self.suppliers = [code for code in suppliers]
1778
1780 if len(self) < 5 :
1781 return '+'.join(self.elements())
1782 else :
1783 return '...'.join(('+'.join(self.elements()[:2]),\
1784 '+'.join(self.elements()[-2:])))
1785
1787 return 'RestrictionBatch(%s)' % self.elements()
1788
1790 try :
1791 other = self.format(other)
1792 except ValueError :
1793 return False
1794 return set.__contains__(self, other)
1795
1797 return self.search(other)
1798
1800 return self.search(other)
1801
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    Return the enzyme (after self.format) when it belongs to B.
    When it does not : add it to B and return it if add is true,
    otherwise raise a ValueError.
    self.format is expected to raise a ValueError for anything that is
    not, or can not be evaluated to, a RestrictionType."""
    e = self.format(enzyme)
    if e not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)
        self.add(e)
    return e
1818
def lambdasplit(self, func):
    """B.lambdasplit(func) -> RestrictionBatch .

    the new batch will contains only the enzymes for which
    func return True.
    func may be None, in which case the enzymes that are themselves
    true are kept (same convention as the filter it replaces)."""
    if func is None:
        func = bool
    new = RestrictionBatch()
    # The batch is a builtin set: the selection must really be added to
    # it. Assigning a '_data' attribute (a leftover of the old sets.Set
    # implementation) left the returned batch empty.
    for enzyme in self:
        if func(enzyme):
            new.add_nocheck(enzyme)
    return new
1828
1830 """B.add_supplier(letter) -> add a new set of enzyme to B.
1831
1832 letter represents the suppliers as defined in the dictionary
1833 RestrictionDictionary.suppliers
1834 return None.
1835 raise a KeyError if letter is not a supplier code."""
1836 supplier = suppliers_dict[letter]
1837 self.suppliers.append(letter)
1838 for x in supplier[1] :
1839 self.add_nocheck(eval(x))
1840 return
1841
1843 """B.current_suppliers() -> add a new set of enzyme to B.
1844
1845 return a sorted list of the suppliers which have been used to
1846 create the batch."""
1847 suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
1848 suppl_list.sort()
1849 return suppl_list
1850
1852 """ b += other -> add other to b, check the type of other."""
1853 self.add(other)
1854 return self
1855
1857 """ b + other -> new RestrictionBatch."""
1858 new = self.__class__(self)
1859 new.add(other)
1860 return new
1861
1863 """B.remove(other) -> remove other from B if other is a RestrictionType.
1864
1865 Safe set.remove method. Verify that other is a RestrictionType or can be
1866 evaluated to a RestrictionType.
1867 raise a ValueError if other can not be evaluated to a RestrictionType.
1868 raise a KeyError if other is not in B."""
1869 return set.remove(self, self.format(other))
1870
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Checked counterpart of set.add : other first goes through
    self.format, which is expected to raise a ValueError when other is
    not, and can not be evaluated to, a RestrictionType.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
1879
1881 """B.add_nocheck(other) -> add other to B. don't check type of other.
1882 """
1883 return set.add(self, other)
1884
1902
1903
1905 """B.is_restriction(y) -> bool.
1906
1907 True is y or eval(y) is a RestrictionType."""
1908 return isinstance(y, RestrictionType) or \
1909 isinstance(eval(str(y)), RestrictionType)
1910
def split(self, *classes, **flags):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Keep the enzymes that satisfy every condition. Each positional
    argument is a class; the keyword argument named after that class
    tells whether the enzyme must (true, the default) or must not
    (false) be a subclass of it.

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions."""
    def splittest(element):
        for klass in classes:
            wanted = flags.get(klass.__name__, True)
            if issubclass(element, klass):
                if not wanted:
                    return False
            elif wanted:
                return False
        return True
    new = RestrictionBatch()
    # Fill the new set itself: storing the selection in a '_data'
    # attribute has no effect on a builtin set and left the result empty.
    for enzyme in self:
        if splittest(enzyme):
            new.add_nocheck(enzyme)
    return new
1933
1935 """B.elements() -> tuple.
1936
1937 give all the names of the enzymes in B sorted alphabetically."""
1938 l = [str(e) for e in self]
1939 l.sort()
1940 return l
1941
1943 """B.as_string() -> list.
1944
1945 return a list of the name of the elements of B."""
1946 return [str(e) for e in self]
1947
1949 """B.suppl_codes() -> dict
1950
1951 letter code for the suppliers"""
1952 supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
1953 return supply
1954 suppl_codes = classmethod(suppl_codes)
1955
1957 "B.show_codes() -> letter codes for the suppliers"""
1958 supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
1959 print '\n'.join(supply)
1960 return
1961 show_codes = classmethod(show_codes)
1962
def search(self, dna, linear=True):
    """B.search(dna[, linear]) -> dict.

    Search dna with every enzyme of B and return the dictionary
    {enzyme : result of enzyme.search}, which is also kept in
    self.mapping.

    dna    : a DNA instance, wrapped in FormattedSeq(dna, linear), or a
             FormattedSeq, used as is (its own linear attribute then
             replaces the linear argument).
    linear : topology used for a DNA instance.

    The pair (str(dna), topology) of the last successful search is kept
    in self.already_mapped; asking again for the same pair returns the
    stored mapping without searching.
    Raise TypeError for any other kind of dna."""
    if isinstance(dna, DNA):
        signature = (str(dna), linear)
        if signature == self.already_mapped:
            return self.mapping
        fseq = FormattedSeq(dna, linear)
    elif isinstance(dna, FormattedSeq):
        signature = (str(dna), dna.linear)
        if signature == self.already_mapped:
            return self.mapping
        fseq = dna
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    mapping = dict([(x, x.search(fseq)) for x in self])
    # Record the cache key only once the mapping exists: writing it first
    # meant that a search interrupted by an exception left the previous
    # mapping behind, to be returned as a hit by the next identical call.
    self.mapping = mapping
    self.already_mapped = signature
    return self.mapping
1990
1991
1992
1993
1994
1995
1996
1997 -class Analysis(RestrictionBatch, PrintFormat) :
1998
2001 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2002
2003 For most of the method of this class if a dictionary is given it will
2004 be used as the base to calculate the results.
2005 If no dictionary is given a new analysis using the Restriction Batch
2006 which has been given when the Analysis class has been instantiated."""
2007 RestrictionBatch.__init__(self, restrictionbatch)
2008 self.rb = restrictionbatch
2009 self.sequence = sequence
2010 self.linear = linear
2011 if self.sequence :
2012 self.search(self.sequence, self.linear)
2013
2015 return 'Analysis(%s,%s,%s)'%\
2016 (repr(self.rb),repr(self.sequence),self.linear)
2017
2019 """A._sub_set(other_set) -> dict.
2020
2021 Internal use only.
2022
2023 screen the results through wanted set.
2024 Keep only the results for which the enzymes is in wanted set.
2025 """
2026 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2027
def _boundaries(self, start, end):
    """A._boundaries(start, end) -> tuple.

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    Return (start, end, test) where test is self._test_normal when
    start < end and self._test_reverse otherwise.
    A boundary lower than 1 is shifted up by len(self.sequence); a start
    that is still lower than 1 afterwards is set to 1.
    Raise TypeError when start or end is not an int."""
    if not isinstance(start, int):
        raise TypeError('expected int, got %s instead' % type(start))
    if not isinstance(end, int):
        raise TypeError('expected int, got %s instead' % type(end))
    if start < 1:
        start += len(self.sequence)
    if end < 1:
        end += len(self.sequence)
    # NOTE(review): the original also contained the statement
    # "start, end == end, start" when start >= end. It is a comparison,
    # hence a no-op, and is dropped here. It is deliberately NOT turned
    # into a swap: swapping would make the _test_reverse branch below
    # reachable only for start == end. Confirm the intended behaviour.
    if start < 1:
        # Was "start == 1", a comparison with no effect.
        start = 1
    if start < end:
        return start, end, self._test_normal
    return start, end, self._test_reverse
2052
2054 """A._test_normal(start, end, site) -> bool.
2055
2056 Internal use only
2057 Test if site is in between start and end.
2058 """
2059 return start <= site < end
2060
2062 """A._test_reverse(start, end, site) -> bool.
2063
2064 Internal use only
2065 Test if site is in between end and start (for circular sequences).
2066 """
2067 return start <= site <= len(self.sequence) or 1 <= site < end
2068
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    An empty line is printed first, then the work is handed over to
    PrintFormat.print_that, whose result is returned.
    When dct is missing or empty, self.mapping is printed instead.
    """
    results = dct
    if not results:
        results = self.mapping
    print('')
    return PrintFormat.print_that(self, results, title, s1)
2078
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    Accepted keywords :
    NameWidth, ConsoleWidth : set the value, then recompute
                              Cmodulo = ConsoleWidth % NameWidth and
                              PrefWidth = ConsoleWidth - Cmodulo.
    sequence, linear        : set the value and run the search again.
    rb                      : re-initialise the analysis with the new
                              batch, keeping sequence and linear.
    Indent, Maxsize         : set the value.
    Any other keyword, Cmodulo and PrefWidth included, raises an
    AttributeError.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right."""
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Keyword names are compared with ==, not 'is': identity of equal
        # strings is an interpreter detail, not a guarantee.
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None; binding its result to self broke
            # every keyword processed after 'rb'.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            # The messages used an undefined variable 'name', so a
            # NameError was raised instead of the AttributeError.
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth'
                % k)
        else:
            raise AttributeError(
                'Analysis has no attribute %s' % k)
    return
2113
def full(self, linear=True):
    """A.full() -> dict.

    Full Restriction Map of the sequence : the complete mapping stored
    by the last search. The linear argument is accepted but not used."""
    whole_map = self.mapping
    return whole_map
2119
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which cut leaving blunt ends (is_blunt() is true).

    dct : results to screen; self.mapping when it is not given. An
          empty dictionary is screened as such and gives an empty
          result."""
    # 'is None' rather than 'not dct': an empty dictionary coming from a
    # previous filter must not be replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    return dict([(k, v) for k, v in dct.items() if k.is_blunt()])
2127
2129 """A.overhang5([dct]) -> dict.
2130
2131 Only the enzymes which have a 5' overhang restriction site."""
2132 if not dct :
2133 dct = self.mapping
2134 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2135
2136
2138 """A.Overhang3([dct]) -> dict.
2139
2140 Only the enzymes which have a 3'overhang restriction site."""
2141 if not dct :
2142 dct = self.mapping
2143 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2144
2145
2147 """A.defined([dct]) -> dict.
2148
2149 Only the enzymes that have a defined restriction site in Rebase."""
2150 if not dct :
2151 dct = self.mapping
2152 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2153
2155 """A.with_sites([dct]) -> dict.
2156
2157 Enzymes which have at least one site in the sequence."""
2158 if not dct :
2159 dct = self.mapping
2160 return dict([(k,v) for k,v in dct.iteritems() if v])
2161
2163 """A.without_site([dct]) -> dict.
2164
2165 Enzymes which have no site in the sequence."""
2166 if not dct :
2167 dct = self.mapping
2168 return dict([(k,v) for k,v in dct.iteritems() if not v])
2169
2171 """A.With_N_Sites(N [, dct]) -> dict.
2172
2173 Enzymes which cut N times the sequence."""
2174 if not dct :
2175 dct = self.mapping
2176 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2177
2179 if not dct :
2180 dct = self.mapping
2181 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2182
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    Items of names that are not in AllEnzymes are reported on standard
    output and ignored; names itself is left unchanged.
    Without dct a new search of self.sequence is run with the remaining
    enzymes, using the topology of the analysis (self.linear); with dct
    the entries of dct for these enzymes are returned."""
    # Build a filtered copy: deleting from the list while enumerating it
    # skipped the item following each removal, and the message referred
    # to an undefined variable.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            print("no datas for the enzyme: %s" % str(enzyme))
    if dct is None:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return dict([(n, dct[n]) for n in known if n in dct])
2194
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>.

    Only the enzymes of the analysis whose size attribute equals
    site_size are considered. Without dct a new search of self.sequence
    is run with them, using the topology of the analysis (self.linear);
    with dct the entries of dct for these enzymes are returned."""
    sites = [name for name in self if name.size == site_size]
    if dct is None:
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Membership is tested against the selected enzymes; the former
    # "k in site_size" tested membership in an int and raised TypeError.
    return dict([(k, v) for k, v in dct.items() if k in sites])
2203
2205 """A.only_between(start, end[, dct]) -> dict.
2206
2207 Enzymes that cut the sequence only in between start and end."""
2208 start, end, test = self._boundaries(start, end)
2209 if not dct :
2210 dct = self.mapping
2211 d = dict(dct)
2212 for key, sites in dct.iteritems() :
2213 if not sites :
2214 del d[key]
2215 continue
2216 for site in sites:
2217 if test(start, end, site) :
2218 continue
2219 else :
2220 del d[key]
2221 break
2222 return d
2223
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    start, end : boundaries, normalised by self._boundaries, which also
                 supplies the test applied to every site.
    dct        : results to screen; self.mapping when it is not given.
                 An empty dictionary gives an empty result."""
    start, end, test = self._boundaries(start, end)
    # 'is None' rather than 'not dct': an empty dictionary must not be
    # replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if test(start, end, site):
                # One site inside the region is enough.
                d[key] = sites
                break
    return d
2240
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence in between start and end (as selected
    by A.between), each one associated with only those of its sites
    that lie in the region.

    When start <= end the region is start <= site <= end; otherwise it
    is the wrapped region site >= start or site <= end."""
    # Iterate over (enzyme, sites) pairs: the former loop walked the keys
    # only and referred to an undefined name for the enzyme.
    selected = self.between(start, end, dct)
    if start <= end:
        d = [(k, [vv for vv in v if start <= vv <= end])
             for k, v in selected.items()]
    else:
        d = [(k, [vv for vv in v if start <= vv or vv <= end])
             for k, v in selected.items()]
    return dict(d)
2254
2256 """A.only_outside(start, end [, dct]) -> dict.
2257
2258 Enzymes that cut the sequence outside of the region
2259 in between start and end but do not cut inside."""
2260 start, end, test = self._boundaries(start, end)
2261 if not dct : dct = self.mapping
2262 d = dict(dct)
2263 for key, sites in dct.iteritems() :
2264 if not sites :
2265 del d[key]
2266 continue
2267 for site in sites :
2268 if test(start, end, site) :
2269 del d[key]
2270 break
2271 else :
2272 continue
2273 return d
2274
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    start, end : boundaries, normalised by self._boundaries, which also
                 supplies the test applied to every site.
    dct        : results to screen; self.mapping when it is not given.
                 An empty dictionary gives an empty result."""
    start, end, test = self._boundaries(start, end)
    # 'is None' rather than 'not dct': an empty dictionary must not be
    # replaced by the full mapping.
    if dct is None:
        dct = self.mapping
    d = {}
    for key, sites in dct.items():
        for site in sites:
            if not test(start, end, site):
                # One site outside the region is enough.
                d[key] = sites
                break
    return d
2292
2293
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end : the
    enzymes without any site (A.without_site) together with those that
    cut only outside the region (A.only_outside).

    dct : results to screen; self.mapping when it is not given. An
          empty dictionary gives an empty result."""
    if dct is None:
        dct = self.mapping
    if not dct:
        # Nothing to screen; also keeps the helpers from falling back on
        # the full mapping when they receive an empty dictionary.
        return {}
    # Both halves must be computed from the same dictionary: the former
    # call without_site() always used self.mapping, whatever dct was.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
# Build every enzyme class and sort it into one of two batches:
# enzymes for which is_comm() is true go to CommOnly, the others to NonComm.
2326 CommOnly = RestrictionBatch()
2327 NonComm = RestrictionBatch()
# Each typedict value is unpacked as (bases, enzymes): bases holds names that
# are evaluated below, enzymes holds keys of enzymedict.
2328 for TYPE, (bases, enzymes) in typedict.iteritems() :
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
# Turn the names into the objects used as the tuple of base classes.
# NOTE(review): eval() of strings taken from typedict; presumably a trusted,
# generated dictionary -- confirm.
2346 bases = tuple([eval(x) for x in bases])
2347
2348
2349
2350
# type.__new__ creates a class named 'RestrictionType' with RestrictionType
# as metaclass and `bases` as bases; T is then called once per enzyme.
# NOTE(review): presumably calling T(name, bases, dict) yields the enzyme
# class itself -- confirm against the definition of RestrictionType.
2351 T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
2352 for k in enzymes :
2353
2354
2355
2356
2357
# k is the enzyme name, enzymedict[k] the data passed along for that enzyme.
2358 newenz = T(k, bases, enzymedict[k])
2359
2360
2361
2362
2363
# add_nocheck skips the format/type check performed by add.
2364 if newenz.is_comm() : CommOnly.add_nocheck(newenz)
2365 else : NonComm.add_nocheck(newenz)
2366
2367
2368
# Union of the two batches.
2369 AllEnzymes = CommOnly | NonComm
2370
2371
2372
# Publish every enzyme under its own name. map(None, a, b) is the Python 2
# way of pairing the items of two sequences; both are built by iterating the
# same, unmodified AllEnzymes, so names and enzymes are assumed to line up.
# NOTE(review): relies on locals() being the module namespace, i.e. on this
# code running at module level -- confirm.
2373 names = [str(x) for x in AllEnzymes]
2374 locals().update(dict(map(None, names, AllEnzymes)))
2375 __all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
# Remove the loop/helper variables from the namespace; x is the
# list-comprehension variable, which leaks into the enclosing scope in
# Python 2.
2376 del k, x, enzymes, TYPE, bases, names
2377