Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
   11  """ Notes about the various classes of the restriction enzyme implementation. 
  12   
  13          RestrictionType is the type of all restriction enzymes. 
  14      ---------------------------------------------------------------------------- 
  15          AbstractCut implements some methods that are common to all enzymes. 
  16      ---------------------------------------------------------------------------- 
  17          NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  18                                  produced by the enzyme. 
  19                                  they correspond to the 4th field of the rebase 
  20                                  record emboss_e.NNN. 
  21                  0->NoCut    : the enzyme is not characterised. 
   22                  2->OneCut   : the enzyme produces one double strand cut. 
  23                  4->TwoCuts  : two double strand cuts. 
  24      ---------------------------------------------------------------------------- 
  25          Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  26                                  the enzyme. 
  27                                  Not implemented yet. 
  28      ---------------------------------------------------------------------------- 
  29          Palindromic,            if the site is palindromic or not. 
   30          NonPalindromic          allows some optimisations of the code. 
  31                                  No need to check the reverse strand 
  32                                  with palindromic sites. 
  33      ----------------------------------------------------------------------------                                     
  34          Unknown, Blunt,         represent the overhang. 
   35          Ov5, Ov3                Unknown is here for symmetry reasons and 
   36                                  corresponds to enzymes that are not characterised 
  37                                  in rebase. 
  38      ---------------------------------------------------------------------------- 
  39          Defined, Ambiguous,     represent the sequence of the overhang. 
  40          NotDefined              
  41                                  NotDefined is for enzymes not characterised in 
  42                                  rebase. 
  43                                   
  44                                  Defined correspond to enzymes that display a 
  45                                  constant overhang whatever the sequence. 
  46                                  ex : EcoRI. G^AATTC -> overhang :AATT 
  47                                              CTTAA^G 
  48   
  49                                  Ambiguous : the overhang varies with the 
  50                                  sequence restricted. 
  51                                  Typically enzymes which cut outside their 
  52                                  restriction site or (but not always) 
  53                                  inside an ambiguous site. 
  54                                  ex : 
  55                                  AcuI CTGAAG(22/20)  -> overhang : NN 
  56                                  AasI GACNNN^NNNGTC  -> overhang : NN 
  57                                       CTGN^NNNNNCAG 
  58   
   59              note : these 3 classes refer to the overhang not the site. 
  60                 So the enzyme ApoI (RAATTY) is defined even if its restriction 
  61                 site is ambiguous. 
  62                                   
  63                      ApoI R^AATTY -> overhang : AATT -> Defined 
  64                           YTTAA^R 
  65                 Accordingly, blunt enzymes are always Defined even 
  66                 when they cut outside their restriction site. 
  67      ---------------------------------------------------------------------------- 
  68          Not_available,          as found in rebase file emboss_r.NNN files. 
  69          Commercially_available 
  70                                  allow the selection of the enzymes according to 
  71                                  their suppliers to reduce the quantity 
  72                                  of results. 
  73                                  Also will allow the implementation of buffer 
  74                                  compatibility tables. Not implemented yet. 
  75   
  76                                  the list of suppliers is extracted from 
  77                                  emboss_s.NNN 
  78      ---------------------------------------------------------------------------- 
  79          """ 
  80   
  81  import re 
  82  import itertools 
  83   
  84  #TODO - Remove this work around once we drop python 2.3 support 
  85  try: 
  86     set = set 
  87  except NameError: 
  88     from sets import Set as set 
  89   
  90  from Bio.Seq import Seq, MutableSeq 
  91  from Bio.Alphabet import IUPAC 
  92   
  93  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict,\ 
  94       typedict, suppliers as suppliers_dict 
  95  from Bio.Restriction.RanaConfig import * 
  96  from Bio.Restriction.PrintFormat import PrintFormat 
  97  from Bio.Restriction.DNAUtils import check_bases 
  98   
  99   
 100   
 101  matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN', 
 102              'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY', 
 103              'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY', 
 104              'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY', 
 105              'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY', 
 106              'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'} 
 107   
  108  DNA = Seq  # module-level alias: DNA is another name for the imported Bio.Seq.Seq
 109       
class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

    Wrap a Bio.Seq.Seq / Bio.Seq.MutableSeq in the form used by the
    Restriction classes.

    Roughly : anything which is not IUPAC alphabet is removed and a space is
    added in front of the sequence, so that indexes are biological (the
    index of the first base is 1, not 0).
    NOTE(review): that clean-up is delegated to DNAUtils.check_bases, which
    is not visible from this class -- confirm against that helper.

    The shape of the molecule, linear (default) or circular, is retained so
    that restriction sites can be searched over the edges of a circular
    sequence.

    Attributes set by __init__ :
        data     -> the formatted sequence string.
        lower    -> True if the original sequence string was all lower case.
        linear   -> True for a linear molecule, False for a circular one.
        klass    -> class of the original sequence, used to build slices.
        alphabet -> alphabet of the original sequence.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        If seq is a FormattedSeq, all its attributes (shape included) are
        copied and linear has no effect.

        Raises TypeError for any other type of seq."""
        if isinstance(seq, (Seq, MutableSeq)):
            stringy = seq.tostring()
            self.lower = stringy.islower()
            self.data = check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        elif isinstance(seq, FormattedSeq):
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """len(FS) -> int. Length of data minus its first character."""
        return len(self.data) - 1

    def __repr__(self):
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """FS == other -> bool.

        True if other is a FormattedSeq with the same repr(), i.e. the same
        sequence slice and the same shape."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def __ne__(self, other):
        """FS != other -> bool.

        Explicit inverse of __eq__. Python 2 does not derive != from ==, so
        without this method two equal instances would also compare as
        different."""
        return not (self == other)

    def circularise(self):
        """FS.circularise() -> circularise FS"""
        self.linear = False
        return

    def linearise(self):
        """FS.linearise() -> linearise FS"""
        self.linear = True
        return

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance"""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance"""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence."""
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        return a list of the matches of pattern in the sequence.
        the list is made of tuples (location, match.group), where
        match.group is the bound group method of the match object.
        the latter is used with non palindromic sites.
        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition site."""
        if self.is_linear():
            data = self.data
        else:
            # circular : append the first `size` bases (skipping the first
            # character of data) so that sites spanning the origin match.
            data = self.data + self.data[1:size+1]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """FS[i] -> instance of the original sequence class.

        The item or slice of data is lower-cased first when the original
        sequence was lower case."""
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
class RestrictionType(type):
    """RestrictionType. Type from which derives all enzyme classes.

    Implement the operator methods, so that enzymes (which are classes) can
    be compared, sorted, added into batches and applied to a sequence with
    the / and // operators."""

    def __init__(cls, name='', bases=(), dct=None):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        The class attribute compsite (a regular expression string) is
        replaced by its compiled form."""
        if dct is None:
            dct = {}
        # type.__init__ is a bound call here : cls must not be passed again
        # (doing so raises TypeError on Python 3).
        super(RestrictionType, cls).__init__(name, bases, dct)
        cls.compsite = re.compile(cls.compsite)

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        if other is an enzyme returns a batch of the two enzymes.
        if other is already a RestrictionBatch add enzyme to it.
        Raises TypeError for anything else."""
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        return the name of the enzyme."""
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        used with eval or exec will instantiate the enzyme."""
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        length of the recognition site."""
        return cls.size

    def __eq__(cls, other):
        """RE == other -> bool

        True if RE and other are the same enzyme (identity test)."""
        return other is cls

    def __hash__(cls):
        """hash(RE) -> int.

        Default type hash, consistent with the identity based __eq__.
        It must be defined explicitly : on Python 3 a class defining __eq__
        without __hash__ is unhashable, which would prevent enzymes from
        being stored in sets or used as dictionary keys."""
        return type.__hash__(cls)

    def __ne__(cls, other):
        """RE != other -> bool.

        isoschizomer strict, same recognition site, same restriction -> False
        all the other-> True

        Note : this is NOT the negation of ==. Two different enzymes with
        the same charac are neither == nor !=."""
        if not isinstance(other, RestrictionType):
            return True
        return not (cls.charac == other.charac)

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction. -> True
        all the others : -> False"""
        if not isinstance(other, RestrictionType):
            return False
        return cls.site == other.site and cls.charac != other.charac

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.
        Raises TypeError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise TypeError(
                'expected RestrictionType, got %s instead' % type(other))
        return cls._mod1(other)

    def __ge__(cls, other):
        """a >= b -> bool.

        a is greater or equal than b if the a site is longer than b site.
        if their site have the same length sort by alphabetical order of
        their names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        return len(cls) == len(other) and cls.__name__ >= other.__name__

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) > len(other):
            return True
        return len(cls) == len(other) and cls.__name__ > other.__name__

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ <= other.__name__

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        Raises NotImplementedError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        if len(cls) < len(other):
            return True
        return len(cls) == len(other) and cls.__name__ < other.__name__
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated."""

    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        return a list of all the site of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (a FormattedSeq is also accepted and used as is, in which case
        linear is ignored).

        if linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut. """
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        cls.dna = dna
        return cls._search()
    search = classmethod(search)

    def all_suppliers(cls):
        """RE.all_suppliers() -> print all the suppliers of RE.

        The first item of every value of the suppliers dictionary is
        printed, sorted, one per line."""
        supply = [x[0] for x in suppliers_dict.values()]
        supply.sort()
        # parenthesised single argument : same output with the Python 2
        # print statement and the Python 3 print function.
        print(",\n".join(supply))
        return
    all_suppliers = classmethod(all_suppliers)

    def is_equischizomer(cls, other):
        """RE.is_equischizomers(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction."""
        return not cls != other
    is_equischizomer = classmethod(is_equischizomer)

    def is_neoschizomer(cls, other):
        """RE.is_neoschizomers(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction."""
        return cls >> other
    is_neoschizomer = classmethod(is_neoschizomer)

    def is_isoschizomer(cls, other):
        """RE.is_isoschizomers(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site."""
        return (not cls != other) or cls >> other
    is_isoschizomer = classmethod(is_isoschizomer)

    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        return a sorted list of all the equischizomers of RE, RE excluded.
        if batch is supplied it is used instead of the default AllEnzymes.

        equischizomer <=> same site, same position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # RE itself matches the test above; drop it. It is absent when a
        # custom batch which does not contain RE was supplied.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r
    equischizomers = classmethod(equischizomers)

    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        return a sorted list of all the neoschizomers of RE.
        if batch is supplied it is used instead of the default AllEnzymes.

        neoschizomer <=> same site, different position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if cls >> x]
        r.sort()
        return r
    neoschizomers = classmethod(neoschizomers)

    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        return a sorted list of all the equischizomers and neoschizomers of
        RE, RE excluded.
        if batch is supplied it is used instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # see equischizomers : RE may be absent from a custom batch.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r
    isoschizomers = classmethod(isoschizomers)

    def frequency(cls):
        """RE.frequency() -> int.

        frequency of the site (the freq attribute of the enzyme)."""
        return cls.freq
    frequency = classmethod(frequency)
class NoCut(AbstractCut):
    """Methods specific to the enzymes that do not cut.

    These are generally enzymes which are only partially characterised, so
    that the way they cut the DNA is unknown, or enzymes whose pattern of
    cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    search() returns, for these enzymes, the start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes; catalyse() as defined by Unknown in this module raises
    NotImplementedError.

    Internal use only. Not meant to be instantiated."""

    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here."""
        return False
    cut_once = classmethod(cut_once)

    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here."""
        return False
    cut_twice = classmethod(cut_twice)

    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the integer location of a match of the enzyme pattern
        in the sequence. The generator yields the place(s) associated with
        that match; as the cut of these enzymes is not known, location is
        yielded unchanged.
        """
        yield location
    _modify = classmethod(_modify)

    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        same as _modify, for a site situated on the antiparallel strand
        when the enzyme is not palindromic. location is yielded unchanged.
        """
        yield location
    _rev_modify = classmethod(_rev_modify)

    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        All four cut positions are None for these enzymes."""
        return (None, None, None, None, cls.site)
    characteristic = classmethod(characteristic)
class OneCut(AbstractCut):
    """Methods specific to the enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always True here."""
        return True
    cut_once = classmethod(cut_once)

    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here."""
        return False
    cut_twice = classmethod(cut_twice)

    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the integer location of a match of the enzyme pattern
        in the sequence. The generator yields the real place where the
        enzyme will cut, i.e. location + fst5.

        example :
            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence :
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so :
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5
    _modify = classmethod(_modify)

    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        same as _modify, for a site situated on the antiparallel strand
        when the enzyme is not palindromic : yields location - fst3.
        """
        yield location - cls.fst3
    _rev_modify = classmethod(_rev_modify)

    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two second cut positions are None for these enzymes."""
        return (cls.fst5, cls.fst3, None, None, cls.site)
    characteristic = classmethod(characteristic)
class TwoCuts(AbstractCut):
    """Methods specific to the enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here."""
        return False
    cut_once = classmethod(cut_once)

    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always True here."""
        return True
    cut_twice = classmethod(cut_twice)

    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the integer location of a match of the enzyme pattern
        in the sequence. As the enzyme cuts twice, the generator yields two
        integers, one per cutting site : location + fst5, then
        location + scd5.
        """
        yield location + cls.fst5
        yield location + cls.scd5
    _modify = classmethod(_modify)

    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        same as _modify, for a site situated on the antiparallel strand
        when the enzyme is not palindromic : yields location - fst3, then
        location - scd3.
        """
        yield location - cls.fst3
        yield location - cls.scd3
    _rev_modify = classmethod(_rev_modify)

    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site."""
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
    characteristic = classmethod(characteristic)
class Meth_Dep(AbstractCut):
    """Information about methylation.

    Enzymes of this class possess a site which is methylable."""

    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable. Always True here."""
        return True
    is_methylable = classmethod(is_methylable)
class Meth_Undep(AbstractCut):
    """Information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation."""

    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable. Always False here."""
        return False
    is_methylable = classmethod(is_methylable)
class Palindromic(AbstractCut):
    """Methods specific to the enzymes which are palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated."""

    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        search method for palindromic enzymes : every match of compsite in
        RE.dna is converted to its cut position(s) with _modify. The list
        is stored in RE.results and, when it is not empty, _drop() is
        called before RE.results is returned.
        """
        cuts = []
        # the second item of each match (the group method) is not needed
        # for a palindromic site.
        for start, _unused in cls.dna.finditer(cls.compsite, cls.size):
            for position in cls._modify(start):
                cuts.append(position)
        cls.results = cuts
        if cuts:
            cls._drop()
        return cls.results
    _search = classmethod(_search)

    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome. Always True here."""
        return True
    is_palindromic = classmethod(is_palindromic)
class NonPalindromic(AbstractCut):
    """Methods specific to the enzymes which are not palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated."""

    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        search method for non palindromic enzymes. Each match of compsite
        in RE.dna is tested with match.group(<enzyme name>) : when that
        group matched, the cut positions come from _modify, otherwise from
        _rev_modify (those are also kept in RE.on_minus).
        NOTE(review): presumably the named group marks a site on the
        current strand -- confirm against the way compsite is built.

        The positions are stored in RE.results; when the list is not empty
        it is sorted and _drop() is called before RE.results is returned.
        """
        name = str(cls)
        # both lists are bound to the class first and then filled in place.
        cls.results = forward = []
        cls.on_minus = reverse = []
        for start, group in cls.dna.finditer(cls.compsite, cls.size):
            if group(name):
                forward.extend(cls._modify(start))
            else:
                reverse.extend(cls._rev_modify(start))
        forward.extend(reverse)
        if forward:
            forward.sort()
            cls._drop()
        return cls.results
    _search = classmethod(_search)

    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome. Always False here."""
        return False
    is_palindromic = classmethod(is_palindromic)
class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Same signature as the catalyse method of the other overhang
        classes, but the restriction of these enzymes is unknown :
        always raises NotImplementedError."""
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'unknown' here."""
        return 'unknown'
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        list of all the enzymes that share compatible end with RE.
        Always empty here as the overhang is unknown.

        batch is accepted, and ignored, so that the method can be called
        with the same arguments as Blunt.compatible_end."""
        return []
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Always False here."""
        return False
    _mod1 = classmethod(_mod1)
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna, the tuple holds the whole sequence."""
        r = cls.search(dna, linear)
        d = cls.dna
        if not r:
            return d[1:],
        #
        # fragments between two consecutive sites (none with a single site).
        #
        between = [d[r[x]:r[x+1]] for x in range(len(r) - 1)]
        if d.is_linear():
            #
            # START of the sequence to FIRST site, then the inner
            # fragments, then LAST site to END of the sequence.
            #
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            #
            # circular : bridge LAST site to FIRST site, then the others.
            #
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always True here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'blunt' here."""
        return 'blunt'
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes that share compatible end with RE,
        i.e. all the blunt enzymes of the batch.
        if batch is supplied it is used instead of the default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # iterate over the selected batch (the former code always went
        # through AllEnzymes, whatever batch was supplied).
        r = [x for x in iter(batch) if x.is_blunt()]
        r.sort()
        return r
    compatible_end = classmethod(compatible_end)

    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other :
        True if other is a subclass of Blunt.

        This is a staticmethod : other is its only argument."""
        return issubclass(other, Blunt)
    _mod1 = staticmethod(_mod1)
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of the fragments obtained when RE restricts dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular: the
        first fragment then joins the last site to the first site across
        the origin."""
        sites = cls.search(dna, linear)
        #
        #   cls.dna is read only after the search; presumably search() stores
        #   the formatted sequence there -- confirm against RE.search().
        #
        seq = cls.dna
        if not sites:
            # no site : the sequence comes back as one single fragment.
            return (seq[1:],)
        # fragments delimited by two consecutive sites.
        inner = [seq[sites[i]:sites[i + 1]] for i in xrange(len(sites) - 1)]
        if seq.is_linear():
            # START -> first site, site -> site, last site -> END.
            pieces = [seq[1:sites[0]]] + inner + [seq[sites[-1]:]]
        else:
            # circular : bridge the LAST site to the FIRST site.
            pieces = [seq[sites[-1]:] + seq[1:sites[0]]] + inner
        return tuple(pieces)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False for this class.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always True for this class.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False for this class.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class returns "5' overhang"."""
        return "5' overhang"
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that produce a 5' overhang and
        for which 'enzyme % RE' is true.
        If batch is not given (or is empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        #
        #   the candidates are taken from batch; the previous code computed
        #   batch but then always scanned AllEnzymes.
        #
        found = [enz for enz in iter(batch)
                 if enz.is_5overhang() and enz % cls]
        found.sort()
        return found
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov5 enzyme can be compatible; the decision is then
        delegated to RE._mod2(other)."""
        if not issubclass(other, Ov5):
            return False
        return cls._mod2(other)
    _mod1 = classmethod(_mod1)
1159 1160
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of the fragments obtained when RE restricts dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular: the
        first fragment then joins the last site to the first site across
        the origin."""
        sites = cls.search(dna, linear)
        #
        #   cls.dna is read only after the search; presumably search() stores
        #   the formatted sequence there -- confirm against RE.search().
        #
        seq = cls.dna
        if not sites:
            # no site : the sequence comes back as one single fragment.
            return (seq[1:],)
        # fragments delimited by two consecutive sites.
        inner = [seq[sites[i]:sites[i + 1]] for i in xrange(len(sites) - 1)]
        if seq.is_linear():
            # START -> first site, site -> site, last site -> END.
            pieces = [seq[1:sites[0]]] + inner + [seq[sites[-1]:]]
        else:
            # circular : bridge the LAST site to the FIRST site.
            pieces = [seq[sites[-1]:] + seq[1:sites[0]]] + inner
        return tuple(pieces)
    catalyze = catalyse = classmethod(catalyse)

    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False for this class.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False
    is_blunt = classmethod(is_blunt)

    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False for this class.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False
    is_5overhang = classmethod(is_5overhang)

    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always True for this class.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True
    is_3overhang = classmethod(is_3overhang)

    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class returns "3' overhang"."""
        return "3' overhang"
    overhang = classmethod(overhang)

    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that produce a 3' overhang and
        for which 'enzyme % RE' is true.
        If batch is not given (or is empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        #
        #   the candidates are taken from batch; the previous code computed
        #   batch but then always scanned AllEnzymes.
        #
        found = [enz for enz in iter(batch)
                 if enz.is_3overhang() and enz % cls]
        found.sort()
        return found
    compatible_end = classmethod(compatible_end)

    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov3 enzyme can be compatible; the decision is then
        delegated to RE._mod2(other)."""
        if not issubclass(other, Ov3):
            return False
        return cls._mod2(other)
    _mod1 = classmethod(_mod1)
1281 1282
class Defined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    and the cut are not variable.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes :
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        rewrite RE.results in place :
        linear sequence   : discard the leading positions below 1, then
                            keep positions only up to the first one that
                            is not below len(RE.dna).
        circular sequence : wrap the out of range positions back into
                            the sequence instead of discarding them."""
        #
        #   The positions may fall outside the sequence because the distance
        #   from the site to the cut is added after the site has been found
        #   (see the original note about _modify and _rev_modify).
        #
        size = len(cls.dna)
        if cls.dna.is_linear():
            after_start = itertools.dropwhile(lambda pos: pos < 1,
                                              cls.results)
            cls.results = list(itertools.takewhile(lambda pos: pos < size,
                                                   after_start))
            return
        # circular : shift the leading positions that are before the origin.
        for i, pos in enumerate(cls.results):
            if pos >= 1:
                break
            cls.results[i] += size
        # circular : shift the trailing positions that are past the end.
        for i, pos in enumerate(cls.results[::-1]):
            if pos <= size:
                break
            cls.results[-(i + 1)] -= size
        return
    _drop = classmethod(_drop)

    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always True for this class.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return True
    is_defined = classmethod(is_defined)

    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return False
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return False
    is_unknown = classmethod(is_unknown)

    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        cut5 = cls.fst5
        cut3 = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if cut5 == 0 and cut3 == 0:
                return 'N^' + cls.site + '_N'
            if cut3 == 0:
                return site[:cut5] + '^' + site[cut5:] + '_N'
            return site[:cut5] + '^' + site[cut5:cut3] + '_' + site[cut3:]
        if cls.is_blunt():
            return site[:cut5] + '^_' + site[cut5:]
        # remaining case : neither 5' overhang nor blunt.
        if cut5 == 0 and cut3 == 0:
            return 'N_' + site + '^N'
        return site[:cut3] + '_' + site[cut3:cut5] + '^' + site[cut5:]
    elucidate = classmethod(elucidate)

    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Identical overhang sequences are compatible; otherwise, when other
        is Ambiguous, the answer is the one of other._mod2(RE)."""
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            # the ambiguous partner knows how to expand its own overhang.
            return other._mod2(cls)
        return False
    _mod2 = classmethod(_mod2)
1416 1417
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes :
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        rewrite RE.results in place :
        linear sequence   : discard the leading positions below 1, then
                            keep positions only up to the first one that
                            is not below len(RE.dna).
        circular sequence : wrap the out of range positions back into
                            the sequence instead of discarding them."""
        size = len(cls.dna)
        if cls.dna.is_linear():
            after_start = itertools.dropwhile(lambda pos: pos < 1,
                                              cls.results)
            cls.results = list(itertools.takewhile(lambda pos: pos < size,
                                                   after_start))
            return
        # circular : shift the leading positions that are before the origin.
        for i, pos in enumerate(cls.results):
            if pos >= 1:
                break
            cls.results[i] += size
        # circular : shift the trailing positions that are past the end.
        for i, pos in enumerate(cls.results[::-1]):
            if pos <= size:
                break
            cls.results[-(i + 1)] -= size
        return
    _drop = classmethod(_drop)

    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False
    is_defined = classmethod(is_defined)

    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always True for this class.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return True
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return False
    is_unknown = classmethod(is_unknown)

    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Overhangs of different length are never compatible. Otherwise the
        overhang of RE is turned into a regular expression ('N' -> '.',
        other ambiguity codes -> '[' + matching[code] + ']') which is
        matched against the overhang of other."""
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        pattern = cls.ovhgseq
        #
        #   the loop walks the ORIGINAL overhang while pattern is rewritten.
        #   NOTE(review): every occurrence of a code triggers a replacement
        #   over the whole pattern, text inserted by earlier replacements
        #   included. Kept as it was; verify against the content of matching.
        #
        for base in cls.ovhgseq:
            if base == 'N':
                pattern = '.'.join(pattern.split('N'))
            elif base in 'RYWMSKHDBV':
                expand = '[' + matching[base] + ']'
                pattern = expand.join(pattern.split(base))
        if re.match(pattern, other.ovhgseq):
            return True
        return False
    _mod2 = classmethod(_mod2)

    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme whose fst5 lies within
        0..len(RE)."""
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        #
        #   the result is built in 'rep' : naming it 're' would hide the
        #   re module inside this method.
        #
        if cls.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                #
                #   was 'N^'*abs(f5)*'N' which multiplies two strings and
                #   raises a TypeError; the sibling branches show that
                #   'N^' + abs(f5)*'N' is what is meant.
                #
                rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' \
                      + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^' \
                      + (length + f3 - f5) * 'N' + '_N'
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i' \
                                 % (cls.name, f5))
        else:
            # neither 5' overhang nor blunt.
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
            elif 0 <= f5 <= length:
                rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0:
                rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
            elif f5 < 0:
                rep = 'N_' + abs(f3 - f5 + length) * 'N' + '^' \
                      + abs(f5) * 'N' + site
            else:
                rep = 'N_' + abs(f3 + length) * 'N' + site \
                      + (f5 - length) * 'N' + '^N'
        return rep
    elucidate = classmethod(elucidate)
1585 1586
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        linear sequence   : RE.results is left untouched.
        circular sequence : the out of range positions of RE.results are
                            wrapped back into the sequence, in place."""
        if cls.dna.is_linear():
            return
        size = len(cls.dna)
        # shift the leading positions that are before the origin.
        for i, pos in enumerate(cls.results):
            if pos >= 1:
                break
            cls.results[i] += size
        #
        #   shift the trailing positions that are past the end. The list
        #   must be walked backwards ([::-1]) so that i counts from the
        #   end, as -(i+1) expects; the previous [:-1] walked it forwards
        #   and tested the wrong elements.
        #
        for i, pos in enumerate(cls.results[::-1]):
            if pos <= size:
                break
            cls.results[-(i + 1)] -= size
        return
    _drop = classmethod(_drop)

    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site. Always False for this class.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False
    is_defined = classmethod(is_defined)

    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site. Always False for this class.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return False
    is_ambiguous = classmethod(is_ambiguous)

    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always True for this class.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return True
    is_unknown = classmethod(is_unknown)

    def _mod2(cls, other):
        """RE._mod2(other) -> never returns.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhang is not defined, so there is nothing to compare :
        always raise a ValueError."""
        #
        #   Normally we should not arrive here. An error is raised rather
        #   than quietly answering False.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!" \
                         % (str(cls), str(other), str(cls)))
    _mod2 = classmethod(_mod2)

    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        For this class the cuts are unknown : the site is returned between
        two question marks."""
        return '? %s ?' % cls.site
    elucidate = classmethod(elucidate)
1692 1693
class Commercially_available(AbstractCut):
    #
    #   Recent additions to Rebase make this naming convention uncertain.
    #   It may be better to say "enzymes which have a supplier".
    #
    """Implement the methods specific to the enzymes which are commercially
    available.

    Internal use only. Not meant to be instantiated."""

    def suppliers(cls):
        """RE.suppliers() -> print the suppliers of RE.

        one line per supplier whose code is in RE.suppl : the supplier name
        followed by a comma. Return None."""
        for code, info in suppliers_dict.items():
            if code not in cls.suppl:
                continue
            print(info[0] + ',')
        return
    suppliers = classmethod(suppliers)

    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE, i.e. the names registered in
        suppliers_dict under the codes found in RE.suppl."""
        names = []
        for code, info in suppliers_dict.items():
            if code in cls.suppl:
                names.append(info[0])
        return names
    supplier_list = classmethod(supplier_list)

    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet. Currently returns None."""
        return
    buffers = classmethod(buffers)

    def is_comm(cls):
        """RE.is_comm() -> bool.

        True if RE has suppliers. Always True for this class."""
        return True
    is_comm = classmethod(is_comm)
1733 1734
class Not_available(AbstractCut):
    """Implement the methods specific to the enzymes which are not commercially
    available.

    Internal use only. Not meant to be instantiated."""

    def suppliers():
        """RE.suppliers() -> print the suppliers of RE.

        There is no supplier : nothing is printed and None is returned."""
        return None
    suppliers = staticmethod(suppliers)

    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE. Always empty for this class."""
        return []
    supplier_list = classmethod(supplier_list)

    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet.
        Always raise a TypeError : the enzyme has no supplier."""
        raise TypeError("Enzyme not commercially available.")
    buffers = classmethod(buffers)

    def is_comm(cls):
        """RE.is_comm() -> bool.

        True if RE has suppliers. Always False for this class."""
        return False
    is_comm = classmethod(is_comm)
1766 1767 1768 ############################################################################### 1769 # # 1770 # Restriction Batch # 1771 # # 1772 ############################################################################### 1773 1774
class RestrictionBatch(set):
    """A set of restriction enzymes that can be searched in one go.

    Elements are RestrictionType objects; names given as strings are
    converted by format(). The result of the last search is cached in
    self.mapping (enzyme -> result of enzyme.search)."""

    def __init__(self, first=[], suppliers=[]):
        """RestrictionBatch([sequence[, suppliers]]) -> new RestrictionBatch.

        first is an iterable of enzymes (or enzyme names).
        suppliers is an iterable of supplier codes (keys of suppliers_dict);
        all the enzymes of these suppliers are added to the batch.
        raise a ValueError if an element of first is not an enzyme.
        raise a KeyError if a supplier code is unknown."""
        # neither default list is ever modified : new lists are built here.
        enzymes = [self.format(x) for x in first]
        enzymes += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, enzymes)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = DNA('')
        #
        #   add_supplier() and current_suppliers() read self.suppliers,
        #   which was never created before.
        #
        self.suppliers = list(suppliers)

    def __str__(self):
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        # long batch : only the two first and the two last names are shown.
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        return self.search(other)

    def __rdiv__(self, other):
        return self.search(other)

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        if enzyme is not in B and add is False, raise a ValueError."""
        e = self.format(enzyme)
        if e in self:
            return e
        if add:
            self.add(e)
            return e
        raise ValueError('enzyme %s is not in RestrictionBatch' \
                         % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        #
        #   the selection is handed to the constructor. It used to be stored
        #   in a '_data' attribute, which the set type does not look at.
        #
        return RestrictionBatch(list(itertools.ifilter(func, self)))

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the names of the suppliers which have been
        used to create the batch."""
        suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
        suppl_list.sort()
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if str(y) can be evaluated to a RestrictionType return that object
        raise a ValueError in all other case."""
        if isinstance(y, RestrictionType):
            return y
        #
        #   SECURITY: eval() executes whatever str(y) contains. Only pass
        #   trusted names to a RestrictionBatch.
        #
        try:
            # evaluated once : the second eval(y) used to fail when y was
            # not itself a string.
            candidate = eval(str(y))
        except (NameError, SyntaxError):
            candidate = None
        if isinstance(candidate, RestrictionType):
            return candidate
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        Errors raised while evaluating str(y) are not caught."""
        return isinstance(y, RestrictionType) or \
               isinstance(eval(str(y)), RestrictionType)

    def split(self, *classes, **bool):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes which are a subclass of every class given, or which
        are NOT a subclass of it when class.__name__ = False is passed.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        def splittest(element):
            for klass in classes:
                wanted = bool.get(klass.__name__, True)
                if issubclass(element, klass):
                    if not wanted:
                        return False
                elif wanted:
                    return False
            return True
        #
        #   the selection is handed to the constructor. It used to be stored
        #   in a '_data' attribute, which the set type does not look at.
        #
        return RestrictionBatch(list(itertools.ifilter(splittest, self)))

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        l = [str(e) for e in self]
        l.sort()
        return l

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    def suppl_codes(cls):
        """B.suppl_codes() -> dict

        letter code for the suppliers : {code : supplier name}."""
        supply = dict([(k, v[0]) for k, v in suppliers_dict.iteritems()])
        return supply
    suppl_codes = classmethod(suppl_codes)

    def show_codes(cls):
        """B.show_codes() -> print the letter codes for the suppliers."""
        supply = [' = '.join(i) for i in cls.suppl_codes().iteritems()]
        print('\n'.join(supply))
        return
    show_codes = classmethod(show_codes)

    def search(self, dna, linear=True):
        """B.search(dna[, linear]) -> dict.

        return {enzyme : result of enzyme.search} for all the enzymes of B.
        dna is either a DNA instance or a FormattedSeq; for a FormattedSeq
        its own 'linear' attribute is used instead of the linear argument.
        When the same sequence is searched twice in a row the cached
        mapping is returned.
        raise a TypeError for any other type of dna."""
        #
        # here we replace the search method of the individual enzymes
        # with one unique testing method.
        #
        if isinstance(dna, DNA):
            if (dna, linear) == self.already_mapped:
                return self.mapping
            self.already_mapped = dna, linear
            fseq = FormattedSeq(dna, linear)
            self.mapping = dict([(x, x.search(fseq)) for x in self])
            return self.mapping
        if isinstance(dna, FormattedSeq):
            if (dna, dna.linear) == self.already_mapped:
                return self.mapping
            self.already_mapped = dna, dna.linear
            self.mapping = dict([(x, x.search(dna)) for x in self])
            return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"\
                        %type(dna))
1992 1993 ############################################################################### 1994 # # 1995 # Restriction Analysis # 1996 # # 1997 ############################################################################### 1998
class Analysis(RestrictionBatch, PrintFormat):
    """Restriction analysis of one sequence with one batch of enzymes.

    The results live in self.mapping, which is filled by the search()
    method inherited from RestrictionBatch (defined outside this class).
    The methods below treat it as a dictionary {enzyme: sites} where sites
    is a sized, iterable collection of cut positions.

    Most methods accept an optional dictionary dct of the same shape.  When
    dct is given, it is used as the base for the computation, which allows
    the filters to be chained; when it is omitted (None) the full mapping
    of the analysis is used.
    """

    def __init__(self, restrictionbatch=RestrictionBatch(), sequence=DNA(''),
                 linear=True):
        """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

        For most of the methods of this class, if a dictionary is given it
        will be used as the base to calculate the results.
        If no dictionary is given, the results of the analysis made with the
        RestrictionBatch given at instantiation are used.

        restrictionbatch -- the enzymes to use; kept as self.rb.
        sequence         -- the sequence to analyse; kept as self.sequence.
        linear           -- passed to search(); False for circular sequences.

        NOTE: the default restrictionbatch and sequence objects are created
        once, when the class is defined, and are shared by every call that
        omits them.
        """
        RestrictionBatch.__init__(self, restrictionbatch)
        self.rb = restrictionbatch
        self.sequence = sequence
        self.linear = linear
        # Only run the search when there is something to search in.
        if self.sequence:
            self.search(self.sequence, self.linear)

    def __repr__(self):
        """A.__repr__() -> str, of the form 'Analysis(rb,sequence,linear)'."""
        return 'Analysis(%s,%s,%s)' % (
            repr(self.rb), repr(self.sequence), self.linear)

    def _sub_set(self, wanted):
        """A._sub_set(other_set) -> dict.

        Internal use only.

        Screen the results through the wanted set.
        Keep only the results for which the enzyme is in the wanted set.
        """
        return dict([(k, v) for k, v in self.mapping.items() if k in wanted])

    def _boundaries(self, start, end):
        """A._boundaries(start, end) -> tuple.

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.

        Values lower than 1 are shifted by the length of the sequence
        (list-like negative indexing).  Returns (start, end, test) where
        test(start, end, site) tells whether site lies inside the region:
        self._test_normal when start < end, self._test_reverse (region
        wrapping around the end of the sequence) otherwise.

        Raises TypeError if start or end is not an int.
        """
        if not isinstance(start, int):
            raise TypeError('expected int, got %s instead' % type(start))
        if not isinstance(end, int):
            raise TypeError('expected int, got %s instead' % type(end))
        if start < 1:
            start += len(self.sequence)
        if end < 1:
            end += len(self.sequence)
        # NOTE(review): the original code contained two statements here,
        # 'start, end == end, start' and 'start == 1', which are comparisons
        # and therefore never had any effect.  They have been removed rather
        # than turned into assignments: swapping the boundaries would make
        # the wrap-around case handled by _test_reverse unreachable, and
        # would change the results callers currently get for start > end.
        if start < end:
            return start, end, self._test_normal
        return start, end, self._test_reverse

    def _test_normal(self, start, end, site):
        """A._test_normal(start, end, site) -> bool.

        Internal use only.
        Test if site is in between start (included) and end (excluded).
        """
        return start <= site < end

    def _test_reverse(self, start, end, site):
        """A._test_reverse(start, end, site) -> bool.

        Internal use only.
        Test if site is in between end and start (for circular sequences),
        i.e. from start to the end of the sequence or from 1 up to, but not
        including, end.
        """
        return start <= site <= len(self.sequence) or 1 <= site < end

    def print_that(self, dct=None, title='', s1=''):
        """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

        If dct is not given the full dictionary is used.
        """
        # An empty dct also falls back to the full mapping here, as in the
        # original code: how PrintFormat.print_that copes with an empty
        # dictionary cannot be checked from this class.
        if not dct:
            dct = self.mapping
        # Blank line before the report (valid on Python 2 and Python 3).
        print("")
        return PrintFormat.print_that(self, dct, title, s1)

    def change(self, **what):
        """A.change(**attribute_name) -> Change attribute of Analysis.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refers to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to an 80 columns shell
        or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
        you get it right.

        Changing 'sequence' or 'linear' runs the search again; changing 'rb'
        re-initialises the analysis with the new batch.

        Raises AttributeError for 'Cmodulo' and 'PrefWidth' (they are derived
        from NameWidth and ConsoleWidth) and for any unknown name.
        """
        for k, v in what.items():
            if k in ('NameWidth', 'ConsoleWidth'):
                setattr(self, k, v)
                # Keep the derived layout values in step.
                self.Cmodulo = self.ConsoleWidth % self.NameWidth
                self.PrefWidth = self.ConsoleWidth - self.Cmodulo
            elif k == 'sequence':
                self.sequence = v
                self.search(self.sequence, self.linear)
            elif k == 'rb':
                # __init__ returns None: call it for its effect only and do
                # not rebind self, or the following keywords would fail.
                Analysis.__init__(self, v, self.sequence, self.linear)
            elif k == 'linear':
                self.linear = v
                self.search(self.sequence, v)
            elif k in ('Indent', 'Maxsize'):
                setattr(self, k, v)
            elif k in ('Cmodulo', 'PrefWidth'):
                raise AttributeError(
                    'To change %s, change NameWidth and/or ConsoleWidth' % k)
            else:
                raise AttributeError('Analysis has no attribute %s' % k)
        return

    def full(self, linear=True):
        """A.full() -> dict.

        Full Restriction Map of the sequence.

        The linear argument is accepted but not used.
        """
        return self.mapping

    def blunt(self, dct=None):
        """A.blunt([dct]) -> dict.

        Only the enzymes which have a blunt restriction site."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if k.is_blunt()])

    def overhang5(self, dct=None):
        """A.overhang5([dct]) -> dict.

        Only the enzymes which have a 5' overhang restriction site."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if k.is_5overhang()])

    def overhang3(self, dct=None):
        """A.overhang3([dct]) -> dict.

        Only the enzymes which have a 3' overhang restriction site."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if k.is_3overhang()])

    def defined(self, dct=None):
        """A.defined([dct]) -> dict.

        Only the enzymes that have a defined restriction site in Rebase."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if k.is_defined()])

    def with_sites(self, dct=None):
        """A.with_sites([dct]) -> dict.

        Enzymes which have at least one site in the sequence."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if v])

    def without_site(self, dct=None):
        """A.without_site([dct]) -> dict.

        Enzymes which have no site in the sequence."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if not v])

    def with_N_sites(self, N, dct=None):
        """A.with_N_sites(N [, dct]) -> dict.

        Enzymes which cut the sequence exactly N times."""
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if len(v) == N])

    def with_number_list(self, list, dct=None):
        """A.with_number_list(list [, dct]) -> dict.

        Enzymes whose number of sites is one of the values in list."""
        # 'list' shadows the builtin, but it is part of the public signature
        # and is kept so that keyword callers continue to work.
        if dct is None:
            dct = self.mapping
        return dict([(k, v) for k, v in dct.items() if len(v) in list])

    def with_name(self, names, dct=None):
        """A.with_name(list_of_names [, dct]) -> dict.

        Limit the search to the enzymes named in list_of_names.

        Names which are not in AllEnzymes are reported on standard output
        and ignored.  The list given by the caller is not modified.
        Without dct, a new search of self.sequence is made with the known
        enzymes; with dct, the entries of dct for these enzymes are returned.
        """
        # Build a new list instead of deleting from 'names' while looping
        # over it, which would skip the element following each deletion.
        known = []
        for enzyme in names:
            if enzyme in AllEnzymes:
                known.append(enzyme)
            else:
                print("no datas for the enzyme: %s" % enzyme)
        if dct is None:
            return RestrictionBatch(known).search(self.sequence, self.linear)
        return dict([(n, dct[n]) for n in known if n in dct])

    def with_site_size(self, site_size, dct=None):
        """A.with_site_size(site_size [, dct]) -> dict.

        Limit the search to the enzymes whose site is of size <site_size>.

        Without dct, a new search of self.sequence is made with the enzymes
        of this batch which have the wanted size; with dct, the entries of
        dct for these enzymes are returned.
        """
        sites = [name for name in self if name.size == site_size]
        if dct is None:
            return RestrictionBatch(sites).search(self.sequence, self.linear)
        return dict([(k, v) for k, v in dct.items() if k in sites])

    def only_between(self, start, end, dct=None):
        """A.only_between(start, end[, dct]) -> dict.

        Enzymes that cut the sequence only in between start and end.
        Enzymes without any site are not included."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            if not sites:
                continue
            for site in sites:
                if not test(start, end, site):
                    break
            else:
                # No site outside of the region was found.
                d[key] = sites
        return d

    def between(self, start, end, dct=None):
        """A.between(start, end [, dct]) -> dict.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            for site in sites:
                if test(start, end, site):
                    d[key] = sites
                    break
        return d

    def show_only_between(self, start, end, dct=None):
        """A.show_only_between(start, end [, dct]) -> dict.

        Enzymes that cut the sequence at least in between start and end,
        as for A.between(), but only the sites located inside the region
        are kept in the result."""
        selected = self.between(start, end, dct)
        # Use the same boundaries and the same test as between(), so that
        # every enzyme kept has at least one site in its list.
        start, end, test = self._boundaries(start, end)
        return dict([(k, [site for site in sites if test(start, end, site)])
                     for k, sites in selected.items()])

    def only_outside(self, start, end, dct=None):
        """A.only_outside(start, end [, dct]) -> dict.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside.
        Enzymes without any site are not included."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            if not sites:
                continue
            for site in sites:
                if test(start, end, site):
                    break
            else:
                # No site inside the region was found.
                d[key] = sites
        return d

    def outside(self, start, end, dct=None):
        """A.outside(start, end [, dct]) -> dict.

        Enzymes that cut outside the region in between start and end.
        No test is made to know if they cut or not inside this region."""
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            for site in sites:
                if not test(start, end, site):
                    d[key] = sites
                    break
        return d

    def do_not_cut(self, start, end, dct=None):
        """A.do_not_cut(start, end [, dct]) -> dict.

        Enzymes that do not cut the region in between start and end:
        the enzymes without any site plus those cutting only outside."""
        if dct is None:
            dct = self.mapping
        # Both parts are computed from the same base dictionary.
        d = self.without_site(dct)
        d.update(self.only_outside(start, end, dct))
        return d

#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# The reason for the two dictionaries in Restriction_Dictionary,
# one for the types (which will be called pseudo-types as they really
# correspond to the values that instances of RestrictionType can take)
# and one for the enzymes, is efficiency: the bases are evaluated only
# once per pseudo-type.
#
# However, Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. This processing takes place only once.
# The inefficiency is largely compensated by the use of a metaclass, which
# provides a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else tests in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO switch)
# as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...).
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the module namespace. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First evaluate the bases: each string names a class of this module.
    # NOTE(review): eval() is applied to the content of typedict, which is
    # presumably generated, trusted data -- confirm before feeding it
    # anything else. A generator expression is used so that the loop
    # variable does not leak into the module namespace.
    #
    bases = tuple(eval(base_name, globals()) for base_name in bases)
    #
    # Now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg, ...).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # We add the enzymes to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
# Now, place the enzymes in the module namespace so they can be imported.
# globals() is used because modifying the dictionary returned by locals()
# is not guaranteed to have any effect; zip() pairs each name with its
# enzyme (both come from iterating the same, unmodified batch).
#
names = list(map(str, AllEnzymes))
globals().update(dict(zip(names, AllEnzymes)))
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm'] + names
# Remove the temporary loop variables from the module namespace.
del k, enzymes, TYPE, bases, names
