Trees | Indices | Help |
---|
|
#!/usr/bin/env python
#
# Restriction Analysis Libraries.
# Copyright (C) 2004. Frederic Sohm.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#

r"""Format and print the results of a restriction analysis.

Usage :

    PrintFormat allows printing the results of a restriction analysis in 3
    different formats: list, number of sites, or map.

    The easiest way to use it is :

    >>> from Rana.PrintFormat import PrintFormat
    >>> from Rana.Restriction import AllEnzymes
    >>> from Rana.fts import fts
    >>> seq = fts(pBR322)
    >>> dct = AllEnzymes.search(seq)
    >>> new = PrintFormat()
    >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')

     my pBR322 analysis

    AasI : 2169, 2582.
    AatII : 4289.
    ...
    More enzymes.
    ...
    ZraI : 4287.
    ZrmI : 3847.

     no site :

    AarI AatI Acc65I AcsI AcvI AdeI AflII AgeI
    ...
    More enzymes.
    ...
    Vha464I XapI XbaI XcmI XhoI XmaCI XmaI XmaJI
    Zsp2I

    >>>

    Some of the methods of PrintFormat are meant to be overridden by derived
    classes.

    NOTE(review): the example imports from a ``Rana`` package while this
    module itself imports from ``Bio.Restriction`` -- confirm the paths.
"""

import re
import itertools
from Bio.Restriction import RanaConfig as RanaConf
from Bio.Restriction.DNAUtils import complement

# NOTE(review): this file was restored from a rendered source listing in
# which runs of spaces were collapsed and the class/def lines were missing.
# Method signatures were rebuilt from the docstrings and call sites; the
# spacing inside string literals (section headers, ' : ') is reproduced as
# displayed and should be confirmed against the original file.


class PrintFormat(object):
    """PrintFormat allows the printing of results of restriction analysis.

    The class only formats. It reads self.results (in print_that, when no
    dictionary is given) and self.sequence (in the map format); neither is
    set here, so they must be provided by the instance or a derived class.
    """

    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    Cmodulo = ConsoleWidth % NameWidth
    # Largest multiple of NameWidth that fits in the console width.
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    linesize = PrefWidth - NameWidth

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> print the results as specified.

        Valid formats are :
            'list'   -> alphabetical order
            'number' -> number of sites in the sequence
            'map'    -> a map representation of the sequence with the sites.

        Any other value selects the 'list' format.

        If you want more flexibility override the virtual method make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list
        return

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        dct is a dictionary as returned by a RestrictionBatch.search().
        If dct is empty or None, self.results is used instead.

        title is the title of the map.
        It must be a formatted string, i.e. you must include the line break.

        s1 is the title separating the list of enzymes that have sites from
        those without sites.
        s1 must be a formatted string as well.

        The output format is whatever make_format currently points to
        (a list unless print_as() selected something else)."""
        if not dct:
            dct = self.results
        ls, nc = [], []
        for enzyme, sites in dct.items():
            if sites:
                ls.append((enzyme, sites))
            else:
                nc.append(enzyme)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.make_format(ls, title, nc, s1))
        return

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it."""
        return self._make_list(cut, title, nc, s1)

    ###### _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        return a string of form :

        title.

        enzyme1 : position1, position2.
        enzyme2 : position1, position2, position3.

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        return a string of form :

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        title.

        enzyme which cut 1 time :

        enzyme1 : position1.

        enzyme which cut 2 times :

        enzyme2 : position1, position2.
        ...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        nc is a list of non cutting enzymes; it is sorted in place.
        s1 is the sentence before the non cutting enzymes; a default
        sentence is used when s1 is empty.
        ls and title are accepted and ignored.

        When nc is empty, s1 is returned unchanged.
        """
        if not nc:
            return s1
        nc.sort()
        pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        for key in nc:
            row += str(key).ljust(self.NameWidth)
            # Flush the row once it has grown past the usable line width.
            if len(row) > self.linesize:
                pieces.append(row + '\n')
                row = ''
        pieces.append(row + '\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form :

        title.

        enzyme1 : position1, position2.
        enzyme2 : position1, position2, position3.
        ...

        ls is a list of results.
        title is a string.
        nc and s1 are accepted and ignored.
        Non cutting enzymes are not included."""
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form :

        title.

        enzyme which cut 1 time :

        enzyme1 : position1.

        enzyme which cut 2 times :

        enzyme2 : position1, position2.
        ...

        ls is a list of results; it is sorted in place by number of sites.
        title is a string.
        nc and s1 are accepted and ignored.
        Non cutting enzymes are not included.

        A section is emitted only for site counts that actually occur, so
        no empty "cut 1 times" section appears when every enzyme cuts more
        than once."""
        if not ls:
            return title

        def site_count(item):
            return len(item[1])

        # list.sort is stable, like the cmp-based sort it replaces.
        ls.sort(key=site_count)
        for count, group in itertools.groupby(ls, key=site_count):
            title += "\n\nenzymes which cut %i times :\n\n" % count
            title = self.__next_section(list(group), title)
        return title

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form :

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of results.
        title is a string.
        nc and s1 are accepted and ignored.
        Non cutting enzymes are not included.

        The sequence is read from self.sequence and drawn in windows of
        60 bases; the last window holds whatever remains.
        """
        if not ls:
            return title
        # cut position -> names of the enzymes cutting there.
        enzymemap = {}
        for enzyme, cut in ls:
            for position in cut:
                enzymemap.setdefault(position, []).append(str(enzyme))
        remaining = sorted(enzymemap)
        length = len(self.sequence)
        sequence = self.sequence.tostring()
        revsequence = complement(sequence)

        def unit(start, end):
            # Both strands of sequence[start:end], the pairing bars and a
            # ruler carrying the first and last position of the window.
            width = end - start
            ruler = ''.join((str(start + 1).ljust(15), ' ' * (width - 30),
                             str(end).rjust(15), '\n\n'))
            return '\n'.join((sequence[start:end], '|' * width,
                              revsequence[start:end], ruler))

        # Window start offsets; a sequence of 60 bases or fewer (including
        # an empty one) is drawn as a single window starting at 0.
        starts = list(range(0, length, 60)) or [0]
        parts = [title or '']
        for start in starts[:-1]:
            end = start + 60
            # remaining is sorted, so the sites of this window are a prefix.
            # Every window gets its own bucket even when no site lies beyond
            # it (the former code raised KeyError or redrew earlier sites).
            window = [key for key in remaining if key <= end]
            remaining = remaining[len(window):]
            parts.append(self._map_labels(window, enzymemap, start, end))
            parts.append(unit(start, end))
        start = starts[-1]
        parts.append(self._map_labels(remaining, enzymemap, start, length))
        parts.append(unit(start, length))
        return ''.join(parts)

    def _map_labels(self, keys, enzymemap, start, end):
        """PF._map_labels(keys, enzymemap, start, end) -> string.

        return the label and marker lines drawn above one map window.

        keys is the sorted list of cut positions of the window.
        enzymemap maps a cut position to the list of enzyme names.
        start is the offset of the window in the sequence.
        end is the position of the last base of the window.

        Each site yields two lines: "<position> <names>" starting at the
        column of the site, then a line of '|' markers for the sites drawn
        so far. A site at the very end of the window closes the window."""
        marker = '|'
        line = ' ' * 60
        out = []
        for key in keys:
            names = ''.join([' ' + name for name in enzymemap[key]])
            if key == end:
                # key % 60 may be 0 here, so the column is taken relative
                # to the window (also in the last window, where an absolute
                # index used to misplace the marker).
                prefix = line[0:end - start - 1]
                out.append(''.join((prefix, str(key), names, '\n',
                                    prefix, marker, '\n')))
                break
            col = key % 60 - 1
            prefix = line[0:col]
            line = ''.join((prefix, marker, line[col + 1:]))
            out.append(''.join((prefix, str(key), names, '\n', line, '\n')))
        return ''.join(out)

    ###### private method to do lists :

    def __next_section(self, ls, into):
        """FP.__next_section(ls, into) -> string.

        ls is a list of tuple (string, [int, int]); it is sorted in place.
        into is a string to which the formatted ls will be added.

        Format ls as a string of lines :
        The form is :

        enzyme1 : position1.
        enzyme2 : position2, position3.

        then add the formatted ls to into and return the result."""
        ls.sort()
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Matches at most linesize characters followed by ',' or '.', i.e.
        # a chunk of the position list that ends on a separator.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        lines = [into]
        for name, sites in ls:
            stringsite = ', '.join([str(site) for site in sites]) + '.'
            if len(stringsite) > linesize:
                #
                # cut where appropriate and add the indentation
                #
                chunks = [m.group() for m in pat.finditer(stringsite)]
                stringsite = indentation.join(chunks)
            lines.append(''.join((str(name).ljust(self.NameWidth), ' : ',
                                  stringsite, '\n')))
        return ''.join(lines)
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Tue Sep 22 19:55:29 2009 | http://epydoc.sourceforge.net |