Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008-2009 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """ 
  7  Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools. 
  8   
  9  You are expected to use this module via the Bio.AlignIO functions (or the 
 10  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 11   
 12  This module contains a parser for the EMBOSS pairs/simple file format, for 
 13  example from the alignret, water and needle tools. 
 14  """ 
 15   
 16  from Bio.Align.Generic import Alignment 
 17  from Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 18   
19 -class EmbossWriter(SequentialAlignmentWriter) :
20 """Emboss alignment writer (WORK IN PROGRESS). 21 22 Writes a simplfied version of the EMBOSS pairs/simple file format. 23 A lot of the information their tools record in their headers is not 24 available and is ommitted. 25 """ 26
27 - def write_header(self) :
28 handle = self.handle 29 handle.write("########################################\n") 30 handle.write("# Program: Biopython\n") 31 try : 32 handle.write("# Report_file: %s\n" % handle.name) 33 except AttributeError : 34 pass 35 handle.write("########################################\n")
36 41
42 - def write_alignment(self, alignment) :
43 """Use this to write (another) single alignment to an open file.""" 44 45 handle = self.handle 46 records = alignment.get_all_seqs() 47 48 handle.write("#=======================================\n") 49 handle.write("#\n") 50 handle.write("# Aligned_sequences: %i\n" % len(records)) 51 for i, record in enumerate(records) : 52 handle.write("# %i: %s\n" % (i+1, record.id)) 53 handle.write("#\n") 54 handle.write("# Length: %i\n" % alignment.get_alignment_length()) 55 handle.write("#\n") 56 handle.write("#=======================================\n") 57 handle.write("\n") 58 #... 59 assert False
60
61 -class EmbossIterator(AlignmentIterator) :
62 """Emboss alignment iterator. 63 64 For reading the (pairwise) alignments from EMBOSS tools in what they 65 call the "pairs" and "simple" formats. 66 """ 67
68 - def next(self) :
69 70 handle = self.handle 71 72 try : 73 #Header we saved from when we were parsing 74 #the previous alignment. 75 line = self._header 76 del self._header 77 except AttributeError: 78 line = handle.readline() 79 if not line: 80 return None 81 82 while line.rstrip() != "#=======================================" : 83 line = handle.readline() 84 if not line : 85 return None 86 87 length_of_seqs = None 88 number_of_seqs = None 89 ids = [] 90 seqs = [] 91 92 93 while line[0] == "#" : 94 #Read in the rest of this alignment header, 95 #try and discover the number of records expected 96 #and their length 97 parts = line[1:].split(":",1) 98 key = parts[0].lower().strip() 99 if key == "aligned_sequences" : 100 number_of_seqs = int(parts[1].strip()) 101 assert len(ids) == 0 102 # Should now expect the record identifiers... 103 for i in range(number_of_seqs) : 104 line = handle.readline() 105 parts = line[1:].strip().split(":",1) 106 assert i+1 == int(parts[0].strip()) 107 ids.append(parts[1].strip()) 108 assert len(ids) == number_of_seqs 109 if key == "length" : 110 length_of_seqs = int(parts[1].strip()) 111 112 #And read in another line... 113 line = handle.readline() 114 115 if number_of_seqs is None : 116 raise ValueError("Number of sequences missing!") 117 if length_of_seqs is None : 118 raise ValueError("Length of sequences missing!") 119 120 if self.records_per_alignment is not None \ 121 and self.records_per_alignment != number_of_seqs : 122 raise ValueError("Found %i records in this alignment, told to expect %i" \ 123 % (number_of_seqs, self.records_per_alignment)) 124 125 seqs = ["" for id in ids] 126 seq_starts = [] 127 index = 0 128 129 #Parse the seqs 130 while line : 131 if len(line) > 21 : 132 id_start = line[:21].strip().split(None, 1) 133 seq_end = line[21:].strip().split(None, 1) 134 if len(id_start) == 2 and len(seq_end) == 2: 135 #identifier, seq start position, seq, seq end position 136 #(an aligned seq is broken up into multiple lines) 137 id, start = id_start 138 seq, end = seq_end 139 if start==end : 140 #Special case, 141 assert len(seq.replace("-",""))==0 142 start = int(start) 143 end = int(end) 144 else : 145 start = int(start)-1 #python counting 146 end = int(end) 147 148 #The identifier is truncated... 149 assert 0 <= index and index < number_of_seqs, \ 150 "Expected index %i in range [0,%i)" \ 151 % (index, number_of_seqs) 152 assert id==ids[index] or id == ids[index][:len(id)] 153 154 if len(seq_starts) == index : 155 #Record the start 156 seq_starts.append(start) 157 158 #Check the start... 159 assert start - seq_starts[index] == len(seqs[index].replace("-","")), \ 160 "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \ 161 % (len(seqs[index].replace("-","")), index, id, seqs[index], 162 start, line) 163 164 seqs[index] += seq 165 166 #Check the end ... 167 assert end == seq_starts[index] + len(seqs[index].replace("-","")), \ 168 "Found %i chars so far for %s, file says end %i:\n%s" \ 169 % (len(seqs[index]), id, end, repr(seqs[index])) 170 171 index += 1 172 if index >= number_of_seqs : 173 index = 0 174 else : 175 #just a start value, this is just alignment annotation (?) 176 #print "Skipping: " + line.rstrip() 177 pass 178 elif line.strip() == "" : 179 #Just a spacer? 180 pass 181 else : 182 print line 183 assert False 184 185 line = handle.readline() 186 if line.rstrip() == "#---------------------------------------" \ 187 or line.rstrip() == "#=======================================" : 188 #End of alignment 189 self._header = line 190 break 191 192 assert index == 0 193 194 if self.records_per_alignment is not None \ 195 and self.records_per_alignment != len(ids) : 196 raise ValueError("Found %i records in this alignment, told to expect %i" \ 197 % (len(ids), self.records_per_alignment)) 198 199 alignment = Alignment(self.alphabet) 200 for id, seq in zip(ids, seqs) : 201 if len(seq) != length_of_seqs : 202 #EMBOSS 2.9.0 is known to use spaces instead of minus signs 203 #for leading gaps, and thus fails to parse. This old version 204 #is still used as of Dec 2008 behind the EBI SOAP webservice: 205 #http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl 206 raise ValueError("Error parsing alignment - sequences of " 207 "different length? You could be using an " 208 "old version of EMBOSS.") 209 alignment.add_sequence(id, seq) 210 return alignment
211 212 if __name__ == "__main__" : 213 print "Running a quick self-test" 214 215 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 216 simple_example = \ 217 """######################################## 218 # Program: alignret 219 # Rundate: Wed Jan 16 17:16:13 2002 220 # Report_file: stdout 221 ######################################## 222 #======================================= 223 # 224 # Aligned_sequences: 4 225 # 1: IXI_234 226 # 2: IXI_235 227 # 3: IXI_236 228 # 4: IXI_237 229 # Matrix: EBLOSUM62 230 # Gap_penalty: 10.0 231 # Extend_penalty: 0.5 232 # 233 # Length: 131 234 # Identity: 95/131 (72.5%) 235 # Similarity: 127/131 (96.9%) 236 # Gaps: 25/131 (19.1%) 237 # Score: 100.0 238 # 239 # 240 #======================================= 241 242 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 243 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 244 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 245 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 246 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 247 248 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 249 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 250 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 251 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 252 ||:||||||||||||||||||||:::::::::::||||||||||||| | 253 254 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 255 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 256 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 257 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 258 |||:||||:|||||:|||||||::||||||| 259 260 261 #--------------------------------------- 262 #--------------------------------------- 263 264 """ 265 266 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 267 pair_example = \ 268 """######################################## 269 # Program: water 270 # Rundate: Wed Jan 16 17:23:19 2002 271 # Report_file: stdout 272 ######################################## 273 #======================================= 274 # 275 # Aligned_sequences: 2 276 # 1: IXI_234 277 # 2: IXI_235 278 # Matrix: EBLOSUM62 279 # Gap_penalty: 10.0 280 # Extend_penalty: 0.5 281 # 282 # Length: 131 283 # Identity: 112/131 (85.5%) 284 # Similarity: 112/131 (85.5%) 285 # Gaps: 19/131 (14.5%) 286 # Score: 591.5 287 # 288 # 289 #======================================= 290 291 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 292 ||||||||||||||| |||||||||||||||||||||||||| 293 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 294 295 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 296 |||||||||||||||||||||||| |||||||||||||||| 297 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 298 299 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 300 ||||||||||||||||||||||||||||||| 301 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 302 303 304 #--------------------------------------- 305 #--------------------------------------- 306 307 308 """ 309 310 pair_example2 = \ 311 """######################################## 312 # Program: needle 313 # Rundate: Sun 27 Apr 2007 17:20:35 314 # Commandline: needle 315 # [-asequence] Spo0F.faa 316 # [-bsequence] paired_r.faa 317 # -sformat2 pearson 318 # Align_format: srspair 319 # Report_file: ref_rec .needle 320 ######################################## 321 322 #======================================= 323 # 324 # Aligned_sequences: 2 325 # 1: ref_rec 326 # 2: gi|94968718|receiver 327 # Matrix: EBLOSUM62 328 # Gap_penalty: 10.0 329 # Extend_penalty: 0.5 330 # 331 # Length: 124 332 # Identity: 32/124 (25.8%) 333 # Similarity: 64/124 (51.6%) 334 # Gaps: 17/124 (13.7%) 335 # Score: 112.0 336 # 337 # 338 #======================================= 339 340 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 341 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 342 gi|94968718|r 1 -VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 343 344 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 345 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 346 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 347 348 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 349 :..| ..|:|.|: || 350 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 351 352 353 #======================================= 354 # 355 # Aligned_sequences: 2 356 # 1: ref_rec 357 # 2: gi|94968761|receiver 358 # Matrix: EBLOSUM62 359 # Gap_penalty: 10.0 360 # Extend_penalty: 0.5 361 # 362 # Length: 119 363 # Identity: 34/119 (28.6%) 364 # Similarity: 58/119 (48.7%) 365 # Gaps: 9/119 ( 7.6%) 366 # Score: 154.0 367 # 368 # 369 #======================================= 370 371 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 372 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 373 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 374 375 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 376 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 377 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 378 379 ref_rec 101 PFDIDEIRDAV-------- 111 380 |...|::...| 381 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 382 383 384 #======================================= 385 # 386 # Aligned_sequences: 2 387 # 1: ref_rec 388 # 2: gi|94967506|receiver 389 # Matrix: EBLOSUM62 390 # Gap_penalty: 10.0 391 # Extend_penalty: 0.5 392 # 393 # Length: 120 394 # Identity: 29/120 (24.2%) 395 # Similarity: 53/120 (44.2%) 396 # Gaps: 9/120 ( 7.5%) 397 # Score: 121.0 398 # 399 # 400 #======================================= 401 402 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 403 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 404 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 405 406 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 407 |:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 408 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 409 410 ref_rec 100 KPFDIDEIRDAV-------- 111 411 ||.|||.:.:.. 412 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 413 414 415 #======================================= 416 # 417 # Aligned_sequences: 2 418 # 1: ref_rec 419 # 2: gi|94970045|receiver 420 # Matrix: EBLOSUM62 421 # Gap_penalty: 10.0 422 # Extend_penalty: 0.5 423 # 424 # Length: 118 425 # Identity: 30/118 (25.4%) 426 # Similarity: 64/118 (54.2%) 427 # Gaps: 9/118 ( 7.6%) 428 # Score: 126.0 429 # 430 # 431 #======================================= 432 433 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 434 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 435 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 436 437 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 438 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 439 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 440 441 ref_rec 99 -AKPFDID----EIRDAV 111 442 .|||.:| :||:.: 443 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 444 445 446 #======================================= 447 # 448 # Aligned_sequences: 2 449 # 1: ref_rec 450 # 2: gi|94970041|receiver 451 # Matrix: EBLOSUM62 452 # Gap_penalty: 10.0 453 # Extend_penalty: 0.5 454 # 455 # Length: 125 456 # Identity: 35/125 (28.0%) 457 # Similarity: 70/125 (56.0%) 458 # Gaps: 18/125 (14.4%) 459 # Score: 156.5 460 # 461 # 462 #======================================= 463 464 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 465 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 466 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 467 468 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 469 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 470 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 471 472 ref_rec 99 A----KPFDIDEIRDAV-------- 111 473 | |||..|.:...| 474 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 475 476 477 #--------------------------------------- 478 #--------------------------------------- 479 480 """ 481 482 pair_example3 = """######################################## 483 # Program: needle 484 # Rundate: Mon 14 Jul 2008 11:45:42 485 # Commandline: needle 486 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 487 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 488 # -filter 489 # Align_format: srspair 490 # Report_file: stdout 491 ######################################## 492 493 #======================================= 494 # 495 # Aligned_sequences: 2 496 # 1: asis 497 # 2: asis 498 # Matrix: EDNAFULL 499 # Gap_penalty: 10.0 500 # Extend_penalty: 0.5 501 # 502 # Length: 667 503 # Identity: 210/667 (31.5%) 504 # Similarity: 210/667 (31.5%) 505 # Gaps: 408/667 (61.2%) 506 # Score: 561.0 507 # 508 # 509 #======================================= 510 511 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 512 513 asis 0 -------------------------------------------------- 0 514 515 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 516 517 asis 0 -------------------------------------------------- 0 518 519 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 520 521 asis 0 -------------------------------------------------- 0 522 523 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 524 .|||||| 525 asis 1 ------------TTATTAA------------------------------- 7 526 527 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 528 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 529 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 530 531 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 532 ||.|.|||||.|||.||||.|||| | ||||||||| 533 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 534 535 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 536 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 537 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 538 539 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 540 ||| |||||| .||||...||||..|||||. 541 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 542 543 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 544 .| ||||||||||| || ||.||.||.|| 545 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 546 547 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 548 ||..||.||.|.|||..||||.|| ||||| | 549 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 550 551 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 552 ||| |.||| |||||||||.||| .||||||...||||||||| 553 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 554 555 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 556 ||||||||..| |||||||| 557 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 558 559 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 560 561 asis 311 -------------------------------------------------- 311 562 563 asis 599 GAAAGGGGGTTAATAGC 615 564 565 asis 311 ----------------- 311 566 567 568 #--------------------------------------- 569 #---------------------------------------""" 570 571 from StringIO import StringIO 572 573 alignments = list(EmbossIterator(StringIO(pair_example))) 574 assert len(alignments) == 1 575 assert len(alignments[0].get_all_seqs()) == 2 576 assert [r.id for r in alignments[0].get_all_seqs()] \ 577 == ["IXI_234", "IXI_235"] 578 579 alignments = list(EmbossIterator(StringIO(simple_example))) 580 assert len(alignments) == 1 581 assert len(alignments[0].get_all_seqs()) == 4 582 assert [r.id for r in alignments[0].get_all_seqs()] \ 583 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 584 585 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 586 assert len(alignments) == 2 587 assert len(alignments[0].get_all_seqs()) == 2 588 assert len(alignments[1].get_all_seqs()) == 4 589 assert [r.id for r in alignments[0].get_all_seqs()] \ 590 == ["IXI_234", "IXI_235"] 591 assert [r.id for r in alignments[1].get_all_seqs()] \ 592 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 593 594 alignments = list(EmbossIterator(StringIO(pair_example2))) 595 assert len(alignments) == 5 596 assert len(alignments[0].get_all_seqs()) == 2 597 assert [r.id for r in alignments[0].get_all_seqs()] \ 598 == ["ref_rec", "gi|94968718|receiver"] 599 assert [r.id for r in alignments[4].get_all_seqs()] \ 600 == ["ref_rec", "gi|94970041|receiver"] 601 602 603 alignments = list(EmbossIterator(StringIO(pair_example3))) 604 assert len(alignments) == 1 605 assert len(alignments[0].get_all_seqs()) == 2 606 assert [r.id for r in alignments[0].get_all_seqs()] \ 607 == ["asis","asis"] 608 609 print "Done" 610