Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008-2009 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """ 
  7  Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools. 
  8   
  9  You are expected to use this module via the Bio.AlignIO functions (or the 
 10  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 11   
 12  This module contains a parser for the EMBOSS pairs/simple file format, for 
 13  example from the alignret, water and needle tools. 
 14  """ 
 15   
 16  from Bio.Align.Generic import Alignment 
 17  from Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 18   
class EmbossWriter(SequentialAlignmentWriter):
    """Emboss alignment writer (WORK IN PROGRESS).

    Writes a simplified version of the EMBOSS pairs/simple file format.
    A lot of the information their tools record in their headers is not
    available and is omitted.

    Only the report header and the per-alignment header are written so far;
    write_alignment() raises NotImplementedError after writing the
    per-alignment header because the sequence rows are not produced yet.
    """

    def write_header(self):
        """Write the report header to self.handle.

        The "# Report_file" line is only written when the handle has a
        ``name`` attribute.
        """
        handle = self.handle
        handle.write("########################################\n")
        handle.write("# Program: Biopython\n")
        # Keep the try body down to the attribute lookup so that an
        # AttributeError raised by write() itself is not swallowed.
        try:
            report_name = handle.name
        except AttributeError:
            pass
        else:
            handle.write("# Report_file: %s\n" % report_name)
        handle.write("########################################\n")

    # NOTE(review): the listing this class was recovered from skips its
    # original lines 36-41 at this point, so anything defined there is not
    # reproduced here - confirm against a pristine copy of the module.

    def write_alignment(self, alignment):
        """Use this to write (another) single alignment to an open file.

        Writes the per-alignment header: the number of records returned by
        alignment.get_all_seqs(), the id of each record, and the value of
        alignment.get_alignment_length().

        Raises NotImplementedError after the header has been written,
        because writing the aligned sequences is not implemented.
        """
        handle = self.handle
        records = alignment.get_all_seqs()

        handle.write("#=======================================\n")
        handle.write("#\n")
        handle.write("# Aligned_sequences: %i\n" % len(records))
        for i, record in enumerate(records):
            handle.write("# %i: %s\n" % (i + 1, record.id))
        handle.write("#\n")
        handle.write("# Length: %i\n" % alignment.get_alignment_length())
        handle.write("#\n")
        handle.write("#=======================================\n")
        handle.write("\n")
        # An explicit exception rather than "assert False": asserts are
        # removed under "python -O", which would leave a header-only
        # alignment in the output without any error.
        raise NotImplementedError("Writing the aligned sequences in EMBOSS "
                                  "format is not implemented yet")
60
class EmbossIterator(AlignmentIterator):
    """Emboss alignment iterator.

    For reading the (pairwise) alignments from EMBOSS tools in what they
    call the "pairs" and "simple" formats.
    """

    def next(self):
        """Parse the next alignment from self.handle.

        Returns an Alignment built with self.alphabet, or None when the
        handle is exhausted before another alignment header is found.

        Raises ValueError if the alignment header does not give the number
        or the length of the sequences, if the number of sequences differs
        from self.records_per_alignment (when that is not None), or if the
        sequence rows disagree with the header or with their own start and
        end positions.
        """
        handle = self.handle

        try:
            # Header we saved from when we were parsing
            # the previous alignment.
            line = self._header
            del self._header
        except AttributeError:
            line = handle.readline()
        if not line:
            return None

        # Skip forward to the line opening the next alignment header.
        while line.rstrip() != "#=======================================":
            line = handle.readline()
            if not line:
                return None

        length_of_seqs = None
        number_of_seqs = None
        ids = []

        # Read in the rest of this alignment header, try and discover the
        # number of records expected and their length.  startswith() is used
        # instead of line[0] so that a file which ends inside the header
        # falls through to the ValueError checks below rather than raising
        # IndexError on an empty string.
        while line.startswith("#"):
            parts = line[1:].split(":", 1)
            key = parts[0].lower().strip()
            if key in ("aligned_sequences", "length") and len(parts) != 2:
                raise ValueError("Expected a value after %r in:\n%s"
                                 % (key, line))
            if key == "aligned_sequences":
                if ids:
                    raise ValueError("Aligned_sequences given more than once "
                                     "in one alignment header")
                number_of_seqs = int(parts[1].strip())
                # Should now expect the record identifiers...
                for i in range(number_of_seqs):
                    line = handle.readline()
                    id_parts = line[1:].strip().split(":", 1)
                    if len(id_parts) != 2:
                        raise ValueError("Expected identifier line %i, got:\n%s"
                                         % (i + 1, line))
                    if int(id_parts[0].strip()) != i + 1:
                        raise ValueError("Expected identifier line %i, got:\n%s"
                                         % (i + 1, line))
                    ids.append(id_parts[1].strip())
            elif key == "length":
                length_of_seqs = int(parts[1].strip())

            # And read in another line...
            line = handle.readline()

        if number_of_seqs is None:
            raise ValueError("Number of sequences missing!")
        if length_of_seqs is None:
            raise ValueError("Length of sequences missing!")

        if self.records_per_alignment is not None \
        and self.records_per_alignment != number_of_seqs:
            raise ValueError("Found %i records in this alignment, told to expect %i"
                             % (number_of_seqs, self.records_per_alignment))

        seqs = [""] * len(ids)
        # Number of non-gap characters read so far for each sequence, kept
        # as a running total instead of re-counting the whole string per row.
        letters_seen = [0] * len(ids)
        seq_starts = []
        index = 0

        # Parse the seqs
        while line:
            if len(line) > 21:
                id_start = line[:21].strip().split(None, 1)
                seq_end = line[21:].strip().split(None, 1)
                if len(id_start) == 2 and len(seq_end) == 2:
                    # identifier, seq start position, seq, seq end position
                    # (an aligned seq is broken up into multiple lines)
                    name, start_text = id_start
                    seq, end_text = seq_end
                    letters = len(seq.replace("-", ""))
                    if letters:
                        start = int(start_text) - 1  # python counting
                    elif start_text == end_text:
                        # Special case, no letters at all on this row: the
                        # start value is used as given (no shift by one).
                        start = int(start_text)
                    else:
                        raise ValueError("Row has no letters but different "
                                         "start and end positions:\n%s" % line)
                    end = int(end_text)

                    if index >= number_of_seqs:
                        raise ValueError("Expected index %i in range [0,%i)"
                                         % (index, number_of_seqs))
                    # The identifier is truncated...
                    if not ids[index].startswith(name):
                        raise ValueError("Identifier %r does not match %r "
                                         "from the header:\n%s"
                                         % (name, ids[index], line))

                    if len(seq_starts) == index:
                        # Record the start
                        seq_starts.append(start)

                    # Check the start...
                    if start == end:
                        if letters:
                            raise ValueError(line)
                    elif start - seq_starts[index] != letters_seen[index]:
                        raise ValueError(
                            "Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s"
                            % (letters_seen[index], index, name,
                               repr(seqs[index]), start, line))

                    seqs[index] += seq
                    letters_seen[index] += letters

                    # Check the end ...
                    if end != seq_starts[index] + letters_seen[index]:
                        raise ValueError(
                            "Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s"
                            % (letters_seen[index], index, name,
                               repr(seqs[index]), seq_starts[index], end, line))

                    index += 1
                    if index >= number_of_seqs:
                        index = 0
                else:
                    # just a start value, this is just alignment annotation (?)
                    pass
            elif line.strip() == "":
                # Just a spacer?
                pass
            else:
                # Raise instead of printing the line and asserting, so the
                # failure is reported even when asserts are disabled.
                raise ValueError("Unrecognised EMBOSS pairwise line: %r" % line)

            line = handle.readline()
            if line.rstrip() == "#---------------------------------------" \
            or line.rstrip() == "#=======================================":
                # End of alignment; keep the line for the next call.
                self._header = line
                break

        if index != 0:
            raise ValueError("Alignment ended part way through a block of "
                             "%i sequence rows" % number_of_seqs)

        alignment = Alignment(self.alphabet)
        for name, seq in zip(ids, seqs):
            if len(seq) != length_of_seqs:
                # EMBOSS 2.9.0 is known to use spaces instead of minus signs
                # for leading gaps, and thus fails to parse.  This old version
                # is still used as of Dec 2008 behind the EBI SOAP webservice:
                # http://www.ebi.ac.uk/Tools/webservices/wsdl/WSEmboss.wsdl
                raise ValueError("Error parsing alignment - sequences of "
                                 "different length? You could be using an "
                                 "old version of EMBOSS.")
            alignment.add_sequence(name, seq)
        return alignment
# Self-test, run only when this module is executed as a script: it parses the
# embedded EMBOSS example reports below (simple_example, pair_example,
# pair_example2 and pair_example3) with EmbossIterator and asserts the number
# of alignments found, the number of records in each, and their record ids.
# NOTE(review): the numbers interleaved with the code and data below look like
# line numbers from the source listing rather than part of the program, and
# runs of spaces inside the example reports appear to have been collapsed.
# The parser splits each data row at a fixed column (line[:21]), so confirm
# this section against a pristine copy before relying on it.
220 221 if __name__ == "__main__" : 222 print "Running a quick self-test" 223 224 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 225 simple_example = \ 226 """######################################## 227 # Program: alignret 228 # Rundate: Wed Jan 16 17:16:13 2002 229 # Report_file: stdout 230 ######################################## 231 #======================================= 232 # 233 # Aligned_sequences: 4 234 # 1: IXI_234 235 # 2: IXI_235 236 # 3: IXI_236 237 # 4: IXI_237 238 # Matrix: EBLOSUM62 239 # Gap_penalty: 10.0 240 # Extend_penalty: 0.5 241 # 242 # Length: 131 243 # Identity: 95/131 (72.5%) 244 # Similarity: 127/131 (96.9%) 245 # Gaps: 25/131 (19.1%) 246 # Score: 100.0 247 # 248 # 249 #======================================= 250 251 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 252 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 253 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 254 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 255 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 256 257 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 258 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 259 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 260 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 261 ||:||||||||||||||||||||:::::::::::||||||||||||| | 262 263 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 264 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 265 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 266 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 267 |||:||||:|||||:|||||||::||||||| 268 269 270 #--------------------------------------- 271 #--------------------------------------- 272 273 """ 274 275 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 276 pair_example = \ 277 """######################################## 278 # Program: water 279 # Rundate: Wed Jan 16 17:23:19 2002 280 # 
Report_file: stdout 281 ######################################## 282 #======================================= 283 # 284 # Aligned_sequences: 2 285 # 1: IXI_234 286 # 2: IXI_235 287 # Matrix: EBLOSUM62 288 # Gap_penalty: 10.0 289 # Extend_penalty: 0.5 290 # 291 # Length: 131 292 # Identity: 112/131 (85.5%) 293 # Similarity: 112/131 (85.5%) 294 # Gaps: 19/131 (14.5%) 295 # Score: 591.5 296 # 297 # 298 #======================================= 299 300 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 301 ||||||||||||||| |||||||||||||||||||||||||| 302 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 303 304 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 305 |||||||||||||||||||||||| |||||||||||||||| 306 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 307 308 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 309 ||||||||||||||||||||||||||||||| 310 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 311 312 313 #--------------------------------------- 314 #--------------------------------------- 315 316 317 """ 318 319 pair_example2 = \ 320 """######################################## 321 # Program: needle 322 # Rundate: Sun 27 Apr 2007 17:20:35 323 # Commandline: needle 324 # [-asequence] Spo0F.faa 325 # [-bsequence] paired_r.faa 326 # -sformat2 pearson 327 # Align_format: srspair 328 # Report_file: ref_rec .needle 329 ######################################## 330 331 #======================================= 332 # 333 # Aligned_sequences: 2 334 # 1: ref_rec 335 # 2: gi|94968718|receiver 336 # Matrix: EBLOSUM62 337 # Gap_penalty: 10.0 338 # Extend_penalty: 0.5 339 # 340 # Length: 124 341 # Identity: 32/124 (25.8%) 342 # Similarity: 64/124 (51.6%) 343 # Gaps: 17/124 (13.7%) 344 # Score: 112.0 345 # 346 # 347 #======================================= 348 349 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 350 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 351 gi|94968718|r 1 
-VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 352 353 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 354 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 355 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 356 357 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 358 :..| ..|:|.|: || 359 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 360 361 362 #======================================= 363 # 364 # Aligned_sequences: 2 365 # 1: ref_rec 366 # 2: gi|94968761|receiver 367 # Matrix: EBLOSUM62 368 # Gap_penalty: 10.0 369 # Extend_penalty: 0.5 370 # 371 # Length: 119 372 # Identity: 34/119 (28.6%) 373 # Similarity: 58/119 (48.7%) 374 # Gaps: 9/119 ( 7.6%) 375 # Score: 154.0 376 # 377 # 378 #======================================= 379 380 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 381 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 382 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 383 384 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 385 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 386 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 387 388 ref_rec 101 PFDIDEIRDAV-------- 111 389 |...|::...| 390 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 391 392 393 #======================================= 394 # 395 # Aligned_sequences: 2 396 # 1: ref_rec 397 # 2: gi|94967506|receiver 398 # Matrix: EBLOSUM62 399 # Gap_penalty: 10.0 400 # Extend_penalty: 0.5 401 # 402 # Length: 120 403 # Identity: 29/120 (24.2%) 404 # Similarity: 53/120 (44.2%) 405 # Gaps: 9/120 ( 7.5%) 406 # Score: 121.0 407 # 408 # 409 #======================================= 410 411 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 412 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 413 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 414 415 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 416 
|:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 417 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 418 419 ref_rec 100 KPFDIDEIRDAV-------- 111 420 ||.|||.:.:.. 421 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 422 423 424 #======================================= 425 # 426 # Aligned_sequences: 2 427 # 1: ref_rec 428 # 2: gi|94970045|receiver 429 # Matrix: EBLOSUM62 430 # Gap_penalty: 10.0 431 # Extend_penalty: 0.5 432 # 433 # Length: 118 434 # Identity: 30/118 (25.4%) 435 # Similarity: 64/118 (54.2%) 436 # Gaps: 9/118 ( 7.6%) 437 # Score: 126.0 438 # 439 # 440 #======================================= 441 442 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 443 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 444 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 445 446 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 447 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 448 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 449 450 ref_rec 99 -AKPFDID----EIRDAV 111 451 .|||.:| :||:.: 452 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 453 454 455 #======================================= 456 # 457 # Aligned_sequences: 2 458 # 1: ref_rec 459 # 2: gi|94970041|receiver 460 # Matrix: EBLOSUM62 461 # Gap_penalty: 10.0 462 # Extend_penalty: 0.5 463 # 464 # Length: 125 465 # Identity: 35/125 (28.0%) 466 # Similarity: 70/125 (56.0%) 467 # Gaps: 18/125 (14.4%) 468 # Score: 156.5 469 # 470 # 471 #======================================= 472 473 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 474 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 475 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 476 477 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 478 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 
479 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 480 481 ref_rec 99 A----KPFDIDEIRDAV-------- 111 482 | |||..|.:...| 483 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 484 485 486 #--------------------------------------- 487 #--------------------------------------- 488 489 """ 490 491 pair_example3 = """######################################## 492 # Program: needle 493 # Rundate: Mon 14 Jul 2008 11:45:42 494 # Commandline: needle 495 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 496 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 497 # -filter 498 # Align_format: srspair 499 # Report_file: stdout 500 ######################################## 501 502 #======================================= 503 # 504 # Aligned_sequences: 2 505 # 1: asis 506 # 2: asis 507 # Matrix: EDNAFULL 508 # Gap_penalty: 10.0 509 # Extend_penalty: 0.5 510 # 511 # Length: 667 512 # Identity: 210/667 (31.5%) 513 # Similarity: 210/667 (31.5%) 514 # Gaps: 408/667 (61.2%) 515 # Score: 561.0 516 # 517 # 518 #======================================= 519 520 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 521 
522 asis 0 -------------------------------------------------- 0 523 524 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 525 526 asis 0 -------------------------------------------------- 0 527 528 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 529 530 asis 0 -------------------------------------------------- 0 531 532 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 533 .|||||| 534 asis 1 ------------TTATTAA------------------------------- 7 535 536 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 537 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 538 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 539 540 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 541 ||.|.|||||.|||.||||.|||| | ||||||||| 542 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 543 544 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 545 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 546 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 547 548 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 549 ||| |||||| .||||...||||..|||||. 
550 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 551 552 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 553 .| ||||||||||| || ||.||.||.|| 554 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 555 556 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 557 ||..||.||.|.|||..||||.|| ||||| | 558 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 559 560 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 561 ||| |.||| |||||||||.||| .||||||...||||||||| 562 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 563 564 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 565 ||||||||..| |||||||| 566 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 567 568 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 569 570 asis 311 -------------------------------------------------- 311 571 572 asis 599 GAAAGGGGGTTAATAGC 615 573 574 asis 311 ----------------- 311 575 576 577 #--------------------------------------- 578 #---------------------------------------""" 579 580 from StringIO import StringIO 581 582 alignments = list(EmbossIterator(StringIO(pair_example))) 583 assert len(alignments) == 1 584 assert len(alignments[0].get_all_seqs()) == 2 585 assert [r.id for r in alignments[0].get_all_seqs()] \ 586 == ["IXI_234", "IXI_235"] 587 588 alignments = list(EmbossIterator(StringIO(simple_example))) 589 assert len(alignments) == 1 590 assert len(alignments[0].get_all_seqs()) == 4 591 assert [r.id for r in alignments[0].get_all_seqs()] \ 592 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 593 594 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 595 assert len(alignments) == 2 596 assert len(alignments[0].get_all_seqs()) == 2 597 assert len(alignments[1].get_all_seqs()) == 4 598 assert [r.id for r in alignments[0].get_all_seqs()] \ 599 == ["IXI_234", "IXI_235"] 600 assert [r.id for r in alignments[1].get_all_seqs()] \ 601 == 
["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 602 603 alignments = list(EmbossIterator(StringIO(pair_example2))) 604 assert len(alignments) == 5 605 assert len(alignments[0].get_all_seqs()) == 2 606 assert [r.id for r in alignments[0].get_all_seqs()] \ 607 == ["ref_rec", "gi|94968718|receiver"] 608 assert [r.id for r in alignments[4].get_all_seqs()] \ 609 == ["ref_rec", "gi|94970041|receiver"] 610 611 612 alignments = list(EmbossIterator(StringIO(pair_example3))) 613 assert len(alignments) == 1 614 assert len(alignments[0].get_all_seqs()) == 2 615 assert [r.id for r in alignments[0].get_all_seqs()] \ 616 == ["asis","asis"] 617 618 print "Done" 619