Package Bio :: Package writers :: Package SeqRecord :: Module embl
[hide private]
[frames] | no frames]

Source Code for Module Bio.writers.SeqRecord.embl

 1  # Not clear on the distinction, if any, between 'embl' and 'embl/65'.  This 
 2  # code might apply to either or both. 
 3   
 4  # See 'http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html' for a 
 5  # definition of this file format. 
 6   
 7  # This code only makes a best effort--the output may not be strictly valid. 
 8  # So, for example, the EMBL ID is supposed to be alphanumeric, starting with a 
 9  # letter, but we don't check for this, etc. 
10   
11   
12  # Example: 
13  # ID   AA03518    standard; DNA; FUN; 237 BP. 
14  # XX 
15  # AC   U03518; 
16  # XX 
17  # DE   Aspergillus awamori internal transcribed spacer 1 (ITS1) and 18S 
18  # DE   rRNA and 5.8S rRNA genes, partial sequence. 
19  # XX 
20  # SQ   Sequence 237 BP; 41 A; 77 C; 67 G; 52 T; 0 other; 
21  #      aacctgcgga aggatcatta ccgagtgcgg gtcctttggg cccaacctcc catccgtgtc        60 
22  #      tattgtaccc tgttgcttcg gcgggcccgc cgcttgtcgg ccgccggggg ggcgcctctg       120 
23  #      ccccccgggc ccgtgcccgc cggagacccc aacacgaaca ctgtctgaaa gcgtgcagtc       180 
24  #      tgagttgatt gaatgcaatc agttaaaact ttcaacaatg gatctcttgg ttccggc          237 
25  # // 
26   
27   
28  import textwrap 
29   
30  from Bio import Alphabet 
31  from Bio import Writer 
32   
class WriteEmbl(Writer.Writer):
    """Best-effort writer that emits a record as an EMBL flat-file entry.

    Only the ID, DE and SQ lines, the sequence rows and the terminating
    "//" line are produced.  No validation of the record's fields is done,
    so the output may not be strictly valid EMBL.
    """

    def __init__(self, outfile):
        """Initialise the base Writer with the output handle `outfile`."""
        Writer.Writer.__init__(self, outfile)

    def write(self, record):
        """Write `record` to self.outfile as one EMBL entry.

        `record` must provide `id`, `description` and `seq`; `seq` must
        provide `alphabet` (with a `size` attribute) and `data` (the
        sequence as a string).

        Raises AssertionError if the alphabet size is not 1.
        """
        seq = record.seq
        # NOTE(review): an assert is stripped under -O; it is kept so that
        # callers continue to see AssertionError for unsupported alphabets.
        assert seq.alphabet.size == 1, \
            "cannot handle alphabet of size %d" % seq.alphabet.size
        data = seq.data
        upperdata = data.upper()

        # It'd be nice if the alphabet was usefully set, but for many
        # interesting cases (e.g., reading from FASTA files), it's not.
        # Anything that is not explicitly RNA is therefore reported as DNA.
        if isinstance(seq.alphabet, Alphabet.RNAAlphabet):
            molecule = 'mRNA'
            letters = ['A', 'C', 'G', 'U']
        else:
            molecule = 'DNA'
            letters = ['A', 'C', 'G', 'T']

        division = 'UNC'  # unknown

        # Every EMBL line starts with a two-letter line code followed by
        # three blanks, so the data begins in column 6 (the sequence rows
        # below use the same five-column indent).
        self.outfile.write("ID   %s standard; %s; %s; %d BP.\n"
                           % (record.id, molecule, division, len(data)))

        # Wrap at 74 so that prefix + text stays within 79 columns.
        for desc_line in textwrap.wrap(record.description, 74):
            self.outfile.write("DE   %s\n" % desc_line)

        # Case-insensitive base composition; everything else is "other".
        counts = [upperdata.count(letter) for letter in letters]
        othercount = len(upperdata) - sum(counts)
        countstring = ''.join([" %d %s;" % pair
                               for pair in zip(counts, letters)])

        self.outfile.write("SQ   Sequence %s BP;%s %d other;\n"
                           % (len(data), countstring, othercount))

        rowlength = 60
        blocklength = 10
        for start in xrange(0, len(data), rowlength):
            row = data[start:start + rowlength]
            # Each block is left-justified in blocklength + 1 columns, so a
            # short final row is still padded out and the running count
            # stays right-aligned in the same column.
            blocks = ["%-*s" % (blocklength + 1, row[b:b + blocklength])
                      for b in xrange(0, rowlength, blocklength)]
            self.outfile.write("%s%s%9d\n"
                               % (" " * 5, ''.join(blocks),
                                  min(start + rowlength, len(data))))

        self.outfile.write("//\n")


# Expose WriteEmbl under the generic module-level name make_writer.
# NOTE(review): presumably this is the hook a format registry looks up to
# construct the writer for this format -- confirm against the callers.
make_writer = WriteEmbl