1 """Utilities for working with FASTA-formatted sequences (OBSOLETE).
2
3 Classes:
Record          Holds FASTA sequence data.
Iterator        Iterates over sequence data in a FASTA file.
RecordParser    Parses FASTA sequence data into a Record object.
SequenceParser  Parses FASTA sequence data into a SeqRecord object.
8
9 For a long time this module was the most commonly used and best documented
10 FASTA parser in Biopython. However, we now recommend using Bio.SeqIO instead.
11
12 In view of this, while you can continue to use Bio.Fasta for the moment, it is
13 considered to be a legacy module and should not be used if you are writing new
14 code. At some point Bio.Fasta may be officially deprecated (with warning
15 messages when used) before finally being removed.
16
17 If you are already using Bio.Fasta with the SequenceParser to get SeqRecord
18 objects, then you should be able to switch to the more recent Bio.SeqIO module
19 very easily as that too uses SeqRecord objects. For example,
20
    from Bio import Fasta
    handle = open("example.fas")
    for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()) :
        print seq_record.description
        print seq_record.seq
    handle.close()
27
28 Using Bio.SeqIO instead this becomes:
29
    from Bio import SeqIO
    handle = open("example.fas")
    for seq_record in SeqIO.parse(handle, "fasta") :
        print seq_record.description
        print seq_record.seq
    handle.close()
36
Converting existing code which uses the RecordParser is a little more
complicated, as the Bio.Fasta.Record object differs from the SeqRecord.
39
    from Bio import Fasta
    handle = open("example.fas")
    for record in Fasta.Iterator(handle, Fasta.RecordParser()) :
        #record is a Bio.Fasta.Record object
        print record.title #The full title line as a string
        print record.sequence #The sequence as a string
    handle.close()
47
48 Using Bio.SeqIO instead this becomes:
49
    from Bio import SeqIO
    handle = open("example.fas")
    for seq_record in SeqIO.parse(handle, "fasta") :
        print seq_record.description #The full title line as a string
        print seq_record.seq.tostring() #The sequence as a string
    handle.close()
56
57
58
59 """
60 from Bio import Seq
61 from Bio import SeqRecord
62 from Bio import Alphabet
63
64
66 """Holds information from a FASTA record.
67
68 Members:
69 title Title line ('>' character not included).
70 sequence The sequence.
71
72 """
74 """__init__(self, colwidth=60)
75
76 Create a new Record. colwidth specifies the number of residues
77 to put on each line when generating FASTA format.
78
79 """
80 self.title = ''
81 self.sequence = ''
82 self._colwidth = colwidth
83
94
96 """Returns one record at a time from a FASTA file.
97 """
def __init__(self, handle, parser=None, debug=0):
    """Initialize a new iterator.

    Arguments:
    o handle - File-like object; only its readline() method is used here.
    o parser - Optional object stored as self._parser (None by default).
    o debug - When true, every line skipped before the first record is
    printed.

    Lines are read from the handle until one starting with '>' is found.
    That line (or the empty string if the handle runs out first) is kept
    in self._lookahead.
    """
    self.handle = handle
    self._parser = parser
    self._debug = debug

    # Discard anything that precedes the first '>' title line.
    while True:
        line = handle.readline()
        if not line or line[0] == ">":
            break
        if debug:
            # Parenthesised single argument: same output under the Python 2
            # print statement, and also valid as a Python 3 call.
            print("Skipping: " + line)
    self._lookahead = line
112
114 return iter(self.next, None)
115
def next(self):
    """Return the next record in the file, or None when there is none.

    The record starts at the '>' title line held in self._lookahead and
    runs up to the next line starting with '>' or the end of the handle.
    Lines starting with '#' are dropped.  Every kept line is right-stripped
    and the lines are joined with newlines.

    Returns the joined text when self._parser is None, otherwise the result
    of self._parser.parse_string() called on that text.

    Raises AssertionError if the pending lookahead line does not start
    with '>'.
    """
    line = self._lookahead
    if not line:
        return None
    assert line[0] == ">", line
    lines = [line.rstrip()]
    line = self.handle.readline()
    while line:
        if line[0] == ">":
            break
        if line[0] == "#":
            if self._debug:
                print("Ignoring comment line")
        else:
            lines.append(line.rstrip())
        line = self.handle.readline()
    # Keep the next title line (or "" at end of input) for the following call.
    self._lookahead = line
    if self._debug:
        # The debug message used an undefined name `title`, which raised
        # NameError whenever debug was on.  Take the title from the first
        # collected line (without '>') and show the remaining lines as the
        # sequence.
        title = lines[0][1:]
        print("Debug: '%s' and '%s'" % (title, "".join(lines[1:])))
    text = "\n".join(lines)
    if self._parser is None:
        return text
    return self._parser.parse_string(text)
138
140 """Parses FASTA sequence data into a Fasta.Record object.
141 """
144
155
156 - def parse(self, handle):
158
160 """Parses FASTA sequence data into a SeqRecord object.
161 """
164 """Initialize a Scanner and Sequence Consumer.
165
166 Arguments:
167 o alphabet - The alphabet of the sequences to be parsed. If not
168 passed, this will be set as generic_alphabet.
169 o title2ids - A function that, when given the title of the FASTA
170 file (without the beginning >), will return the id, name and
171 description (in that order) for the record. If this is not given,
172 then the entire title line will be used as the description.
173 """
174 self.alphabet = alphabet
175 self.title2ids = title2ids
176
196
197 - def parse(self, handle):
199