Package Bio :: Package Fasta
[hide private]
[frames] | no frames]

Source Code for Package Bio.Fasta

  1  """Utilities for working with FASTA-formatted sequences (OBSOLETE). 
  2   
  3  Classes: 
  4  Record             Holds FASTA sequence data. 
  5  Iterator           Iterates over sequence data in a FASTA file. 
  6  RecordParser       Parses FASTA sequence data into a Record object. 
  7  SequenceParser     Parses FASTA sequence data into a SeqRecord object. 
  8   
  9  For a long time this module was the most commonly used and best documented 
 10  FASTA parser in Biopython.  However, we now recommend using Bio.SeqIO instead. 
 11   
 12  In view of this, while you can continue to use Bio.Fasta for the moment, it is 
 13  considered to be a legacy module and should not be used if you are writing new 
 14  code.  At some point Bio.Fasta may be officially deprecated (with warning 
 15  messages when used) before finally being removed. 
 16   
 17  If you are already using Bio.Fasta with the SequenceParser to get SeqRecord 
 18  objects, then you should be able to switch to the more recent Bio.SeqIO module 
 19  very easily as that too uses SeqRecord objects.  For example, 
 20   
 21  from Bio import Fasta 
 22  handle = open("example.fas") 
 23  for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()) : 
 24      print seq_record.description 
 25      print seq_record.seq 
 26  handle.close() 
 27   
 28  Using Bio.SeqIO instead this becomes: 
 29   
 30  from Bio import SeqIO 
 31  handle = open("example.fas") 
 32  for seq_record in SeqIO.parse(handle, "fasta") : 
 33      print seq_record.description 
 34      print seq_record.seq 
 35  handle.close() 
 36   
 37  Converting existing code which uses the RecordParser is a little more 
 38  complicated as the Bio.Fasta.Record object differs from the SeqRecord. 
 39   
 40  from Bio import Fasta 
 41  handle = open("example.fas") 
 42  for record in Fasta.Iterator(handle, Fasta.RecordParser()) : 
 43      #record is a Bio.Fasta.Record object 
 44      print record.title #The full title line as a string 
 45      print record.sequence #The sequence as a string 
 46  handle.close() 
 47   
 48  Using Bio.SeqIO instead this becomes: 
 49   
 50  from Bio import SeqIO 
 51  handle = open("example.fas") 
 52  for seq_record in SeqIO.parse(handle, "fasta") : 
 53      print seq_record.description #The full title line as a string 
 54      print seq_record.seq.tostring() #The sequence as a string 
 55  handle.close() 
 56   
 57   
 58   
 59  """ 
 60  from Bio import Seq 
 61  from Bio import SeqRecord 
 62  from Bio import Alphabet 
 63   
 64   
class Record:
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """

    def __init__(self, colwidth=60):
        """__init__(self, colwidth=60)

        Create a new Record.  colwidth specifies the number of residues
        to put on each line when generating FASTA format.

        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth

    def __str__(self):
        """Return the record formatted as FASTA text.

        The first line is '>' followed by the title; the sequence follows,
        wrapped at colwidth residues per line.  Lines are joined with "\\n"
        and there is no trailing newline.

        Raises ValueError if there is a sequence to wrap but colwidth is
        smaller than 1 (wrapping could otherwise never make progress).
        """
        lines = ['>%s' % self.title]
        sequence = self.sequence
        if sequence:
            width = self._colwidth
            if width < 1:
                # A zero or negative width would never advance through the
                # sequence, so refuse it instead of looping forever.
                raise ValueError("colwidth must be a positive integer, got %r"
                                 % (width,))
            for start in range(0, len(sequence), width):
                lines.append(sequence[start:start + width])
        # Always join with "\n" rather than os.linesep; the original author
        # noted that os.linesep caused test failures on Windows.
        return "\n".join(lines)
94
class Iterator:
    """Returns one record at a time from a FASTA file.
    """

    def __init__(self, handle, parser=None, debug=0):
        """Initialize a new iterator.

        Arguments:
        o handle - An object with a readline() method giving the FASTA text.
        o parser - Optional object with a parse_string(text) method.  If
        given, each record's text is passed through it; otherwise the raw
        record text is returned.
        o debug - If true, progress messages are printed.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        #Skip any text before the first record (e.g. blank lines)
        while True:
            line = handle.readline()
            if not line or line[0] == ">":
                break
            if debug:
                # Single-argument parenthesised print behaves the same as a
                # print statement and as the print() function.
                print("Skipping: " + line)
        # Either the first '>' title line, or '' if the handle is exhausted.
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping when next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file, or None when there are no more.

        The record is the title line plus its sequence lines, each with
        trailing whitespace stripped, joined with "\\n".  Lines starting with
        '#' are ignored.  If a parser was supplied, the result of its
        parse_string() on that text is returned instead of the text itself.
        """
        line = self._lookahead
        if not line:
            return None
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line:
            if line[0] == ">":
                break
            if line[0] == "#":
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Remember the next title line (or '' at end of file) for next time.
        self._lookahead = line
        if self._debug:
            # The title is the header line without its leading '>'.
            title = lines[0][1:]
            print("Debug: '%s' and '%s'" % (title, "".join(lines[1:])))
        if self._parser is None:
            return "\n".join(lines)
        else:
            return self._parser.parse_string("\n".join(lines))
138
class RecordParser:
    """Parses FASTA sequence data into a Fasta.Record object.
    """

    def __init__(self, debug=0):
        """Create a parser.  The debug argument is accepted but not used."""
        pass

    def parse_string(self, text):
        """Parse the first FASTA record in text into a Record.

        text must start with '>'.  If text holds more than one record, only
        the first is parsed.  The Record's title is the first line without
        the '>' and its sequence is the remaining lines joined together.
        A record with a title line but no sequence lines gives an empty
        sequence.
        """
        text = text.replace("\r\n", "\n") #Crude way of dealing with \r\n
        assert text[0] == ">", text
        text = text.split("\n>", 1)[0] # Only do the first record if more than one
        parts = text.split("\n", 1)
        title = parts[0][1:]
        if len(parts) > 1:
            sequence = parts[1].replace("\n", "")
        else:
            # Title line only: there is nothing after it to unpack.
            sequence = ""
        rec = Record()
        rec.title = title
        rec.sequence = sequence
        return rec

    def parse(self, handle):
        """Read everything from handle and parse its first record."""
        return self.parse_string(handle.read())
158
class SequenceParser:
    """Parses FASTA sequence data into a SeqRecord object.
    """

    def __init__(self, alphabet=Alphabet.generic_alphabet, title2ids=None,
                 debug=0):
        """Initialize the parser.

        Arguments:
        o alphabet - The alphabet of the sequences to be parsed. If not
        passed, this will be set as generic_alphabet.
        o title2ids - A function that, when given the title of the FASTA
        file (without the beginning >), will return the id, name and
        description (in that order) for the record. If this is not given,
        then the entire title line will be used as the description.
        o debug - Accepted but not used.
        """
        self.alphabet = alphabet
        self.title2ids = title2ids

    def parse_string(self, text):
        """Parse the first FASTA record in text into a SeqRecord.

        text must start with '>'.  If text holds more than one record, only
        the first is parsed.  The sequence lines are joined into a Seq using
        this parser's alphabet.  A record with a title line but no sequence
        lines gives an empty sequence.
        """
        text = text.replace("\r\n", "\n") #Crude way of dealing with \r\n
        assert text[0] == ">", text
        text = text.split("\n>", 1)[0] # Only do the first record if more than one
        parts = text.split("\n", 1)
        title = parts[0][1:]
        if len(parts) > 1:
            sequence = parts[1].replace("\n", "")
        else:
            # Title line only: there is nothing after it to unpack.
            sequence = ""

        seq = Seq.Seq(sequence, self.alphabet)
        rec = SeqRecord.SeqRecord(seq)

        if self.title2ids:
            seq_id, name, descr = self.title2ids(title)
            rec.id = seq_id
            rec.name = name
            rec.description = descr
        else:
            rec.description = title

        return rec

    def parse(self, handle):
        """Read everything from handle and parse its first record."""
        return self.parse_string(handle.read())
199