Package Bio :: Module File
[hide private]
[frames | no frames]

Source Code for Module Bio.File

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles. 
  7   
  8   
  9  Classes: 
 10  UndoHandle     File object decorator with support for undo-like operations. 
 11  StringHandle   Wraps a file object around a string. 
 12  SGMLHandle     File object that automatically strips SGML tags from data. 
 13   
 14  SGMLStripper   Object that strips SGML.  This is now considered OBSOLETE, and 
 15                 is likely to be deprecated in a future release of Biopython, 
 16                 and later removed. 
 17   
 18  """ 
 19  import os 
 20  import StringIO 
 21  import sgmllib 
 22   
class UndoHandle:
    """A Python handle that adds functionality for saving lines.

    Wraps another file-like object and keeps a stack of "pushed back"
    lines in front of it.  Saved lines are returned in a LIFO fashion:
    the most recently saved line is the next one to be read.

    Added methods:
    saveline    Save a line to be returned next time.
    peekline    Peek at the next line without consuming it.

    Any attribute not defined here is looked up on the wrapped handle.

    """

    def __init__(self, handle):
        """Wrap handle, an object providing the usual file read methods."""
        self._handle = handle
        # Pushed-back lines; index 0 is the next line to be returned.
        self._saved = []

    def __iter__(self):
        """Return self; iteration yields one line at a time."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Same method under the name used by the newer iterator protocol, so
    # that iterating over the handle works regardless of which is called.
    __next__ = next

    def readlines(self, *args, **keywds):
        """Return all saved lines followed by the wrapped handle's lines.

        Arguments are forwarded to the wrapped handle's readlines().
        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the next saved line, or read one from the wrapped handle.

        Arguments are only forwarded when no saved line is pending; a
        saved line is always returned whole.
        """
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        """Read up to size characters, consuming saved lines first.

        A negative size or None means "read everything": all saved lines
        followed by the rest of the wrapped handle.
        """
        if size is None or size < 0:
            # Previously only size == -1 took this path; any other
            # negative size skipped the saved lines and returned the
            # underlying data out of order.
            saved = "".join(self._saved)
            self._saved[:] = []
            return saved + self._handle.read(-1)

        chunks = []
        while size > 0 and self._saved:
            head = self._saved[0]
            if len(head) <= size:
                # The whole saved line fits into the request.
                size -= len(head)
                chunks.append(self._saved.pop(0))
            else:
                # Hand out the front of the line, keep the remainder saved.
                chunks.append(head[:size])
                self._saved[0] = head[size:]
                size = 0
        # size is now whatever is still owed from the wrapped handle
        # (possibly 0, which reads nothing).
        return "".join(chunks) + self._handle.read(size)

    def saveline(self, line):
        """Push line back so that it is the next line returned.

        Empty lines are ignored.
        """
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        """Return the next line without consuming it."""
        if self._saved:
            return self._saved[0]
        line = self._handle.readline()
        self.saveline(line)
        return line

    def tell(self):
        """Return the wrapped handle's position minus the saved data length.

        The adjustment is the total len() of the saved lines, so it is
        only meaningful when the wrapped handle's tell() counts in the
        same units.
        """
        return self._handle.tell() - sum(len(line) for line in self._saved)

    def seek(self, *args):
        """Discard any saved lines and seek the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        if attr == "_handle":
            # _handle itself is missing (e.g. __init__ has not run);
            # delegating would recurse back into __getattr__ forever.
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def __enter__(self):
        """Return self for use in a with statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the wrapped handle when the with block exits."""
        self._handle.close()
103 104 105 # I could make this faster by using cStringIO. 106 # However, cStringIO (in v1.52) does not implement the 107 # readlines method. 108 StringHandle = StringIO.StringIO 109 110 111
class SGMLHandle:
    """A Python handle that automatically strips SGML tags from data (OBSOLETE).

    Every read goes to the wrapped handle and the result is passed
    through an SGMLStripper before being returned.  Any attribute not
    defined here is looked up on the wrapped handle.

    This module is now considered to be obsolete, and is likely to be
    deprecated in a future release of Biopython, and later removed.
    """

    def __init__(self, handle):
        """SGMLHandle(handle)

        handle is a file handle to SGML-formatted data.

        """
        self._handle = handle
        self._stripper = SGMLStripper()

    def read(self, *args, **keywds):
        """Read from the wrapped handle and return the data without tags."""
        data = self._handle.read(*args, **keywds)
        return self._stripper.strip(data)

    def readline(self, *args, **keywds):
        """Read one line from the wrapped handle and return it without tags."""
        line = self._handle.readline(*args, **keywds)
        return self._stripper.strip(line)

    def readlines(self, *args, **keywds):
        """Read lines from the wrapped handle and return them without tags.

        Each line is stripped individually, in order.
        """
        lines = self._handle.readlines(*args, **keywds)
        # The stripper must be given each line; the previous code passed
        # the builtin ``str`` type instead of the line itself.
        return [self._stripper.strip(line) for line in lines]

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        return getattr(self._handle, attr)
143 144
145 -class SGMLStripper:
146 - class MyParser(sgmllib.SGMLParser):
147 - def __init__(self):
148 sgmllib.SGMLParser.__init__(self) 149 self.data = ''
150 - def handle_data(self, data):
151 self.data = self.data + data
152
153 - def __init__(self):
154 self._parser = SGMLStripper.MyParser()
155
156 - def strip(self, str):
157 """S.strip(str) -> string 158 159 Strip the SGML tags from str. 160 161 """ 162 if not str: # empty string, don't do anything. 163 return '' 164 # I need to make sure that I don't return an empty string if 165 # the buffer is not empty. This can happen if there's a newline 166 # character embedded within a tag. Thus, I'll first check to 167 # see if the last character is a newline. If it is, and it's stripped 168 # away, I'll add it back. 169 is_newline = str[-1] in ['\n', '\r'] 170 171 self._parser.data = '' # clear the parser's data (don't reset) 172 self._parser.feed(str) 173 if self._parser.data: 174 str = self._parser.data 175 elif is_newline: 176 str = '\n' 177 else: 178 str = '' 179 return str
180