Package Bio :: Module FilteredReader
[hide private]
[frames] | no frames]

Source Code for Module Bio.FilteredReader

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles. 
  7   
  8  Classes: 
  9  FilteredReader is a decorator for File that allows the user to filter the output 
 10  on a line by line basis. 
 11   
 12  The FilteredReader module reads a file and applies a sequence of filters to the input. 
 13  The constructor sets a default filter chain, but the user can select other filters by 
 14  assigning a new list to the filter_chain attribute of the FilteredReader instance. 
 15   
 16  handle = open( "filename" ) 
 17  filtered_reader = Bio.FilteredReader.FilteredReader( handle ) 
 18  filtered_reader.filter_chain = [ remove_asterisks, replace_dot_with_dash ] 
 19  filtered_reader.read() 
 20   
 21  All filters in the chain must provide the same interface with a line of text as the single 
 22  input parameter and altered text as the return value. 
 23   
 24  """ 
 25   
 26  import os 
 27  import string 
 28  import copy 
 29  from File import UndoHandle 
 30   
 31   
 32   
 33  """Used for debugging""" 
def dump_saved(name, text, j):
    """Write text to a debug file, wrapped at 80 characters per line.

    The file name is name with the decimal form of j appended (name 'dump'
    and j 3 give 'dump3'); an existing file of that name is overwritten.
    Each 80-character slice of text is written followed by a newline, so
    an empty text produces an empty file.
    """
    # The context manager closes the file even if a write fails.
    with open(name + '%d' % j, "w") as dump_file:
        for start in range(0, len(text), 80):
            dump_file.write('%s\n' % text[start:start + 80])
40
def remove_leading_whitespace(line):
    """Return line with all leading whitespace removed.

    Uses str.lstrip, so a line made up only of whitespace (its line
    terminator included) becomes the empty string.
    """
    return line.lstrip()
43 44
def remove_empty_line(line):
    """Return line unchanged, or '' when it holds nothing but whitespace.

    A line consisting only of a line terminator counts as empty, so it is
    dropped entirely rather than kept as a blank line.
    """
    if line.strip():
        return line
    return ''
51
def remove_useless_dot(line):
    """Blank out tab-delimited fields that contain only a dot.

    Every occurrence of tab-dot-tab is reduced to tab-tab.  A final '.' is
    also removed, but only when it is the very last character of line; a
    dot followed by a line terminator is left in place.
    """
    text = line
    # One replace() pass misses overlapping matches such as "\t.\t.\t",
    # because the shared tab is consumed by the first match; repeat until
    # none are left.
    while "\t.\t" in text:
        text = text.replace("\t.\t", "\t\t")
    if text.endswith('.'):
        text = text[:-1]
    return text
62
def fix_punctuation(line):
    """Normalise punctuation in line and return the result.

    Applied in this order: single quotes are removed, double quotes are
    removed, each ';' becomes a tab, and each 'entryname' becomes 'id'.
    Because quotes are stripped first, text such as "entry'name" is also
    rewritten to 'id'.
    """
    substitutions = (
        ("'", ''),
        ('"', ''),
        (';', '\t'),
        ('entryname', 'id'),
    )
    for old, new in substitutions:
        line = line.replace(old, new)
    return line
73 74 75
class FilteredReader:
    """Wrap a file-like handle and pass everything read through line filters.

    Text obtained from the handle is split into lines (line terminators
    kept) and each line is run through every callable in filter_chain, in
    order.  Each filter takes one line of text and returns the altered
    text; returning '' drops the line.  Attributes not defined on this
    class are looked up on the wrapped handle.
    """

    def __init__(self, handle):
        """Wrap handle and install the default filter chain."""
        self._handle = handle
        # Last line of the most recent block read.  It is held back because
        # it may be incomplete until more input (or end of input) arrives.
        self._start_line = ''
        self._debug_count = 0
        self.filter_chain = [remove_empty_line, remove_useless_dot, fix_punctuation]

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        # Without this guard, an instance that has no _handle yet (for
        # example one created without running __init__) would recurse
        # forever while looking up self._handle.
        if attr == '_handle':
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def close(self, *args, **keywds):
        """Close the wrapped handle, forwarding any arguments."""
        return self._handle.close(*args, **keywds)

    def read(self, *args, **keywds):
        """Read from the handle and return the filtered text.

        The size may be given as the first positional argument or as the
        keyword 'size'.  With a positive size the text comes from
        read_block; with no size, None, 0 or a negative size everything up
        to the end of input is read via read_to_end.
        """
        len_expected = self._get_len_expected(args, keywds)
        if len_expected:
            return self.read_block(len_expected)
        return self.read_to_end()

    def read_block(self, len_expected):
        """Return whole filtered lines totalling about len_expected characters.

        Input is read until the filtered text is at least len_expected
        characters long or the handle is exhausted.  The result is built
        from complete lines and is not truncated, so it may be longer than
        len_expected.  The last line read is kept in _start_line for the
        next call; it is emitted only once the end of input is reached.
        """
        filtered_text = ''
        while len(filtered_text) < len_expected:
            # Always positive inside the loop, so an empty result from the
            # handle can only mean end of input.
            text_read = self._handle.read(len_expected - len(filtered_text))
            lines = (self._start_line + text_read).splitlines(True)
            if text_read == '':
                # End of input: the held-back line is final.  Clear it so a
                # later read cannot emit it a second time.
                self._start_line = ''
                filtered_text = filtered_text + self.filter(lines)
                break
            self._start_line = lines[-1]
            filtered_text = filtered_text + self.filter(lines[:-1])
        return filtered_text

    def read_to_end(self):
        """Return the filtered form of all remaining input.

        Any line held back by an earlier read_block call is included and
        then cleared, so repeated calls do not return it again.
        """
        full_text = self._start_line + self._handle.read()
        self._start_line = ''
        return self.filter(full_text.splitlines(True))

    def _get_len_expected(self, args, keywds):
        """Extract the requested read size from read()'s arguments.

        The first positional argument wins; otherwise the 'size' keyword
        is used.  Returns None when no size was given, or when the size is
        None or negative.
        """
        if args:
            len_expected = args[0]
        else:
            len_expected = keywds.get('size')
        if len_expected is not None and len_expected < 0:
            len_expected = None
        return len_expected

    def filter(self, lines):
        """Run every line through the filter chain and join the results."""
        filter_chain = self.filter_chain
        filtered_lines = []
        for line in lines:
            for line_filter in filter_chain:
                line = line_filter(line)
            filtered_lines.append(line)
        return ''.join(filtered_lines)
155
def has_trailing_linefeed(line):
    """Return 1 if line ends with a carriage return or a line feed, else 0."""
    if line.endswith(('\r', '\n')):
        return 1
    return 0
162