Package Bio :: Module FilteredReader
[hide private]
[frames] | no frames]

Source Code for Module Bio.FilteredReader

  1  # Copyright 2001 by Katharine Lindner.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles (OBSOLETE). 
  7   
  8  Classes: 
  9  Filtered is a decorator for File that allows the user to filter the output 
 10  on a line by line basis. 
 11   
 12  The FilteredReader module reads a file and applies a sequence of filters to the input.
 13  The constructor sets a default filter chain, but the user can select another chain by setting
 14  the filter_chain attribute of the FilteredReader instance.
 15   
 16  handle = open( "filename" ) 
 17  filtered_reader = Bio.FilteredReader.FilteredReader( handle )
 18  filtered_reader.filter_chain = [ remove_asterisks, replace_dot_with_dash ] 
 19  filtered_reader.read()
 20   
 21  All filters in the chain must provide the same interface with a line of text as the single 
 22  input parameter and altered text as the return value. 
 23   
 24  This module is now considered to be obsolete, and is likely to be deprecated 
 25  in a future release of Biopython, and later removed. 
 26  """ 
 27   
 28   
def dump_saved(name, text, j):
    """Used for debugging.

    Write *text* to the file whose path is *name* followed by the decimal
    form of *j*, split into chunks of at most 80 characters, one chunk per
    output line.  An existing file of that name is overwritten.
    """
    # The context manager closes the file even if a write fails.
    with open(name + '%d' % j, "w") as dump_file:
        for i in range(0, len(text), 80):
            dump_file.write('%s\n' % text[i:i + 80])
36
def remove_leading_whitespace(line):
    """Return *line* with all leading whitespace stripped."""
    stripped = line.lstrip()
    return stripped
39 40
def remove_empty_line(line):
    """Return *line* unchanged, or '' if it contains only whitespace."""
    return line if line.strip() else ''
47
def remove_useless_dot(line):
    """Blank out tab-delimited fields that hold only a dot.

    Every "<tab>.<tab>" becomes "<tab><tab>", and a single trailing '.'
    at the very end of the text is dropped.
    """
    lone_dot = "\t.\t"
    cleaned = line
    # One replace() pass cannot handle adjacent fields that share a tab
    # ("\t.\t.\t"), so keep going until no occurrence is left.
    while lone_dot in cleaned:
        cleaned = cleaned.replace(lone_dot, "\t\t")
    if cleaned.endswith('.'):
        cleaned = cleaned[:-1]
    return cleaned
58
def fix_punctuation(line):
    """Normalise punctuation in *line*.

    Single and double quotes are deleted, each ';' becomes a tab, and the
    text 'entryname' is replaced by 'id'.
    """
    substitutions = (
        ("'", ''),
        ('"', ''),
        (';', '\t'),
        ('entryname', 'id'),
    )
    for old, new in substitutions:
        line = line.replace(old, new)
    return line or ''
69 70 71
class FilteredReader:
    """Wrap a file handle and pass everything read through a filter chain.

    ``filter_chain`` is a list of callables; each takes one line of text and
    returns the altered line.  They are applied in order to every line read.
    Attributes not defined on this class are looked up on the wrapped handle.
    """

    def __init__(self, handle):
        """Wrap *handle* and install the default filter chain."""
        self._handle = handle
        # Trailing line of the previous sized read, held back because it may
        # be incomplete; it is prepended to the next chunk read.
        self._start_line = ''
        self._debug_count = 0
        self.filter_chain = [remove_empty_line, remove_useless_dot, fix_punctuation]

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        # Without this guard, looking up anything before _handle is set
        # would re-enter __getattr__ for '_handle' and recurse forever.
        if attr == '_handle':
            raise AttributeError(attr)
        return getattr(self._handle, attr)

    def close(self, *args, **keywds):
        """Close the wrapped handle and return whatever its close() returns."""
        return self._handle.close(*args, **keywds)

    def read(self, *args, **keywds):
        """Read and filter text from the wrapped handle.

        The size may be given as the first positional argument or as the
        ``size`` keyword.  With no size, ``None`` or a negative size, the
        rest of the input is read; a size of 0 returns ''.  Otherwise see
        read_block().
        """
        len_expected = self._get_len_expected(args, keywds)
        if len_expected is None:
            return self.read_to_end()
        return self.read_block(len_expected)

    def read_block(self, len_expected):
        """Return filtered text built from whole lines of the input.

        Input is read until the filtered text is at least *len_expected*
        characters long or the input is exhausted.  Only complete lines are
        filtered, so the result may be longer than *len_expected*.  The last
        line of each chunk is held back in ``_start_line`` and is emitted by
        a later read, or at end of input.
        """
        filtered_text = ''
        while len(filtered_text) < len_expected:
            # Text already held back counts toward the target.  Always ask
            # for at least one character: read(0) returns '' (which would be
            # taken for end of input) and a negative size reads everything.
            len_adjusted = max(
                len_expected - len(filtered_text) - len(self._start_line), 1)
            text_read = self._handle.read(len_adjusted)
            lines = (self._start_line + text_read).splitlines(True)
            if text_read == '':
                # End of input: flush the held-back line and forget it so
                # that a later read does not emit it a second time.
                self._start_line = ''
                filtered_text += self.filter(lines)
                break
            # text_read is non-empty here, so lines has at least one entry.
            self._start_line = lines[-1]
            filtered_text += self.filter(lines[:-1])
        return filtered_text

    def read_to_end(self):
        """Read the rest of the input and return it filtered."""
        full_text = self._start_line + self._handle.read()
        # The held-back line has now been consumed.
        self._start_line = ''
        return self.filter(full_text.splitlines(True))

    def _get_len_expected(self, args, keywds):
        """Return the requested read size, or None for "read everything".

        The first positional argument wins over the ``size`` keyword.  A
        negative size is reported as None.
        """
        if len(args) > 0:
            len_expected = args[0]
        else:
            len_expected = keywds.get('size')
        if len_expected is not None and len_expected < 0:
            len_expected = None
        return len_expected

    def filter(self, lines):
        """Apply every filter in ``filter_chain`` to each line and join the results."""
        filter_chain = self.filter_chain
        filtered_lines = []
        for line in lines:
            for line_filter in filter_chain:
                line = line_filter(line)
            filtered_lines.append(line)
        return ''.join(filtered_lines)
146
def has_trailing_linefeed(line):
    """Return 1 if *line* ends with a carriage return or a line feed, else 0."""
    if line.endswith(('\r', '\n')):
        return 1
    return 0
153