Package Bio :: Package FSSP
[hide private]
[frames] | no frames]

Source Code for Package Bio.FSSP

  1  import string 
  2  import re 
  3  import fssp_rec 
  4  from Bio.Align import Generic 
  5  from Bio import Alphabet 
  6  fff_rec = fssp_rec.fff_rec 
  7  """ 
  8  A module to handle FSSP files. For now it parses only the header, summary and alignment 
  9  sections. 
 10   
 11  functions: read_fssp(file_handle): reads an fssp file into the records. Returns a 
 12  tuple of three instances: the header, the summary dictionary and the alignment dictionary. 
 13  mult_align: returns a Biopython alignment object 
 14  """ 
 15  header_records = { 
 16     'database' : re.compile('^DATABASE'), 
 17     'pdbid': re.compile('^PDBID'), 
 18     'header': re.compile('^HEADER'), 
 19     'compnd': re.compile('^COMPND'), 
 20     'author': re.compile('^AUTHOR'), 
 21     'source': re.compile('^SOURCE'), 
 22     'seqlength': re.compile('^SEQLENGTH'), 
 23     'nalign': re.compile('^NALIGN') 
 24  } 
 25   
 26  summary_title = re.compile('## +SUMMARY') 
 27  summary_rec = re.compile(' *[0-9]+: +[1-9][0-9a-z]{3,3}') 
 28  alignments_title= re.compile('## +ALIGNMENTS') 
 29  alignments_rec = re.compile(' *[0-9]+ +-{0,1}[0-9]+') 
 30  equiv_title = re.compile('## +EQUIVALENCES') 
 31   
class FSSPHeader:
    """Values collected from the header section of an FSSP file.

    A fresh instance holds empty defaults; fill_header() overwrites one
    attribute for each recognised header line it is given.
    """

    def __init__(self):
        self.database = None
        self.pdbid = ''
        self.header = ''
        self.compnd = ''
        self.source = ''
        self.author = []
        self.seqlength = 0
        self.nalign = 0

    def fill_header(self, inline):
        """Store the value carried by one header line.

        inline is tested against every pattern in header_records; a line
        that matches none of them leaves the instance untouched.

        - database, seqlength, nalign: second whitespace-separated token,
          converted to int.
        - compnd, author: list of all tokens after the keyword.
        - source, header: everything after the first space, stripped.
        - anything else (pdbid): second token, as a string.

        Raises IndexError if a single-valued line has no second token and
        ValueError if an integer field is not numeric.
        """
        for name, pattern in header_records.items():
            if not pattern.match(inline):
                continue
            # str methods replace the string-module functions, which no
            # longer exist on Python 3.
            if name in ('database', 'seqlength', 'nalign'):
                value = int(inline.split()[1])
            elif name in ('compnd', 'author'):
                value = inline.split()[1:]
            elif name in ('source', 'header'):
                value = inline[inline.find(' ') + 1:].strip()
            else:
                value = inline.split()[1]
            setattr(self, name, value)
54
class PosAlign:
    """One cell of an FSSP alignment row.

    Attributes:
        aa:  residue letter, or '-' for a gap.
        ss:  upper-cased second character of the cell, or '0' when the cell
             has no second character (and for gaps).
        gap: 1 when the cell is the gap marker '..', otherwise 0.
    """

    def __init__(self, inStr):
        """Parse a one- or two-character alignment cell.

        Surrounding whitespace is ignored. Raises ValueError when the
        stripped cell is not exactly one or two characters long.
        """
        inStr = inStr.strip()
        if len(inStr) not in (1, 2):
            raise ValueError(
                'PosAlign: expected 1 or 2 characters, got %r' % inStr)
        if inStr == '..':
            self.aa = '-'
            self.gap = 1
            # Gaps used to have no ss attribute at all; give them the same
            # "unknown" marker as one-character cells so that reading .ss
            # never raises AttributeError.
            self.ss = '0'
        else:
            self.gap = 0
            self.aa = inStr[0]
            # A residue that equals its own lower-case form is stored as
            # 'C'. NOTE(review): presumably the DSSP convention of writing
            # disulphide-bonded cysteines in lower case -- confirm.
            if self.aa == self.aa.lower():
                self.aa = 'C'
            if len(inStr) == 2:
                self.ss = inStr[1].upper()
            else:
                self.ss = '0'

    def __repr__(self):
        """Return '..' for a gap, else the residue plus lower-cased ss."""
        if self.gap:
            return '..'
        return self.aa + self.ss.lower()

    __str__ = __repr__
81 82 83 84
class FSSPSumRec:
    """Contains info from an FSSP summary record.

    The record is split on whitespace; fields are read by position:
    nr, pdb1(+chain1), pdb2(+chain2), zscore, rmsd, lali, lseq2, pID,
    revers, permut, nfrag, topo, and any remaining tokens as doc.
    The unparsed line is kept in raw and is what repr()/str() return.
    """

    def __init__(self, in_str):
        """Parse one summary line.

        Raises ValueError for a PDB identifier that is not 4 or 5
        characters long or for a non-numeric numeric field, and IndexError
        when the line has fewer than twelve fields.
        """
        self.raw = in_str
        in_rec = in_str.strip().split()
        # The record number is written with a trailing ':'.
        self.nr = int(in_rec[0][:-1])
        self.pdb1, self.chain1 = self._split_pdb_id(in_rec[1], 'Bad PDB ID 1')
        self.pdb2, self.chain2 = self._split_pdb_id(in_rec[2], 'Bad PDB ID 2')
        self.zscore = float(in_rec[3])
        self.rmsd = float(in_rec[4])
        # lali, lseq2 and pID have always been stored as floats; kept that
        # way so existing callers see the same types.
        self.lali = float(in_rec[5])
        self.lseq2 = float(in_rec[6])
        self.pID = float(in_rec[7])
        self.revers = int(in_rec[8])
        self.permut = int(in_rec[9])
        self.nfrag = int(in_rec[10])
        self.topo = in_rec[11]
        self.doc = ' '.join(in_rec[12:]) + '\n'

    def _split_pdb_id(self, token, message):
        """Return (pdb_code, chain) for a 4- or 5-character identifier.

        A 4-character identifier has no chain letter and gets chain '0'.
        Any other length raises ValueError(message).
        """
        if len(token) == 4:
            return token, '0'
        if len(token) == 5:
            return token[:4], token[4]
        raise ValueError(message)

    def __repr__(self):
        """Return the summary line exactly as it was read."""
        return self.raw

    __str__ = __repr__
123
class FSSPAlignRec:
    """One residue row from an FSSP ALIGNMENTS section.

    The fixed fields of the row are read from in_fff_rec using the field
    locations defined on fssp_rec.align. The aligned cells are added later
    with add_align_list() and indexed with pos_align_list2dict().
    """

    def __init__(self, in_fff_rec):
        """Extract the fixed fields of an alignment row.

        in_fff_rec is indexed with the locations on fssp_rec.align
        (presumably an fff_rec wrapping the raw line -- confirm against
        the fssp_rec module).
        """
        fields = fssp_rec.align
        self.abs_res_num = int(in_fff_rec[fields.abs_res_num])
        self.pdb_res_num = in_fff_rec[fields.pdb_res_num].strip()
        self.chain_id = in_fff_rec[fields.chain_id]
        # A blank chain identifier is stored as '0'.
        if self.chain_id == ' ':
            self.chain_id = '0'
        self.res_name = in_fff_rec[fields.res_name]
        # A residue name equal to its own lower-case form is stored as 'C'.
        # NOTE(review): presumably lower-case letters mark disulphide-bonded
        # cysteines -- confirm.
        if self.res_name == self.res_name.lower():
            self.res_name = 'C'
        self.ss1 = in_fff_rec[fields.ss1]
        self.turn3 = in_fff_rec[fields.turn3]
        self.turn4 = in_fff_rec[fields.turn4]
        self.turn5 = in_fff_rec[fields.turn5]
        self.pos_align_dict = {}
        self.PosAlignList = []

    def add_align_list(self, align_list):
        """Append a PosAlign built from every cell string in align_list."""
        self.PosAlignList.extend(PosAlign(cell) for cell in align_list)

    def pos_align_list2dict(self):
        """Copy PosAlignList into pos_align_dict, keyed 1, 2, 3, ..."""
        for number, pos in enumerate(self.PosAlignList, 1):
            self.pos_align_dict[number] = pos
149 150
class FSSPAlignDict(dict):
    """Dictionary of alignment records with residue-number lookups.

    Call build_resnum_list() after the records have been inserted; abs(),
    pdb(), sequence() and fasta_mult_align() all rely on the indexes it
    builds.
    """

    def __init__(self):
        # The following two dictionaries are pointers to records in self.
        # The first dictionary is a "pdb_residue_number: self_key".
        # The second dictionary is a "absolute_residue_number: self_key".
        self.pdb_res_dict = {}
        self.abs_res_dict = {}
        self.data = {}

    def build_resnum_list(self):
        """(Re)build the residue-number -> key indexes from the records."""
        for key in self:
            record = self[key]
            self.abs_res_dict[record.abs_res_num] = key
            self.pdb_res_dict[record.pdb_res_num] = key

    def abs(self, num):
        """Return the record whose absolute residue number is num.

        Raises KeyError if num has not been indexed.
        """
        return self[self.abs_res_dict[num]]

    def pdb(self, num):
        """Return the record whose PDB residue number is num.

        Raises KeyError if num has not been indexed.
        """
        return self[self.pdb_res_dict[num]]

    def sequence(self, num):
        """Return aligned sequence num as a string.

        Residues are taken in ascending absolute-residue-number order.
        """
        return ''.join(self.abs(res_num).pos_align_dict[num].aa
                       for res_num in sorted(self.abs_res_dict))

    def fasta_mult_align(self):
        """Return every aligned sequence as FASTA-style text.

        Each sequence is headed by '> <number>'; sequences appear in
        ascending number order. Raises KeyError when there is no record
        with absolute residue number 1.
        """
        columns = dict((seq_num, [])
                       for seq_num in self.abs(1).pos_align_dict)
        # Walk the residues in absolute-number order. Iterating
        # self.values() follows dict order, which is not guaranteed to be
        # sequence order and would scramble the residues.
        for res_num in sorted(self.abs_res_dict):
            aligned = self.abs(res_num).pos_align_dict
            for seq_num in aligned:
                columns[seq_num].append(aligned[seq_num].aa)
        pieces = []
        for seq_num in sorted(columns):
            pieces.append('> %d\n' % seq_num)
            # Historical wrapping is kept as-is: a line break is written
            # before every 72nd character.
            for count, residue in enumerate(''.join(columns[seq_num]), 1):
                if count % 72 == 0:
                    pieces.append('\n')
                pieces.append(residue)
            pieces.append('\n')
        return ''.join(pieces)
201
class FSSPSumDict(dict):
    """Plain dict subclass used to hold FSSP summary records."""
#
# Process a fssp file into its constituents. Return a 3-tuple containing
# the header record, a dictionary of FSSPSumRecs keyed by record number,
# and a dictionary of alignment records.
#
def read_fssp(fssp_handle):
    """Parse an FSSP file into its header, summary and alignment records.

    fssp_handle is any object with a readline() method that returns ''
    at end of file.

    Returns a 3-tuple (header, sum_dict, align_dict):
        header     -- FSSPHeader filled from the lines before the summary
        sum_dict   -- FSSPSumDict mapping record number -> FSSPSumRec
        align_dict -- FSSPAlignDict mapping chain id + residue name +
                      PDB residue number -> FSSPAlignRec

    Raises ValueError when the file ends before a SUMMARY title, or before
    an ALIGNMENTS/EQUIVALENCES title, is found. Raises EOFError when the
    file ends directly after an alignment record.
    """
    header = FSSPHeader()
    sum_dict = FSSPSumDict()
    align_dict = FSSPAlignDict()

    # Header: everything up to the SUMMARY title.
    curline = fssp_handle.readline()
    while not summary_title.match(curline):
        if not curline:
            # readline() keeps returning '' at EOF, so without this check
            # a file lacking a SUMMARY section would loop forever.
            raise ValueError('Bad FSSP file: no summary record found')
        header.fill_header(curline)
        curline = fssp_handle.readline()

    curline = fssp_handle.readline()  # Read the title line, discard
    curline = fssp_handle.readline()  # Read the next line
    # Process the summary records into a dictionary keyed by record number.
    while summary_rec.match(curline):
        cur_sum_rec = FSSPSumRec(curline)
        sum_dict[cur_sum_rec.nr] = cur_sum_rec
        curline = fssp_handle.readline()

    # Outer loop: process everything up to the EQUIVALENCES title record.
    while not equiv_title.match(curline):
        # Skip ahead to the next ALIGNMENTS or EQUIVALENCES title.
        while not (alignments_title.match(curline) or
                   equiv_title.match(curline)):
            if not curline:
                # Same EOF guard as above; this scan used to spin forever.
                raise ValueError(
                    'Bad FSSP file: reached end of file before an '
                    'ALIGNMENTS or EQUIVALENCES title record')
            curline = fssp_handle.readline()
        if equiv_title.match(curline):
            break

        # We are on an ALIGNMENTS title. Parse its records in a loop.
        curline = fssp_handle.readline()  # Read the title line, discard
        curline = fssp_handle.readline()  # Read the next line
        while alignments_rec.match(curline):
            align_rec = FSSPAlignRec(fff_rec(curline))
            key = (align_rec.chain_id + align_rec.res_name +
                   str(align_rec.pdb_res_num))
            align_list = curline[fssp_rec.align.start_aa_list:].split()
            # The same residue row recurs in every ALIGNMENTS section;
            # keep the first record and keep appending cells to it.
            if key not in align_dict:
                align_dict[key] = align_rec
            align_dict[key].add_align_list(align_list)
            curline = fssp_handle.readline()
            if not curline:
                raise EOFError(
                    'Unexpected end of FSSP file inside an alignments '
                    'section')

    # Index the accumulated cells by position and drop the working list.
    for record in align_dict.values():
        record.pos_align_list2dict()
        del record.PosAlignList
    align_dict.build_resnum_list()
    return (header, sum_dict, align_dict)
264