Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package.   
  5   
  6  # Python stuff 
  7  import sys 
  8   
  9  import numpy 
 10   
 11  # My stuff 
 12  from StructureBuilder import StructureBuilder 
 13  from PDBExceptions import PDBConstructionException 
 14  from parse_pdb_header import _parse_pdb_header_list 
 15   
 16  __doc__="Parser for PDB files." 
 17   
 18   
 19  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 20   
 21   
22 -class PDBParser:
23 """ 24 Parse a PDB file and return a Structure object. 25 """ 26
27 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
28 """ 29 The PDB parser call a number of standard methods in an aggregated 30 StructureBuilder object. Normally this object is instanciated by the 31 PDBParser object itself, but if the user provides his own StructureBuilder 32 object, the latter is used instead. 33 34 Arguments: 35 o PERMISSIVE - int, if this is 0 exceptions in constructing the 36 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are 37 caught, but some residues or atoms will be missing. THESE EXCEPTIONS 38 ARE DUE TO PROBLEMS IN THE PDB FILE!. 39 o structure_builder - an optional user implemented StructureBuilder class. 40 """ 41 if structure_builder!=None: 42 self.structure_builder=structure_builder 43 else: 44 self.structure_builder=StructureBuilder() 45 self.header=None 46 self.trailer=None 47 self.line_counter=0 48 self.PERMISSIVE=PERMISSIVE
49 50 # Public methods 51
52 - def get_structure(self, id, file):
53 """Return the structure. 54 55 Arguments: 56 o id - string, the id that will be used for the structure 57 o file - name of the PDB file OR an open filehandle 58 """ 59 self.header=None 60 self.trailer=None 61 # Make a StructureBuilder instance (pass id of structure as parameter) 62 self.structure_builder.init_structure(id) 63 if isinstance(file, basestring): 64 file=open(file) 65 self._parse(file.readlines()) 66 self.structure_builder.set_header(self.header) 67 # Return the Structure instance 68 return self.structure_builder.get_structure()
69
70 - def get_header(self):
71 "Return the header." 72 return self.header
73
74 - def get_trailer(self):
75 "Return the trailer." 76 return self.trailer
77 78 # Private methods 79
80 - def _parse(self, header_coords_trailer):
81 "Parse the PDB file." 82 # Extract the header; return the rest of the file 83 self.header, coords_trailer=self._get_header(header_coords_trailer) 84 # Parse the atomic data; return the PDB file trailer 85 self.trailer=self._parse_coordinates(coords_trailer)
86
87 - def _get_header(self, header_coords_trailer):
88 "Get the header of the PDB file, return the rest." 89 structure_builder=self.structure_builder 90 for i in range(0, len(header_coords_trailer)): 91 structure_builder.set_line_counter(i+1) 92 line=header_coords_trailer[i] 93 record_type=line[0:6] 94 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '): 95 break 96 header=header_coords_trailer[0:i] 97 # Return the rest of the coords+trailer for further processing 98 self.line_counter=i 99 coords_trailer=header_coords_trailer[i:] 100 header_dict=_parse_pdb_header_list(header) 101 return header_dict, coords_trailer
102
103 - def _parse_coordinates(self, coords_trailer):
104 "Parse the atomic data in the PDB file." 105 local_line_counter=0 106 structure_builder=self.structure_builder 107 current_model_id=0 108 # Flag we have an open model 109 model_open=0 110 current_chain_id=None 111 current_segid=None 112 current_residue_id=None 113 current_resname=None 114 for i in range(0, len(coords_trailer)): 115 line=coords_trailer[i] 116 record_type=line[0:6] 117 global_line_counter=self.line_counter+local_line_counter+1 118 structure_builder.set_line_counter(global_line_counter) 119 if(record_type=='ATOM ' or record_type=='HETATM'): 120 # Initialize the Model - there was no explicit MODEL record 121 if not model_open: 122 structure_builder.init_model(current_model_id) 123 current_model_id+=1 124 model_open=1 125 fullname=line[12:16] 126 # get rid of whitespace in atom names 127 split_list=fullname.split() 128 if len(split_list)!=1: 129 # atom name has internal spaces, e.g. " N B ", so 130 # we do not strip spaces 131 name=fullname 132 else: 133 # atom name is like " CA ", so we can strip spaces 134 name=split_list[0] 135 altloc=line[16:17] 136 resname=line[17:20] 137 chainid=line[21:22] 138 try: 139 serial_number=int(line[6:11]) 140 except: 141 serial_number=0 142 resseq=int(line[22:26].split()[0]) # sequence identifier 143 icode=line[26:27] # insertion code 144 if record_type=='HETATM': # hetero atom flag 145 if resname=="HOH" or resname=="WAT": 146 hetero_flag="W" 147 else: 148 hetero_flag="H" 149 else: 150 hetero_flag=" " 151 residue_id=(hetero_flag, resseq, icode) 152 # atomic coordinates 153 try : 154 x=float(line[30:38]) 155 y=float(line[38:46]) 156 z=float(line[46:54]) 157 except : 158 #Should we allow parsing to continue in permissive mode? 159 #If so what coordindates should we default to? Easier to abort! 160 raise PDBContructionError("Invalid or missing coordinate(s) at line %i." \ 161 % global_line_counter) 162 coord=numpy.array((x, y, z), 'f') 163 # occupancy & B factor 164 try : 165 occupancy=float(line[54:60]) 166 except : 167 self._handle_PDB_exception("Invalid or missing occupancy", 168 global_line_counter) 169 occupancy = 0.0 #Is one or zero a good default? 170 try : 171 bfactor=float(line[60:66]) 172 except : 173 self._handle_PDB_exception("Invalid or missing B factor", 174 global_line_counter) 175 bfactor = 0.0 #The PDB use a default of zero if the data is missing 176 segid=line[72:76] 177 if current_segid!=segid: 178 current_segid=segid 179 structure_builder.init_seg(current_segid) 180 if current_chain_id!=chainid: 181 current_chain_id=chainid 182 structure_builder.init_chain(current_chain_id) 183 current_residue_id=residue_id 184 current_resname=resname 185 try: 186 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 187 except PDBConstructionException, message: 188 self._handle_PDB_exception(message, global_line_counter) 189 elif current_residue_id!=residue_id or current_resname!=resname: 190 current_residue_id=residue_id 191 current_resname=resname 192 try: 193 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 194 except PDBConstructionException, message: 195 self._handle_PDB_exception(message, global_line_counter) 196 # init atom 197 try: 198 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number) 199 except PDBConstructionException, message: 200 self._handle_PDB_exception(message, global_line_counter) 201 elif(record_type=='ANISOU'): 202 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 203 # U's are scaled by 10^4 204 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f') 205 structure_builder.set_anisou(anisou_array) 206 elif(record_type=='MODEL '): 207 structure_builder.init_model(current_model_id) 208 current_model_id+=1 209 model_open=1 210 current_chain_id=None 211 current_residue_id=None 212 elif(record_type=='END ' or record_type=='CONECT'): 213 # End of atomic data, return the trailer 214 self.line_counter=self.line_counter+local_line_counter 215 return coords_trailer[local_line_counter:] 216 elif(record_type=='ENDMDL'): 217 model_open=0 218 current_chain_id=None 219 current_residue_id=None 220 elif(record_type=='SIGUIJ'): 221 # standard deviation of anisotropic B factor 222 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 223 # U sigma's are scaled by 10^4 224 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f') 225 structure_builder.set_siguij(siguij_array) 226 elif(record_type=='SIGATM'): 227 # standard deviation of atomic positions 228 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66])) 229 sigatm_array=numpy.array(sigatm, 'f') 230 structure_builder.set_sigatm(sigatm_array) 231 local_line_counter=local_line_counter+1 232 # EOF (does not end in END or CONECT) 233 self.line_counter=self.line_counter+local_line_counter 234 return []
235
236 - def _handle_PDB_exception(self, message, line_counter):
237 """ 238 This method catches an exception that occurs in the StructureBuilder 239 object (if PERMISSIVE==1), or raises it again, this time adding the 240 PDB line number to the error message. 241 """ 242 message="%s at line %i." % (message, line_counter) 243 if self.PERMISSIVE: 244 # just print a warning - some residues/atoms may be missing 245 print "PDBConstructionException: %s" % message 246 print "Exception ignored.\nSome atoms or residues may be missing in the data structure." 247 else: 248 # exceptions are fatal - raise again with new message (including line nr) 249 raise PDBConstructionException(message)
250 251 252 if __name__=="__main__": 253 254 import sys 255 256 p=PDBParser(PERMISSIVE=1) 257 258 filename = sys.argv[1] 259 s=p.get_structure("scr", filename) 260 261 for m in s: 262 p=m.get_parent() 263 assert(p is s) 264 for c in m: 265 p=c.get_parent() 266 assert(p is m) 267 for r in c: 268 print r 269 p=r.get_parent() 270 assert(p is c) 271 for a in r: 272 p=a.get_parent() 273 if not p is r: 274 print p, r 275