1
2
3
4
5
6
7 import sys
8
9 import numpy
10
11
12 from StructureBuilder import StructureBuilder
13 from PDBExceptions import PDBConstructionException
14 from parse_pdb_header import _parse_pdb_header_list
15
16 __doc__="Parser for PDB files."
17
18
19
20
21
23 """
24 Parse a PDB file and return a Structure object.
25 """
26
27 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
28 """
29 The PDB parser call a number of standard methods in an aggregated
30 StructureBuilder object. Normally this object is instanciated by the
31 PDBParser object itself, but if the user provides his own StructureBuilder
32 object, the latter is used instead.
33
34 Arguments:
35 o PERMISSIVE - int, if this is 0 exceptions in constructing the
36 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are
37 caught, but some residues or atoms will be missing. THESE EXCEPTIONS
38 ARE DUE TO PROBLEMS IN THE PDB FILE!.
39 o structure_builder - an optional user implemented StructureBuilder class.
40 """
41 if structure_builder!=None:
42 self.structure_builder=structure_builder
43 else:
44 self.structure_builder=StructureBuilder()
45 self.header=None
46 self.trailer=None
47 self.line_counter=0
48 self.PERMISSIVE=PERMISSIVE
49
50
51
53 """Return the structure.
54
55 Arguments:
56 o id - string, the id that will be used for the structure
57 o file - name of the PDB file OR an open filehandle
58 """
59 self.header=None
60 self.trailer=None
61
62 self.structure_builder.init_structure(id)
63 if isinstance(file, basestring):
64 file=open(file)
65 self._parse(file.readlines())
66 self.structure_builder.set_header(self.header)
67
68 return self.structure_builder.get_structure()
69
71 "Return the header."
72 return self.header
73
75 "Return the trailer."
76 return self.trailer
77
78
79
80 - def _parse(self, header_coords_trailer):
86
88 "Get the header of the PDB file, return the rest."
89 structure_builder=self.structure_builder
90 for i in range(0, len(header_coords_trailer)):
91 structure_builder.set_line_counter(i+1)
92 line=header_coords_trailer[i]
93 record_type=line[0:6]
94 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '):
95 break
96 header=header_coords_trailer[0:i]
97
98 self.line_counter=i
99 coords_trailer=header_coords_trailer[i:]
100 header_dict=_parse_pdb_header_list(header)
101 return header_dict, coords_trailer
102
104 "Parse the atomic data in the PDB file."
105 local_line_counter=0
106 structure_builder=self.structure_builder
107 current_model_id=0
108
109 model_open=0
110 current_chain_id=None
111 current_segid=None
112 current_residue_id=None
113 current_resname=None
114 for i in range(0, len(coords_trailer)):
115 line=coords_trailer[i]
116 record_type=line[0:6]
117 global_line_counter=self.line_counter+local_line_counter+1
118 structure_builder.set_line_counter(global_line_counter)
119 if(record_type=='ATOM ' or record_type=='HETATM'):
120
121 if not model_open:
122 structure_builder.init_model(current_model_id)
123 current_model_id+=1
124 model_open=1
125 fullname=line[12:16]
126
127 split_list=fullname.split()
128 if len(split_list)!=1:
129
130
131 name=fullname
132 else:
133
134 name=split_list[0]
135 altloc=line[16:17]
136 resname=line[17:20]
137 chainid=line[21:22]
138 try:
139 serial_number=int(line[6:11])
140 except:
141 serial_number=0
142 resseq=int(line[22:26].split()[0])
143 icode=line[26:27]
144 if record_type=='HETATM':
145 if resname=="HOH" or resname=="WAT":
146 hetero_flag="W"
147 else:
148 hetero_flag="H"
149 else:
150 hetero_flag=" "
151 residue_id=(hetero_flag, resseq, icode)
152
153 try :
154 x=float(line[30:38])
155 y=float(line[38:46])
156 z=float(line[46:54])
157 except :
158
159
160 raise PDBContructionError("Invalid or missing coordinate(s) at line %i." \
161 % global_line_counter)
162 coord=numpy.array((x, y, z), 'f')
163
164 try :
165 occupancy=float(line[54:60])
166 except :
167 self._handle_PDB_exception("Invalid or missing occupancy",
168 global_line_counter)
169 occupancy = 0.0
170 try :
171 bfactor=float(line[60:66])
172 except :
173 self._handle_PDB_exception("Invalid or missing B factor",
174 global_line_counter)
175 bfactor = 0.0
176 segid=line[72:76]
177 if current_segid!=segid:
178 current_segid=segid
179 structure_builder.init_seg(current_segid)
180 if current_chain_id!=chainid:
181 current_chain_id=chainid
182 structure_builder.init_chain(current_chain_id)
183 current_residue_id=residue_id
184 current_resname=resname
185 try:
186 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
187 except PDBConstructionException, message:
188 self._handle_PDB_exception(message, global_line_counter)
189 elif current_residue_id!=residue_id or current_resname!=resname:
190 current_residue_id=residue_id
191 current_resname=resname
192 try:
193 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
194 except PDBConstructionException, message:
195 self._handle_PDB_exception(message, global_line_counter)
196
197 try:
198 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number)
199 except PDBConstructionException, message:
200 self._handle_PDB_exception(message, global_line_counter)
201 elif(record_type=='ANISOU'):
202 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
203
204 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f')
205 structure_builder.set_anisou(anisou_array)
206 elif(record_type=='MODEL '):
207 structure_builder.init_model(current_model_id)
208 current_model_id+=1
209 model_open=1
210 current_chain_id=None
211 current_residue_id=None
212 elif(record_type=='END ' or record_type=='CONECT'):
213
214 self.line_counter=self.line_counter+local_line_counter
215 return coords_trailer[local_line_counter:]
216 elif(record_type=='ENDMDL'):
217 model_open=0
218 current_chain_id=None
219 current_residue_id=None
220 elif(record_type=='SIGUIJ'):
221
222 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
223
224 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')
225 structure_builder.set_siguij(siguij_array)
226 elif(record_type=='SIGATM'):
227
228 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
229 sigatm_array=numpy.array(sigatm, 'f')
230 structure_builder.set_sigatm(sigatm_array)
231 local_line_counter=local_line_counter+1
232
233 self.line_counter=self.line_counter+local_line_counter
234 return []
235
237 """
238 This method catches an exception that occurs in the StructureBuilder
239 object (if PERMISSIVE==1), or raises it again, this time adding the
240 PDB line number to the error message.
241 """
242 message="%s at line %i." % (message, line_counter)
243 if self.PERMISSIVE:
244
245 print "PDBConstructionException: %s" % message
246 print "Exception ignored.\nSome atoms or residues may be missing in the data structure."
247 else:
248
249 raise PDBConstructionException(message)
250
251
252 if __name__=="__main__":
253
254 import sys
255
256 p=PDBParser(PERMISSIVE=1)
257
258 filename = sys.argv[1]
259 s=p.get_structure("scr", filename)
260
261 for m in s:
262 p=m.get_parent()
263 assert(p is s)
264 for c in m:
265 p=c.get_parent()
266 assert(p is m)
267 for r in c:
268 print r
269 p=r.get_parent()
270 assert(p is c)
271 for a in r:
272 p=a.get_parent()
273 if not p is r:
274 print p, r
275