1
2
3
4 import sys
5 import ProtParamData, IsoelectricPoint
6 from ProtParamData import kd
7 from Bio.Seq import Seq
8 from Bio.Alphabet import IUPAC
9 from Bio.Data import IUPACData
10
11
13 """
14 This class contains methods for protein analysis. The class init method takes
15 only one argument, the protein sequence as a string, and builds a sequence
16 object using the Bio.Seq module. This is done just to make sure the sequence
17 is a protein sequence and not anything else.
18
19 methods:
20
21 count_amino_acids:
22
23 Simply counts the number of times an amino acid is repeated in the protein
24 sequence. Returns a dictionary {AminoAcid:Number} and also stores the
25 dictionary in self.amino_acids_content.
26
27 get_amino_acids_percent:
28
29 The same as count_amino_acids, but returns each Number as a fraction of the
30 entire sequence (between 0 and 1). Returns a dictionary and stores the dictionary in
31 self.amino_acids_percent.
32
33 molecular_weight:
34 Calculates the molecular weight of a protein.
35
36 aromaticity:
37
38 Calculates the aromaticity value of a protein according to Lobry, 1994. It is
39 simply the relative frequency of Phe+Trp+Tyr.
40
41
42 instability_index:
43
44 Implementation of the method of Guruprasad et al. (Protein Engineering
45 4:155-161,1990). This method tests a protein for stability. Any value above 40
46 means the protein is unstable (=has a short half life).
47
48 flexibility:
49 Implementation of the flexibility method of Vihinen et al. (Proteins. 1994 Jun;19(2):141-9).
50
51 isoelectric_point:
52 This method uses the module IsoelectricPoint to calculate the pI of a protein.
53
54 secondary_structure_fraction:
55 This method returns a tuple of the fractions of amino acids which tend to be in Helix, Turn or Sheet.
56 Amino acids in helix: V, I, Y, F, W, L.
57 Amino acids in Turn: N, P, G, S.
58 Amino acids in sheet: E, M, A, L.
59 The tuple contains 3 values: (Helix, Turn, Sheet).
60
61
62 protein_scale(Scale, WindowSize, Edge):
63
64 An amino acid scale is defined by a numerical value assigned to each type of
65 amino acid. The most frequently used scales are the hydrophobicity or
66 hydrophilicity scales and the secondary structure conformational parameters
67 scales, but many other scales exist which are based on different chemical and
68 physical properties of the amino acids. You can set several parameters that
69 control the computation of a scale profile, such as the window size and the
70 window edge relative weight value. WindowSize: The window size is the length
71 of the interval to use for the profile computation. For a window size n, we
72 use the (n-1)/2 neighboring residues on each side of residue i to compute
73 the score for residue i. The score for residue i is the sum of the scale values
74 for these amino acids, optionally weighted according to their position in the
75 window. Edge: The central amino acid of the window always has a weight of 1.
76 By default, the amino acids at the remaining window positions have the same
77 weight, but you can make the residue at the center of the window have a
78 larger weight than the others by setting the edge value for the residues at
79 the beginning and end of the interval to a value between 0 and 1. For
80 instance, for Edge=0.4 and a window size of 5 the weights will be: 0.4, 0.7,
81 1.0, 0.7, 0.4. The method returns a list of values which can be plotted to
82 view the change along a protein sequence. Many scales exist. Just add your
83 favorites to the ProtParamData modules.
84 """
93
100
101 """Calculate the amino acid content as a fraction of the sequence length.
102 Input is the dictionary produced by count_amino_acids.
103 Output is a dictionary with amino acids as keys."""
105 if not self.amino_acids_content:
106 self.count_amino_acids()
107
108 PercentAA = {}
109 for i in self.amino_acids_content.keys():
110 if self.amino_acids_content[i] > 0:
111 PercentAA[i]=self.amino_acids_content[i]/float(self.length)
112 else:
113 PercentAA[i] = 0
114 self.amino_acids_percent = PercentAA
115 return PercentAA
116
117
118
129
130
131
133 if not self.amino_acids_percent:
134 self.get_amino_acids_percent()
135
136 Arom= self.amino_acids_percent['Y']+self.amino_acids_percent['W']+self.amino_acids_percent['F']
137 return Arom
138
139
140
149
150
151
152
166
167
169 ProtGravy=0.0
170 for i in self.sequence:
171 ProtGravy += kd[i]
172
173 return ProtGravy/self.length
174
175
176
177
178
180 unit = ((1.0-edge)/(window-1))*2
181 list = [0.0]*(window/2)
182 for i in range(window/2):
183 list[i] = edge + unit * i
184 return list
185
186
187
188
189
190
191
193
194 weight = self._weight_list(Window,Edge)
195 list = []
196
197 sum_of_weights = 0.0
198 for i in weight: sum_of_weights += i
199
200 sum_of_weights = sum_of_weights*2+1
201
202 for i in range(self.length-Window+1):
203 subsequence = self.sequence[i:i+Window]
204 score = 0.0
205 for j in range(Window/2):
206
207
208 try:
209 score += weight[j] * ParamDict[subsequence[j]] + weight[j] * ParamDict[subsequence[Window-j-1]]
210 except KeyError:
211 sys.stderr.write('warning: %s or %s is not a standard amino acid.\n' %
212 (subsequence[j],subsequence[Window-j-1]))
213
214
215 if subsequence[Window/2] in ParamDict:
216 score += ParamDict[subsequence[Window/2]]
217 else:
218 sys.stderr.write('warning: %s is not a standard amino acid.\n' % (subsequence[Window/2]))
219
220 list.append(score/sum_of_weights)
221 return list
222
223
229
230
232 if not self.amino_acids_percent:
233 self.get_amino_acids_percent()
234 Helix = self.amino_acids_percent['V'] + self.amino_acids_percent['I'] + self.amino_acids_percent['Y'] + self.amino_acids_percent['F'] + self.amino_acids_percent['W'] + self.amino_acids_percent['L']
235 Turn = self.amino_acids_percent['N'] + self.amino_acids_percent['P'] + self.amino_acids_percent['G'] + self.amino_acids_percent['S']
236 Sheet = self.amino_acids_percent['E'] + self.amino_acids_percent['M'] + self.amino_acids_percent['A'] + self.amino_acids_percent['L']
237 return Helix, Turn, Sheet
238
239
240 """
241 X = ProteinAnalysis("MAEGEITTFTALTEKFNLPPGNYKKPKLLYCSNGGHFLRILPDGTVDGTRDRSDQHIQLQLSAESVGEVYIKSTETGQYLAMDTSGLLYGSQTPSEECLFLERLEENHYNTYTSKKHAEKNWFVGLKKNGSCKRGPRTHYGQKAILFLPLPV")
242 print X.count_amino_acids()
243 print X.get_amino_acids_percent()
244 print X.molecular_weight()
245 print X.aromaticity()
246 print X.instability_index()
247 print X.flexibility()
248 print X.pi()
249 print X.secondary_structure_fraction()
250 print X.protein_scale(ProtParamData.kd, 9, 0.4)
251 """
252