Package Bio :: Module MaxEntropy

Source Code for Module Bio.MaxEntropy

# Copyright 2001 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
Maximum Entropy code.

Uses Improved Iterative Scaling:
XXX ref

# XXX need to define terminology

"""

import numpy

# TODO - Remove this workaround once we drop python 2.3 support
try:
    set
except NameError:
    from sets import Set as set

# These are module level defaults, which in theory users might change
# at run time to adjust the behaviour of the train function:
MAX_IIS_ITERATIONS = 10000    # Maximum iterations for IIS.
IIS_CONVERGE = 1E-5           # Convergence criterion for IIS.
MAX_NEWTON_ITERATIONS = 100   # Maximum iterations on Newton's method.
NEWTON_CONVERGE = 1E-10       # Convergence criterion for Newton's method.

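# Illustrative override (a sketch, not part of the original module): client
# code could adjust these module level defaults before calling train(), e.g.
#
#     import Bio.MaxEntropy
#     Bio.MaxEntropy.MAX_IIS_ITERATIONS = 20000
#     Bio.MaxEntropy.IIS_CONVERGE = 1E-6
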
class MaxEntropy:
    """Holds information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    """
    def __init__(self):
        self.classes = []
        self.alphas = []
        self.feature_fns = []

def calculate(me, observation):
    """calculate(me, observation) -> list of log probs

    Calculate the log of the probability for each class.  me is a
    MaxEntropy object that has been trained.  observation is a vector
    representing the observed data.  The return value is a list of
    unnormalized log probabilities for each class.

    """
    scores = []
    for klass in range(len(me.classes)):
        lprob = 0.0
        for fn, alpha in map(None, me.feature_fns, me.alphas):
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores

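# Illustrative helper (a sketch, not part of the original module): the scores
# returned by calculate() are unnormalized log probabilities, so to report
# actual per-class probabilities one could exponentiate and normalize them.
def _scores_to_probabilities(me, observation):
    scores = calculate(me, observation)
    exp_scores = [numpy.exp(s) for s in scores]
    total = sum(exp_scores)
    return dict(zip(me.classes, [s / total for s in exp_scores]))
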
def classify(me, observation):
    """classify(me, observation) -> class

    Classify an observation into a class.

    """
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass

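# Illustrative usage (assumes `me` is a classifier returned by train() below;
# the observation shown is hypothetical):
#
#     prediction = classify(me, ['Red', 'Sports', 'Domestic'])
#
# classify() simply returns the class whose calculate() score is largest.
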
def _eval_feature_fn(fn, xs, classes):
    """_eval_feature_fn(fn, xs, classes) -> dict of values

    Evaluate a feature function on every instance of the training set
    and class.  fn is a callback function that takes two parameters: a
    training instance and a class.  Return a dictionary of (training
    set index, class index) -> non-zero value.  Values of 0 are not
    stored in the dictionary.

    """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values

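# Illustrative sketch (not part of the original module): a hypothetical
# feature function that fires only for red cars assigned to class 'Yes'.
# For xs = [['Red', ...], ['Yellow', ...]] and classes = ['No', 'Yes'],
# _eval_feature_fn would return the sparse dict {(0, 1): 1} -- instance 0
# paired with class index 1 -- because every other (instance, class) pair
# evaluates to 0 and is therefore not stored.
#
#     def red_and_yes(ts, cl):
#         if ts[0] == 'Red' and cl == 'Yes':
#             return 1
#         return 0
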
def _calc_empirical_expects(xs, ys, classes, features):
    """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations

    Calculate the expectation of each function from the data.  This is
    the constraint for the maximum entropy distribution.  Return a
    list of expectations, parallel to the list of features.

    """
    # E[f_i] = SUM_x,y P(x, y) f(x, y)
    #        = 1/N SUM_x f(x, y)
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect

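# Worked example (illustrative, using the hypothetical red_and_yes feature
# above): if the feature fires on 3 of 10 training instances when paired with
# their true class, its empirical expectation is 3 / 10 = 0.3.  IIS then
# adjusts the alphas until the model expectations below match these values.
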
def _calc_model_expects(xs, classes, features, alphas):
    """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.

    Calculate the expectation of each feature from the model.  This is
    not used in maximum entropy training, but provides a good function
    for debugging.

    """
    # SUM_X P(x) SUM_Y P(Y|X) F(X, Y)
    # = 1/N SUM_X SUM_Y P(Y|X) F(X, Y)
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        total = 0.0
        for (i, j), f in feature.items():
            total += p_yx[i][j] * f
        expects.append(total / len(xs))
    return expects

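# Illustrative debugging helper (a sketch, not part of the original module):
# at convergence the model expectations should closely match the empirical
# expectations, so their absolute gaps are a handy sanity check.
def _expectation_gaps(xs, ys, classes, features, alphas):
    e_emp = _calc_empirical_expects(xs, ys, classes, features)
    e_mod = _calc_model_expects(xs, classes, features, alphas)
    return [numpy.fabs(emp - mod) for emp, mod in zip(e_emp, e_mod)]
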
def _calc_p_class_given_x(xs, classes, features, alphas):
    """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix

    Calculate P(y|x), where y is the class and x is an instance from
    the training set.  Return an XS x CLASSES matrix of probabilities.

    """
    prob_yx = numpy.zeros((len(xs), len(classes)))

    # Calculate log P(y, x).
    for feature, alpha in map(None, features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Take an exponent to get P(y, x)
    prob_yx = numpy.exp(prob_yx)
    # Divide out the probability over each class, so we get P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z

    #prob_yx = []
    #for i in range(len(xs)):
    #    z = 0.0   # Normalization factor for this x, over all classes.
    #    probs = [0.0] * len(classes)
    #    for j in range(len(classes)):
    #        log_p = 0.0   # log of the probability of f(x, y)
    #        for k in range(len(features)):
    #            log_p += alphas[k] * features[k].get((i, j), 0.0)
    #        probs[j] = numpy.exp(log_p)
    #        z += probs[j]
    #    # Normalize the probabilities for this x.
    #    probs = map(lambda x, z=z: x/z, probs)
    #    prob_yx.append(probs)
    return prob_yx

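# In formula form (an illustrative restatement of the code above): for
# instance x and class y,
#
#     P(y | x) = exp(SUM_i alpha_i * f_i(x, y)) / Z(x)
#
# where Z(x) = SUM_y' exp(SUM_i alpha_i * f_i(x, y')) is the per-instance
# normalization computed in the final loop.
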
def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
    # f#(x, y) = SUM_i f_i(x, y)
    f_sharp = numpy.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp

def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx):
    """Solve for the IIS update delta_i using Newton's method (PRIVATE)."""
    # Use Newton's method to solve for delta_i in:
    #   empirical - 1/N * SUM_x SUM_c P(c|x) f_i(x, c) e^[delta_i * f#(x, c)] = 0
    delta = 0.0
    iters = 0
    while iters < MAX_NEWTON_ITERATIONS:    # iterate for Newton's method
        f_newton = df_newton = 0.0          # evaluate the function and derivative
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if numpy.fabs(ratio) < NEWTON_CONVERGE:   # converged
            break
        iters = iters + 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta

def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    # This is a good function to parallelize.

    # Pre-calculate P(y|x)
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx)
        newalphas[i] += delta
    return newalphas


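# Illustrative sketch (not part of the original module): the per-feature
# Newton solves above are independent of each other, so the loop could be
# farmed out to a multiprocessing.Pool supplied by the caller.  The names
# _solve_delta_args, _train_iis_parallel and pool are hypothetical.
def _solve_delta_args(args):
    # Unpack a single argument tuple for Pool.map.
    return _iis_solve_delta(*args)


def _train_iis_parallel(xs, classes, features, f_sharp, alphas, e_empirical, pool):
    # Same computation as _train_iis, with the feature loop distributed.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)
    N = len(xs)
    tasks = [(N, features[i], f_sharp, e_empirical[i], p_yx)
             for i in range(len(alphas))]
    deltas = pool.map(_solve_delta_args, tasks)
    return [alpha + delta for alpha, delta in zip(alphas, deltas)]

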
def train(training_set, results, feature_fns, update_fn=None):
    """Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that's called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    """
    if not len(training_set):
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename variables for convenience.
    xs, ys = training_set, results

    # Get a list of all the classes that need to be trained.
    classes = list(set(results))
    classes.sort()

    # Cache values for all features.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < MAX_IIS_ITERATIONS:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical)
        diff = map(lambda x, y: numpy.fabs(x - y), alphas, nalphas)
        diff = reduce(lambda x, y: x + y, diff, 0)
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < IIS_CONVERGE:   # converged
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me


if __name__ == "__main__":
    # Car data from the Naive Bayes Classifier example by Eric Meisner,
    # November 22, 2003
    # http://www.inf.u-szeged.hu/~ormandi/teaching/mi2/02-naiveBayes-example.pdf

    xcar = [
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Domestic'],
        ['Red', 'SUV', 'Imported'],
        ['Red', 'Sports', 'Imported']
        ]

    ycar = [
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'No',
        'Yes'
        ]

    # Requires some rules or features
    def udf1(ts, cl):
        if ts[0] == 'Red':
            return 0
        else:
            return 1

    def udf2(ts, cl):
        if ts[1] == 'Sports':
            return 0
        else:
            return 1

    def udf3(ts, cl):
        if ts[2] == 'Domestic':
            return 0
        else:
            return 1

    user_functions = [udf1, udf2, udf3]   # must be an iterable type

    xe = train(xcar, ycar, user_functions)
    for xv, yv in zip(xcar, ycar):
        xc = classify(xe, xv)
        print 'Pred:', xv, 'gives', xc, 'y is', yv
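    # Note (illustrative, not part of the original demo): udf1-udf3 look only
    # at the observation and ignore the class argument `cl`, so every class
    # receives the same score from calculate() and classify() simply keeps
    # the first class.  A class-sensitive feature, such as the hypothetical
    # sketch below, allows the learned weights to differ between classes:
    #
    #     def udf4(ts, cl):
    #         if ts[0] == 'Red' and cl == 'Yes':
    #             return 1
    #         return 0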