"""
Maximum Entropy code.

Uses Improved Iterative Scaling, as described in:
A.L. Berger, S.A. Della Pietra, V.J. Della Pietra (1996) "A Maximum
Entropy Approach to Natural Language Processing", Computational
Linguistics 22(1), 39-71.

Terminology:
observation    a vector describing one instance of the data (x)
class          the label assigned to an observation (y)
feature        a function f(observation, class) that returns 1 or 0
alpha          the trained weight for one feature

"""

import numpy

# On Python versions before 2.4 the built-in set type does not exist,
# so fall back to the old sets module.
try:
    set
except NameError:
    from sets import Set as set


MAX_IIS_ITERATIONS = 10000   # Maximum iterations for IIS.
IIS_CONVERGE = 1E-5          # Convergence criterion for IIS.
MAX_NEWTON_ITERATIONS = 100  # Maximum iterations for Newton's method.
NEWTON_CONVERGE = 1E-10      # Convergence criterion for Newton's method.
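
# The model being fit is the standard log-linear maximum entropy form
# (stated here for orientation; see the reference in the module docstring):
#
#     P(y|x) = exp(SUM_i alphas[i] * feature_fns[i](x, y)) / Z(x)
#
# where Z(x) sums the numerator over all classes.  Training chooses the
# alphas so that the model's expectation of each feature matches the
# feature's empirical expectation on the training set.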


class MaxEntropy(object):
    """Holds information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    """
    def __init__(self):
        self.classes = []
        self.alphas = []
        self.feature_fns = []


def calculate(me, observation):
    """calculate(me, observation) -> list of log probs

    Calculate the log of the probability for each class. me is a
    MaxEntropy object that has been trained. observation is a vector
    representing the observed data. The return value is a list of
    unnormalized log probabilities for each class, parallel to
    me.classes.

    """
    scores = []
    for klass in me.classes:
        lprob = 0.0
        for fn, alpha in zip(me.feature_fns, me.alphas):
            # Feature functions receive the class label, exactly as they
            # do during training.
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores


def classify(me, observation):
    """classify(me, observation) -> class

    Classify an observation into a class.

    """
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass
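
# Usage sketch (assumes a MaxEntropy object `me` returned by train() below;
# the observation must have the same format as the training data):
#
#     label = classify(me, ['Red', 'Sports', 'Domestic'])  # one of me.classes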
76 """_eval_feature_fn(fn, xs, classes) -> dict of values

    Evaluate a feature function on every instance of the training set
    and class. fn is a callback function that takes two parameters: a
    training instance and a class. Return a dictionary of (training
    set index, class index) -> non-zero value. Values of 0 are not
    stored in the dictionary.

    """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values
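
# Illustration of the sparse cache with made-up values: for three training
# instances and two classes, a feature that fires only for (instance 0,
# class 1) and (instance 2, class 0) is stored as {(0, 1): 1, (2, 0): 1};
# all absent keys are implicitly 0.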


def _calc_empirical_expects(xs, ys, classes, features):
    """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations

    Calculate the expectation of each function from the data. This is
    the constraint for the maximum entropy distribution. Return a
    list of expectations, parallel to the list of features.

    """
    # E_empirical[f_i] = SUM_{x,y} P~(x, y) * f_i(x, y)
    #                  = (1/N) * SUM_i f_i(x_i, y_i)
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect
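
# Worked example with made-up numbers: with N = 10 training instances, a
# 0/1 feature that fires for 4 of the observed (x_i, y_i) pairs has
# empirical expectation 4 / 10 = 0.4.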


def _calc_model_expects(xs, classes, features, alphas):
    """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.

    Calculate the expectation of each feature from the model. This is
    not used in maximum entropy training, but provides a good function
    for debugging.

    """
    # E_model[f_i] = (1/N) * SUM_x SUM_c P(c|x) * f_i(x, c)
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        s = 0.0
        for (i, j), f in feature.items():
            s += p_yx[i][j] * f
        expects.append(s / len(xs))
    return expects


def _calc_p_class_given_x(xs, classes, features, alphas):
    """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix

    Calculate P(y|x), where y is the class and x is an instance from
    the training set. Return an XS x CLASSES matrix of probabilities.

    """
    prob_yx = numpy.zeros((len(xs), len(classes)))

    # Calculate log P(y, x) as the weighted sum of the features.
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Exponentiate to get values proportional to P(y, x).
    prob_yx = numpy.exp(prob_yx)
    # Normalize each row to sum to one, giving P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z
    return prob_yx
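
# Each row of prob_yx is a softmax over that instance's per-class scores.
# Made-up example: unnormalized log scores [log 1, log 3] exponentiate to
# [1, 3] and normalize to P(y|x) = [0.25, 0.75].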


def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
    # f#(x, y) is the sum of the values of every feature at the pair (x, y).
    f_sharp = numpy.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp
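
# f# appears in the exponent of the IIS update solved below.  In the special
# case where f#(x, y) equals a constant M for every pair, the update has the
# closed form
#     delta_i = (1/M) * log(E_empirical[f_i] / E_model[f_i])
# (the classic GIS step); Newton's method handles the general case.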


def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx):
    """Solve the delta for one feature using Newton's method (PRIVATE)."""
    # Find the delta that satisfies the IIS equation
    #   (1/N) * SUM_x SUM_c P(c|x) * f_i(x, c) * exp(delta * f#(x, c))
    #       = E_empirical[f_i]
    delta = 0.0
    iters = 0
    while iters < MAX_NEWTON_ITERATIONS:
        # Evaluate the function and its derivative at the current delta.
        f_newton = df_newton = 0.0
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if numpy.fabs(ratio) < NEWTON_CONVERGE:
            break
        iters += 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta
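
# For reference, the Newton iteration above solves g(delta) = 0 where
#
#     g(delta) = E_empirical[f_i]
#                - (1/N) * SUM_{x,c} P(c|x) * f_i(x, c) * exp(delta * f#(x, c))
#
# f_newton holds g(delta), df_newton holds g'(delta), and each step applies
# the familiar update delta <- delta - g(delta) / g'(delta).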


def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    # Pre-calculate P(y|x) from the current alphas.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx)
        newalphas[i] += delta
    return newalphas
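
# Design note: P(y|x) is computed once per IIS iteration from the current
# alphas, and every per-feature delta is solved against that same snapshot,
# so the updates within one iteration are independent of one another.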


def train(training_set, results, feature_fns, update_fn=None):
    """Train a maximum entropy classifier; return a MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations. results is a list of the
    class assignments for each observation. feature_fns is a list of
    the features. These are callback functions that take an
    observation and a class and return 1 or 0. update_fn is a
    callback function that is called at each training iteration. It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    """
    if not len(training_set):
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename the variables for convenience.
    xs, ys = training_set, results

    # Get a sorted list of all the classes that need to be trained.
    classes = list(set(results))
    classes.sort()

    # Cache values for all features.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < MAX_IIS_ITERATIONS:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical)
        diff = sum(numpy.fabs(x - y) for x, y in zip(alphas, nalphas))
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < IIS_CONVERGE:
            break
        # Count this iteration against the cap.
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me


if __name__ == "__main__":
    # Small demonstration: car observations with Yes/No labels.
    xcar = [
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Domestic'],
        ['Red', 'SUV', 'Imported'],
        ['Red', 'Sports', 'Imported'],
    ]

    ycar = [
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'No',
        'Yes',
    ]

    # User-defined feature functions.  Each takes an observation and a
    # class and returns 0 or 1 (the class argument is unused here).
    def udf1(ts, cl):
        if ts[0] == 'Red':
            return 0
        else:
            return 1

    def udf2(ts, cl):
        if ts[1] == 'Sports':
            return 0
        else:
            return 1

    def udf3(ts, cl):
        if ts[2] == 'Domestic':
            return 0
        else:
            return 1
    user_functions = [udf1, udf2, udf3]

    xe = train(xcar, ycar, user_functions)
    for xv, yv in zip(xcar, ycar):
        xc = classify(xe, xv)
        print('Pred:', xv, 'gives', xc, 'y is', yv)