9 """Sparse Multinomial Logistic Regression classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15
16 from mvpa.clfs.base import Classifier
17 from mvpa.measures.base import Sensitivity
18 from mvpa.misc.exceptions import ConvergenceError
19 from mvpa.misc.state import StateVariable, Parametrized
20 from mvpa.misc.param import Parameter
21 from mvpa.misc import warning
22
23
24 from mvpa.clfs.libsmlr import stepwise_regression as _cStepwiseRegression
25
26 if __debug__:
27 from mvpa.misc import debug
28
30 """Convert labels to one-of-M form.
31
32 TODO: Might be useful elsewhere so could migrate into misc/
33 """
34
35
36 new_labels = N.zeros((len(labels), len(ulabels)))
37
38
39 for i, c in enumerate(ulabels):
40 new_labels[labels == c, i] = 1
41
42 return new_labels
43
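# A quick illustration of the one-of-M encoding (example added here, not
# part of the original module; values are easy to verify by hand):
#
#   _label2oneofm(N.array([1, 2, 1, 3]), N.array([1, 2, 3]))
#   -> [[1, 0, 0],
#       [0, 1, 0],
#       [1, 0, 0],
#       [0, 0, 1]]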


class SMLR(Classifier, Parametrized):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    Krishnapuram et al. (2005, IEEE Transactions on Pattern Analysis
    and Machine Intelligence). Be sure to cite that article if you
    use this for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'multiclass',
                       'does_feature_selection' ]

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this
             value the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate in the probability of resampling a zero
             weight. A value of 1.0 will immediately drop the probability
             to min_resamp; 0.0 will keep it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not
             through zero""")

    fit_all_weights = Parameter(False, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but fit_all_weights=True takes a little longer and
             yields weights that are fully analyzable for each class. Note
             that the convergence rate may differ, but the convergence
             point is the same.""")

    implementation = Parameter("C", allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant
             speedup and is therefore the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random number generator;
             set it to replicate a run""")
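
    # A construction sketch (added example, not in the original source):
    # stronger sparsification with a tighter stopping criterion could be
    # requested as
    #   clf = SMLR(lm=10.0, convergence_tol=1e-4)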


    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """

        Parametrized.__init__(self, init_classes=[Classifier], **kwargs)

        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases, will remain None if has_bias is False"""


    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed = None):
        """The (much slower) Python version of the stepwise
        regression. I'm keeping this around for now so that we can
        compare results."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        # seed the random number generator
        N.random.seed(seed)

        if __debug__:
            debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # only process weights that are either non-zero or
            # selected for resampling
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # get the probability for the current class
                P = E[:, m]/S

                # get the gradient
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # take the gradient step scaled by the auto-correlation
                w_new = w_old + grad/auto_corr[basis]

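                # The branches below apply component-wise soft-thresholding
                # to the tentative update: w_new is shrunk toward zero by
                # lambda/(2*auto_corr), and zeroed out whenever it falls
                # inside that interval. (Explanatory comment added; the sign
                # conventions follow the code itself.)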
                if w_new > lambda_over_2_auto_corr[basis]:
                    # positive weight above the threshold: shrink it
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # count another non-zero weight
                    if w_old == 0:
                        non_zero += 1
                        # reset the resample probability
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    # negative weight below the threshold: shrink it
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # count another non-zero weight
                    if w_old == 0:
                        non_zero += 1
                        # reset the resample probability
                        p_resamp[basis, m] = 1.0
                else:
                    # the update fell inside the interval: zero the weight
                    w_new = 0.0

                    # decay the resample probability
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    if w_old == 0:
                        # the weight was already zero, so a basis was wasted
                        changed = False
                        wasted_basis += 1
                    else:
                        # a previously active weight was zeroed out
                        changed = True
                        non_zero -= 1

                # process any changes
                if changed:
                    # update the expected values
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sum of squared weight diffs
                    sum2_w_diff += w_diff*w_diff

                # add the old weight to the running sum regardless
                sum2_w_old += w_old*w_old

            # cycle the indices: class first, then basis
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

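                    # Convergence is judged by the relative weight change
                    #   incr = ||w_new - w_old||_2 / (||w_old||_2 + eps)
                    # accumulated over the full cycle (comment added for
                    # clarity; eps guards against division by zero).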
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    # test for convergence
                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_", \
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the accumulators for the next cycle
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0

        if not converged:
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (maxiter)

        # converged within maxiter: return the number of cycles used, as
        # the caller compares the returned value against maxiter
        return cycles


    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # turn the labels into one-of-M form
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the dataset information into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias term to the features
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression

            # the C code requires the data to be contiguous and aligned
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently the C code requires double precision
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # get the data information into easy vars
        ns, nd = X.shape

        # fit weights for all classes, or for all but a reference class
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # precompute the quantities reused on every pass: the scaled
        # per-feature sums of squares, the feature/label correlations,
        # and the sparsification threshold lambda/(2*auto_corr)
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # initialize the weights, predictions and expected values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # run the chosen implementation of stepwise regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge in the allowed number of cycles
            raise ConvergenceError, \
                  "More than %d iterations without convergence" % \
                  (self.params.maxiter)

        # save the weights of the trained model
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(
                N.max(N.abs(w[:dataset.nfeatures, :]), axis=1) > 0)[0]

        # and the biases, which sit in the last row
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %s cycles on data.shape=%s " %
                  (`cycles`, `X.shape`) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))


    def _getFeatureIds(self):
        """Return ids of the features with non-zero weights."""
        return N.where(N.max(N.abs(self.__weights), axis=1) > 0)[0]


    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # append the bias term to the features if we trained with one
        if self.params.has_bias:
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # determine the weights to use: either all fitted columns, or the
        # fitted M-1 columns plus a zero column for the reference class
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # compute the unnormalized per-class evidence
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

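        # The normalization below is the multinomial softmax: for sample n
        # and class m,  P(m | x_n) = exp(x_n . w_m) / sum_k exp(x_n . w_k)
        # (explanatory comment added; it restates what the code computes).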
        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # pick the class with the highest probability for each sample
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])

        return predictions


    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        return SMLRWeights(self, **kwargs)


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)

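# A minimal end-to-end sketch (added example; `dataset` stands for any
# mvpa Dataset with .samples and .labels):
#
#   clf = SMLR(lm=.1)
#   clf.train(dataset)
#   predictions = clf.predict(dataset.samples)
#   weights = clf.getSensitivityAnalyzer()(dataset)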


class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.
    """

    biases = StateVariable(enabled=True,
                           doc="A 1-d ndarray of biases")

    def __init__(self, clf, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf: SMLR
            classifier to use. Only classifiers sub-classed from
            `SMLR` may be used.
        """
        if not isinstance(clf, SMLR):
            raise ValueError, \
                  "Classifier %s has to be a SMLR, but is [%s]" \
                  % (`clf`, `type(clf)`)

        # init base classes first
        Sensitivity.__init__(self, clf, **kwargs)


    def _call(self, dataset):
        """Extract weights from the trained SMLR classifier.
        """
        if self.clf.weights.shape[1] != 1:
            warning("You are estimating sensitivity for SMLR %s trained on %d" %
                    (`self.clf`, self.clf.weights.shape[1]+1) +
                    " classes. Make sure that it is what you intended to do")

        weights = self.clf.weights

        if self.clf.has_bias:
            self.biases = self.clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (self.clf.weights.shape[1]+1) +
                  ". Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights