9 """Sparse Multinomial Logistic Regression classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15
16 from mvpa.clfs.base import Classifier
17 from mvpa.measures.base import Sensitivity
18 from mvpa.misc.exceptions import ConvergenceError
19 from mvpa.misc.state import StateVariable, Parametrized
20 from mvpa.misc.param import Parameter
21 from mvpa.misc.transformers import SecondAxisMaxOfAbs
22 from mvpa.base import warning, externals
23
24 _DEFAULT_IMPLEMENTATION = "Python"
25 if externals.exists('ctypes'):
26
27 from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
28 _DEFAULT_IMPLEMENTATION = "C"
29 else:
30 _cStepwiseRegression = None
31 warning("SMLR implementation without ctypes is overwhelmingly slow."
32 " You are strongly advised to install python-ctypes")
33
34 if __debug__:
35 from mvpa.base import debug
36
38 """Convert labels to one-of-M form.
39
40 TODO: Might be useful elsewhere so could migrate into misc/
41 """
42
43
44 new_labels = N.zeros((len(labels), len(ulabels)))
45
46
47 for i, c in enumerate(ulabels):
48 new_labels[labels == c, i] = 1
49
50 return new_labels
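
# A small illustration of the encoding performed by _label2oneofm: with
# labels [1, 2, 2, 3] and ulabels [1, 2, 3] the result is
#
#     [[ 1.  0.  0.]
#      [ 0.  1.  0.]
#      [ 0.  1.  0.]
#      [ 0.  0.  1.]]
#
# i.e. one row per sample with a single 1 in the column of its label.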


class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    Krishnapuram et al. (2005, IEEE Transactions on Pattern Analysis
    and Machine Intelligence). Be sure to cite that article if you
    use this for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
                       'multiclass', 'does_feature_selection' ]

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this
             value the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate for the probability of resampling a zeroed
             weight: 1.0 drops the probability to min_resamp immediately,
             while 0.0 keeps it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term so that fits are not forced
             through zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but fitting all weights takes a little longer and
             yields weights that are fully analyzable for each class. Note
             that the convergence rate may differ, but the convergence point
             is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION,
             allowedtype='basestring', choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant speedup
             and thus is the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random generator; can be
             used to replicate a run""")

    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """
        # init the base class first
        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # state set during training
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases; will remain None if has_bias is False"""


    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed=None):
        """The (much slower) Python version of the stepwise regression,
        kept around for now so that its results can be compared against
        the C implementation."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the state of the optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed to make runs reproducible
            N.random.seed(seed)

            if __debug__:
                debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # only touch weights that are non-zero or win the resampling draw
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # compute the probabilities for the current class
                P = E[:, m]/S

                # compute the gradient for this basis/class combination
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # update the weight with a gradient step
                w_new = w_old + grad/auto_corr[basis]

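                # Soft-threshold the updated weight: shrink it toward zero
                # by lambda/(2*auto_corr) and zero it out entirely if it
                # falls inside that band. This l1-style shrinkage is what
                # produces the sparsity of SMLR.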
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # weight moved away from zero
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # weight moved away from zero
                    if w_old == 0:
                        non_zero += 1
                        # reset the resampling probability
                        p_resamp[basis, m] = 1.0
                else:
                    # the weight gets zeroed out
                    w_new = 0.0

                    # decay the probability of resampling this zero weight
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # process any changes
                if changed:
                    # update the fitted values and per-class exponentials
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # save the new weight
                    w[basis, m] = w_new

                    # accumulate the squared weight differences
                    sum2_w_diff += w_diff*w_diff

                # accumulate the squared old weights
                sum2_w_old += w_old*w_old

            # update the class and basis indices
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we finished a pass over the classes, move to the next basis
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we finished a full cycle over all bases
                    cycles += 1

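                    # Convergence is assessed from the relative weight
                    # change accumulated over the last cycle,
                    # ||w_diff|| / ||w_old||, with eps guarding against
                    # division by zero.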
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    # check convergence
                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_",
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the accumulators for the next cycle
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0

        if not converged:
            raise ConvergenceError(
                "More than %d iterations without convergence" % maxiter)

        # report how many cycles the optimization ran for
        return cycles
287 """Train the classifier using `dataset` (`Dataset`).
288 """
289
290 labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
291 self.__ulabels = dataset.uniquelabels.copy()
292
293 Y = labels
294 M = len(self.__ulabels)
295
296
297 X = dataset.samples
298
299
300 if self.params.has_bias:
301 if __debug__:
302 debug("SMLR_", "hstacking 1s for bias")
303
304
305 X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))
306
307 if self.params.implementation.upper() == 'C':
308 _stepwise_regression = _cStepwiseRegression
309
310
311 if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
312 if __debug__:
313 debug("SMLR_",
314 "Copying data to get it C_CONTIGUOUS/ALIGNED")
315 X = N.array(X, copy=True, dtype=N.double, order='C')
316
317
318 if X.dtype != N.double:
319 if __debug__:
320 debug("SMLR_", "Converting data to double")
321
322 X = X.astype(N.double)
323
324
325 elif self.params.implementation.upper() == 'PYTHON':
326 _stepwise_regression = self._pythonStepwiseRegression
327 else:
328 raise ValueError, \
329 "Unknown implementation %s of stepwise_regression" % \
330 self.params.implementation
331
332
333 ns, nd = X.shape
334
335
336 if self.params.fit_all_weights:
337 c_to_fit = M
338 else:
339 c_to_fit = M-1
340
341
342 auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
343 XY = N.dot(X.T, Y[:, :c_to_fit])
344 lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr
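
        # Note: following the bound-optimization derivation in Krishnapuram
        # et al. (2005), auto_corr holds the per-feature diagonal of the
        # Hessian bound, ((M-1)/(2M)) * ||x_k||^2, which scales each gradient
        # step above, and lambda_over_2_auto_corr is the corresponding
        # per-feature soft-threshold width.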

        # initialize the weights, fitted values, and per-class exponentials
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)
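        # with w = 0 we have Xw = 0, so E = exp(Xw) is all ones and the
        # softmax normalizer S (the sum of E over classes) starts at M for
        # every sample; the stepwise loop then updates E and S incrementally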

        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # call the chosen implementation of the stepwise regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)
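        # both implementations share this interface: w, Xw, E, and S are
        # updated in place, and the number of full cycles run is returned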

        if cycles >= self.params.maxiter:
            # did not converge within the allowed number of iterations
            raise ConvergenceError(
                "More than %d iterations without convergence" %
                self.params.maxiter)

        # save the weights (and biases) of the model
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(
                N.max(N.abs(w[:dataset.nfeatures, :]), axis=1) > 0)[0]

        # and the biases
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %s cycles on data.shape=%s " %
                  (repr(cycles), repr(X.shape)) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))


    def _getFeatureIds(self):
        """Return ids of the features with a non-zero weight for any class."""
        return N.where(N.max(N.abs(self.__weights), axis=1) > 0)[0]


    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # see if we are adding a bias term
        if self.params.has_bias:
            # append the bias term to the features
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # determine the weights to use
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            # add a zero column for the reference class that was not fit
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # compute the per-class linear scores and their exponentials
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (repr(data.shape), N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        # normalize the exponentials into multinomial (softmax) posteriors
        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # predict the label with the largest posterior for each sample
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])
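        # e.g., with two classes and posteriors [[0.8, 0.2], [0.3, 0.7]],
        # argmax picks column 0 for the first sample and column 1 for the
        # second; those column indices are then mapped back through
        # self.__ulabels to the actual label values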

        return predictions


    def getSensitivityAnalyzer(self, **kwargs):
        """Return a sensitivity analyzer for SMLR."""
        return SMLRWeights(self, **kwargs)


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


457 """`SensitivityAnalyzer` that reports the weights SMLR trained
458 on a given `Dataset`.
459
460 By default SMLR provides multiple weights per feature (one per label in
461 training dataset). By default, all weights are combined into a single
462 sensitivity value. Please, see the `FeaturewiseDatasetMeasure` constructor
463 arguments how to custmize this behavior.
464 """
465
466 biases = StateVariable(enabled=True,
467 doc="A 1-d ndarray of biases")
468
469 _LEGAL_CLFS = [ SMLR ]
470

    def _call(self, dataset=None):
        """Extract weights from the SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR. " %
                  (weights.shape[1]+1) +
                  "Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights
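
# A minimal usage sketch (assuming a PyMVPA `Dataset` instance `dataset`
# with .samples and .labels; `train` and `predict` are the public methods
# the `Classifier` base class provides around _train/_predict above):
#
#     clf = SMLR(lm=0.1, fit_all_weights=True)
#     clf.train(dataset)
#     predictions = clf.predict(dataset.samples)
#
#     # featurewise sensitivities derived from the trained weights
#     sens_ana = clf.getSensitivityAnalyzer()
#     sensitivities = sens_ana(dataset)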