
Source Code for Module mvpa.clfs.smlr

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Sparse Multinomial Logistic Regression classifier."""

__docformat__ = 'restructuredtext'


import numpy as N

from mvpa.clfs.base import Classifier
from mvpa.measures.base import Sensitivity
from mvpa.misc.exceptions import ConvergenceError
from mvpa.misc.state import StateVariable, Parametrized
from mvpa.misc.param import Parameter
from mvpa.misc import warning

# Uber-fast C-version of the stepwise regression
from mvpa.clfs.libsmlr import stepwise_regression as _cStepwiseRegression

if __debug__:
    from mvpa.misc import debug

def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
    """

    # allocate for the new one-of-M labels
    new_labels = N.zeros((len(labels), len(ulabels)))

    # loop and convert to one-of-M
    for i, c in enumerate(ulabels):
        new_labels[labels == c, i] = 1

    return new_labels


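# A minimal illustration of the one-of-M encoding produced by the helper
# above (the label values are made up for this sketch):
#
#   >>> _label2oneofm(N.array([1, 2, 1, 3]), N.array([1, 2, 3]))
#   array([[ 1.,  0.,  0.],
#          [ 0.,  1.,  0.],
#          [ 1.,  0.,  0.],
#          [ 0.,  0.,  1.]])
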
class SMLR(Classifier, Parametrized):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    Krishnapuram et al. (2005, IEEE Transactions on Pattern Analysis
    and Machine Intelligence). Be sure to cite that article if you
    use this for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'multiclass',
                       'does_feature_selection' ] # later 'kernel-based'?

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this value
             the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Rate of decay in the probability of resampling a zero weight.
             A value of 1.0 drops the resampling probability to min_resamp
             immediately, while 0.0 keeps it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not through
             zero""")

    fit_all_weights = Parameter(False, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one. Both should give nearly identical
             results, but setting fit_all_weights to True takes a little
             longer and yields weights that are fully analyzable for each
             class. Also note that the convergence rate may differ, but the
             convergence point is the same.""")

    implementation = Parameter("C", allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression. The C version brings a significant speedup
             and thus is the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random generator; can be
             used to replicate a run.""")


    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """
        # init base class first
        Parametrized.__init__(self, init_classes=[Classifier], **kwargs)

        # pylint friendly initializations
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases, will remain None if has_bias is False"""


    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed = None):
        """The (much slower) Python version of the stepwise regression.
        Kept around for now so that results can be compared against the
        C implementation."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        # set the random seed
        N.random.seed(seed)

        if __debug__:
            debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # see if we're gonna update
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # let's do it
                # get the probability
                P = E[:, m]/S

                # set the gradient
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # calculate the new weight with the Laplacian prior
                w_new = w_old + grad/auto_corr[basis]

                # keep weights within bounds
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                else:
                    # gonna zero it out
                    w_new = 0.0

                    # decrease the p_resamp
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    # set number of non-zero
                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # process any changes
                if changed:
                    #print "w[%d, %d] = %g" % (basis, m, w_new)
                    # update the expected values
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sqrt sum squared diffs
                    sum2_w_diff += w_diff*w_diff

                # add to the old no matter what
                sum2_w_old += w_old*w_old

            # update the class and basis
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

                    # assess convergence
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    # check for convergence
                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_", \
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the sum diffs and wasted_basis
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0


        if not converged:
            raise ConvergenceError, \
                  "More than %d Iterations without convergence" % \
                  (maxiter)

        # calculate the log likelihoods and posteriors for the training data
        #log_likelihood = x

        # if __debug__:
        #     debug("SMLR_", \
        #           "SMLR converged after %d steps. Error: %g" % \
        #           (cycles, XXX))

        # print 'cycles=%d ; wasted basis=%g\n' % (cycles, wasted_basis/((M-1)*nd))


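    # The per-weight update in the loop above is a soft-thresholding step
    # induced by the Laplacian prior: the gradient step is shrunk towards
    # zero by lambda/(2*auto_corr) and zeroed whenever it falls inside that
    # band.  A standalone sketch of the same rule for a single weight
    # (helper name and arguments are illustrative, not part of the class):
    #
    #   def _soft_threshold(w_old, grad, auto_corr_b, lm_over_2_ac_b):
    #       w_new = w_old + grad / auto_corr_b
    #       if w_new > lm_over_2_ac_b:
    #           return w_new - lm_over_2_ac_b
    #       elif w_new < -lm_over_2_ac_b:
    #           return w_new + lm_over_2_ac_b
    #       return 0.0
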
    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # Process the labels to turn into 1 of N encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the dataset information into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias term to the features
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression
            #
            # TODO: avoid copying to non-contig arrays, use strides in ctypes?
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                # must cast to double
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # set the feature dimensions
        ns, nd = X.shape

        # decide the size of weights based on num classes estimated
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # Precompute what we can
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set verbosity
        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # call the chosen version of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError, \
                  "More than %d Iterations without convergence" % \
                  (self.params.maxiter)

        # save the weights
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(
                N.max(N.abs(w[:dataset.nfeatures, :]), axis=1) > 0)[0]

        # and a bias
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %s cycles on data.shape=%s " %
                  (`cycles`, `X.shape`) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))


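    # Shapes involved in the stepwise_regression call above (ns samples,
    # nd features including the optional bias column, c_to_fit classes):
    #
    #   w        (nd, c_to_fit)   weights being estimated
    #   X        (ns, nd)         training samples
    #   XY       (nd, c_to_fit)   precomputed N.dot(X.T, Y[:, :c_to_fit])
    #   Xw       (ns, c_to_fit)   current linear predictions
    #   E        (ns, c_to_fit)   N.exp(Xw)
    #   S        (ns,)            per-sample normalizer over all M classes
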
    def _getFeatureIds(self):
        return N.where(N.max(N.abs(self.__weights), axis=1) > 0)[0]


    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # see if we are adding a bias term
        if self.params.has_bias:
            # append the bias term to the features
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # append the zeros column to the weights if necessary
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # determine the probability values for making the prediction
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # generate predictions
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])
        # no need to assign state variable here -- would be done in
        # Classifier._postpredict anyway
        #self.predictions = predictions

        return predictions


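    # The values computed in _predict are a plain softmax over the linear
    # outputs.  A minimal equivalent sketch (hypothetical standalone helper,
    # not part of this class):
    #
    #   def _softmax(data, w):
    #       E = N.exp(N.dot(data, w))
    #       return E / N.sum(E, axis=1)[:, N.newaxis]
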
    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        return SMLRWeights(self, **kwargs)


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


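# A minimal usage sketch for the classifier above.  Construction of the
# dataset is not shown; `dataset` is assumed to be an mvpa `Dataset` with
# .samples and .labels:
#
#   clf = SMLR(lm=0.1, fit_all_weights=True)
#   clf.train(dataset)                      # fit the sparse weights
#   predictions = clf.predict(dataset.samples)
#   sparse_w = clf.weights                  # (nfeatures, nclasses) weights
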
class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.
    """

    biases = StateVariable(enabled=True,
                           doc="A 1-d ndarray of biases")

    def __init__(self, clf, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf: SMLR
            classifier to use. Only classifiers sub-classed from
            `SMLR` may be used.
        """
        if not isinstance(clf, SMLR):
            raise ValueError, \
                  "Classifier %s has to be a SMLR, but is [%s]" \
                  % (`clf`, `type(clf)`)

        # init base classes first
        Sensitivity.__init__(self, clf, **kwargs)


    def _call(self, dataset):
        """Extract weights from the SMLR classifier.
        """
        if self.clf.weights.shape[1] != 1:
            warning("You are estimating sensitivity for SMLR %s trained on %d" %
                    (`self.clf`, self.clf.weights.shape[1] + 1) +
                    " classes. Make sure that it is what you intended to do.")

        weights = self.clf.weights

        if self.clf.has_bias:
            self.biases = self.clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (self.clf.weights.shape[1] + 1) +
                  ". Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights
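
# A minimal usage sketch for the analyzer above (assumes a trained SMLR
# instance `clf` and an mvpa `dataset`; names are illustrative):
#
#   analyzer = clf.getSensitivityAnalyzer()
#   weights = analyzer(dataset)     # per-feature weights from the trained SMLR
#   biases = analyzer.biases        # populated when the classifier has a bias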