Source Code for Module mvpa.clfs.smlr

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Sparse Multinomial Logistic Regression classifier."""

__docformat__ = 'restructuredtext'


import numpy as N

from mvpa.clfs.base import Classifier
from mvpa.measures.base import Sensitivity
from mvpa.misc.exceptions import ConvergenceError
from mvpa.misc.state import StateVariable, Parametrized
from mvpa.misc.param import Parameter
from mvpa.misc.transformers import SecondAxisMaxOfAbs
from mvpa.base import warning, externals

_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # Uber-fast C-version of the stepwise regression
    from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
    _DEFAULT_IMPLEMENTATION = "C"
else:
    _cStepwiseRegression = None
    warning("SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug

def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
    """

    # allocate for the new one-of-M labels
    new_labels = N.zeros((len(labels), len(ulabels)))

    # loop and convert to one-of-M
    for i, c in enumerate(ulabels):
        new_labels[labels == c, i] = 1

    return new_labels

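# For example, labels [1, 1, 2, 3] with unique labels [1, 2, 3] are mapped to
#
#   [[ 1.  0.  0.]
#    [ 1.  0.  0.]
#    [ 0.  1.  0.]
#    [ 0.  0.  1.]]
#
# i.e. one row per sample with a single 1 in the column of its label.
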
class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    Krishnapuram et al. (2005, IEEE Transactions on Pattern Analysis
    and Machine Intelligence). Be sure to cite that article if you
    use this for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
                       'multiclass', 'does_feature_selection' ] # later 'kernel-based'?

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda. Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this value
             the regression is considered converged. Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Rate of decay in the probability of resampling a zero weight.
             A value of 1.0 drops the probability to min_resamp immediately,
             while 0.0 keeps it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not through
             zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the number
             of classes minus one. Both should give nearly identical results, but
             fit_all_weights=True takes a little longer and yields weights that
             are fully analyzable for each class. Also note that the convergence
             rate may differ, but the convergence point is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION, allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use the C or the Python implementation of
             stepwise_regression. The C version brings a significant speedup
             and is therefore the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random generator; may be used
             to replicate a run""")

    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # pylint friendly initializations
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases, will remain None if has_bias is False"""

    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed=None):
        """The (much slower) python version of the stepwise
        regression. I'm keeping this around for now so that we can
        compare results."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed
            N.random.seed(seed)

            if __debug__:
                debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # see if we're gonna update
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # let's do it
                # get the probability
                P = E[:, m]/S

                # set the gradient
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # calculate the new weight with the Laplacian prior
                w_new = w_old + grad/auto_corr[basis]

                # keep weights within bounds
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                else:
                    # gonna zero it out
                    w_new = 0.0

                    # decrease the p_resamp
                    p_resamp[basis, m] -= (p_resamp[basis, m] - \
                                           min_resamp) * resamp_decay

                    # update the number of non-zero weights
                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # process any changes
                if changed:
                    #print "w[%d, %d] = %g" % (basis, m, w_new)
                    # update the expected values
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sum of squared weight diffs
                    sum2_w_diff += w_diff*w_diff

                # add to the old no matter what
                sum2_w_old += w_old*w_old

            # update the class and basis
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

                    # assess convergence
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old)+N.finfo(N.float).eps)

                    # check whether the weights have converged
                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_", \
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the sum diffs and wasted_basis
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0


        if not converged:
            raise ConvergenceError, \
                  "More than %d Iterations without convergence" % \
                  (maxiter)

        # calculate the log likelihoods and posteriors for the training data
        #log_likelihood = x

#         if __debug__:
#             debug("SMLR_", \
#                   "SMLR converged after %d steps. Error: %g" % \
#                   (cycles, XXX))

#         print 'cycles=%d ; wasted basis=%g\n' % (cycles, wasted_basis/((M-1)*nd))

        # return the number of completed cycles so that _train can check
        # whether the regression converged within maxiter
        return cycles

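    # In other words, each per-weight update in the loop above is a gradient
    # step followed by soft-thresholding, which is how the Laplacian (L1)
    # prior of Krishnapuram et al. (2005) produces exact zeros:
    #
    #   w_tmp = w_old + grad / auto_corr[basis]
    #   w_new = sign(w_tmp) * max(|w_tmp| - lambda/(2*auto_corr[basis]), 0)
    #
    # with lambda/(2*auto_corr[basis]) precomputed in _train as
    # lambda_over_2_auto_corr.
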
    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # Process the labels to turn them into 1-of-N encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the dataset information into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias term to the features
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression
            #
            # TODO: avoid copying to non-contig arrays, use strides in ctypes?
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                # must cast to double
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # set the feature dimensions
        ns, nd = X.shape

        # decide the size of weights based on num classes estimated
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # Precompute what we can
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set the starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set verbosity
        if __debug__:
            verbosity = int( "SMLR_" in debug.active )
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # call the chosen version of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError, \
                  "More than %d Iterations without convergence" % \
                  (self.params.maxiter)

        # save the weights
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(
                N.max(N.abs(w[:dataset.nfeatures, :]), axis=1) > 0)[0]

        # and a bias
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %s cycles on data.shape=%s " %
                  (`cycles`, `X.shape`) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))

    def _getFeatureIds(self):
        return N.where(N.max(N.abs(self.__weights), axis=1) > 0)[0]

    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # see if we are adding a bias term
        if self.params.has_bias:
            # append the bias term to the features
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # append the zeros column to the weights if necessary
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # determine the probability values for making the prediction
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # generate predictions
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])
        # no need to assign the state variable here -- it is done in
        # Classifier._postpredict anyway
        #self.predictions = predictions

        return predictions

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        kwargs.setdefault('combiner', SecondAxisMaxOfAbs)
        return SMLRWeights(self, **kwargs)


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.

    SMLR provides multiple weights per feature (one per label in the
    training dataset). By default, all weights are combined into a single
    sensitivity value. Please see the `FeaturewiseDatasetMeasure` constructor
    arguments for how to customize this behavior.
    """

    biases = StateVariable(enabled=True,
                           doc="A 1-d ndarray of biases")

    _LEGAL_CLFS = [ SMLR ]

    def _call(self, dataset=None):
        """Extract weights from the SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights
        # XXX: MH: The following warning is inappropriate. In almost all cases
        # SMLR will return more than one weight per feature. Even in the case
        # of a binary problem it will fit both weights by default. So unless
        # you specify fit_all_weights=False manually this warning is always
        # there. Too much annoyance IMHO. I moved this information into the
        # docstring, as there is no technical problem here, as
        # FeaturewiseDatasetMeasure by default applies a combiner -- just that
        # people should know...
        # PLEASE ACKNOWLEDGE AND REMOVE
        #if weights.shape[1] != 1:
        #    warning("You are estimating sensitivity for SMLR %s with multiple"
        #            " sensitivities available %s. Make sure that it is what you"
        #            " intended to do" % (self, weights.shape) )

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (weights.shape[1]+1) +
                  ". Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights
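
# Illustrative usage sketch (assumes a PyMVPA `Dataset` instance named
# `dataset` with .samples and .labels; parameter values are arbitrary):
#
#   from mvpa.clfs.smlr import SMLR
#   clf = SMLR(lm=0.1, fit_all_weights=True)
#   clf.train(dataset)
#   predictions = clf.predict(dataset.samples)
#   # featurewise sensitivities (max abs weight across classes by default)
#   sensitivities = clf.getSensitivityAnalyzer()(dataset)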