
Source Code for Module mvpa.measures.base

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
  9  """Base class for data measures: algorithms that quantify properties of 
 10  datasets. 
 11   
 12  Besides the `DatasetMeasure` base class this module also provides the 
 13  (abstract) `FeaturewiseDatasetMeasure` class. The difference between a general 
 14  measure and the output of the `FeaturewiseDatasetMeasure` is that the latter 
 15  returns a 1d map (one value per feature in the dataset). In contrast there are 
 16  no restrictions on the returned value of `DatasetMeasure` except for that it 
 17  has to be in some iterable container. 
 18   
 19  """ 

__docformat__ = 'restructuredtext'

import numpy as N
import mvpa.misc.copy as copy

from mvpa.misc.state import StateVariable, Stateful
from mvpa.misc.args import group_kwargs
from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs
from mvpa.base.dochelpers import enhancedDocString
from mvpa.base import externals
from mvpa.clfs.stats import autoNullDist

if __debug__:
    from mvpa.base import debug


class DatasetMeasure(Stateful):
    """A measure computed from a `Dataset`.

    All dataset measures support arbitrary transformation of the measure
    after it has been computed. Transformations are done by processing the
    measure with a functor that is specified via the `transformer` keyword
    argument of the constructor. Upon request, the raw measure (before
    transformations are applied) is stored in the `raw_result` state variable.

    Additionally, all dataset measures support the estimation of the
    probability of a measure under some distribution. Typically this will be
    the NULL distribution (no signal), which can be estimated with
    permutation tests. If a distribution estimator instance is passed to the
    `null_dist` keyword argument of the constructor, the respective
    probabilities are automatically computed and stored in the `null_prob`
    state variable.

    :Developer note:
      All subclasses shall get all necessary parameters via their
      constructor, so it is possible to get the same type of measure for
      multiple datasets by passing them to the __call__() method
      successively.
    """

    raw_result = StateVariable(enabled=False,
        doc="Computed results before applying any " +
            "transformation algorithm")
    null_prob = StateVariable(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""
    null_t = StateVariable(enabled=False)
    """Stores the t-score corresponding to null_prob under the assumption
    of a Normal distribution"""

    def __init__(self, transformer=None, null_dist=None, **kwargs):
        """Does nothing special.

        :Parameters:
          transformer: Functor
            This functor is called in `__call__()` to perform a final
            processing step on the dataset measure before it is returned.
            If None, no transformation is applied.
          null_dist : instance of distribution estimator
            Used to estimate the probability of the measure under the NULL
            hypothesis.
        """
        Stateful.__init__(self, **kwargs)

        self.__transformer = transformer
        """Functor to be called in the return statement of all subclass
        __call__() methods."""
        null_dist_ = autoNullDist(null_dist)
        if __debug__:
            debug('SA', 'Assigning null_dist %s whenever original given was %s'
                  % (null_dist_, null_dist))
        self.__null_dist = null_dist_


    __doc__ = enhancedDocString('DatasetMeasure', locals(), Stateful)

    def __call__(self, dataset):
        """Compute the measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container,
        applying the transformer if one is defined.
        """
        result = self._call(dataset)
        result = self._postcall(dataset, result)
        return result

    def _call(self, dataset):
        """Actually compute the measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container.
        """
        raise NotImplementedError

    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        self.raw_result = result
        if self.__transformer is not None:
            if __debug__:
                debug("SA_", "Applying transformer %s" % self.__transformer)
            result = self.__transformer(result)

        # estimate the NULL distribution when an estimator is given
        if self.__null_dist is not None:
            if __debug__:
                debug("SA_", "Estimating NULL distribution using %s"
                      % self.__null_dist)

            # we need a matching datameasure instance, but we have to disable
            # the estimation of the null distribution in that child to prevent
            # infinite looping.
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)

            if self.states.isEnabled('null_t'):
                # get probability under the NULL hypothesis, and also request
                # whether each result belongs to the right tail
                null_prob, null_right_tail = \
                           self.__null_dist.p(result, return_tails=True)
                self.null_prob = null_prob

                externals.exists('scipy', raiseException=True)
                from scipy.stats import norm

                # TODO: following logic should appear in NullDist,
                #       not here
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = N.abs(null_prob)
                elif tail == 'right':
                    acdf = 1.0 - N.abs(null_prob)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - N.clip(N.abs(null_prob), 0, 0.5)
                else:
                    raise RuntimeError, 'Unhandled tail %s' % tail
                # We need to clip to avoid non-informative inf's ;-)
                # that happen due to the limited precision of a double
                # near 1.0 (machine epsilon is about 2.2e-16). We could
                # clip values around 0 at as low as 1e-100 (corresponding
                # to z~=21), but for consistency let's clip at 1e-16, which
                # leads to distinguishable values around p=1 and max z=8.2.
                # Should be a sufficient range of z-values ;-)
                clip = 1e-16
                null_t = norm.ppf(N.clip(acdf, clip, 1.0 - clip))
                null_t[~null_right_tail] *= -1.0 # revert sign for negatives
                self.null_t = null_t # store
            else:
                # get probability of the result under the NULL hypothesis if
                # available, and don't request tail information
                self.null_prob = self.__null_dist.p(result)

        return result

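    # A minimal illustration (comments only, not part of the module) of the
    # tail logic above, assuming scipy is available:
    #
    #   >>> from scipy.stats import norm
    #   >>> norm.ppf(1.0 - 0.01)    # p=0.01 in the right tail -> z ~= 2.33
    #   2.3263478740408408
    #
    # Results flagged as not belonging to the right tail then get their sign
    # reverted via 'null_t[~null_right_tail] *= -1.0'.
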
    def __repr__(self, prefixes=None):
        if prefixes is None:
            prefixes = []
        else:
            prefixes = prefixes[:]
        if self.__transformer is not None:
            prefixes.append("transformer=%s" % self.__transformer)
        if self.__null_dist is not None:
            prefixes.append("null_dist=%s" % self.__null_dist)
        return super(DatasetMeasure, self).__repr__(prefixes=prefixes)


    @property
    def null_dist(self):
        return self.__null_dist

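
# Illustrative sketch (hypothetical, not part of the PyMVPA API): a trivial
# DatasetMeasure subclass. Only _call() needs to be provided; transformer
# application and NULL-distribution handling are inherited via _postcall().
#
#   class GrandMeanMeasure(DatasetMeasure):
#       """Toy measure: grand mean of all samples, as a 1-item array."""
#       def _call(self, dataset):
#           return N.array([dataset.samples.mean()])
#
#   measure = GrandMeanMeasure(transformer=N.abs)
#   # measure(some_dataset) would return N.abs() of the raw mean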


class FeaturewiseDatasetMeasure(DatasetMeasure):
    """A per-feature measure computed from a `Dataset` (base class).

    Should behave like a DatasetMeasure.
    """

    base_sensitivities = StateVariable(enabled=False,
        doc="Stores basic sensitivities if the sensitivity " +
            "relies on combining multiple ones")

    # XXX should we maybe default to combiner=None to avoid
    # unexpected results? Also rethink if we need a combiner here at
    # all... Maybe combiners should be 'adjoint' with transformer
    # YYY in comparison to CombinedSensitivityAnalyzer, a default
    # value for combiner here is worse than anywhere. From now on,
    # default combiners should be provided "in place", i.e.
    # in SMLR it makes sense to have SecondAxisMaxOfAbs,
    # in SVM (pair-wise) SecondAxisSumOfAbs only for non-binary
    # problems, though it could be Max as well... uff
    # YOH: started to do so, but still have issues... thus
    # reverting back for now
    def __init__(self, combiner=SecondAxisSumOfAbs, **kwargs):
        """Initialize

        :Parameters:
          combiner : Functor
            The combiner is only applied if the computed featurewise dataset
            measure is more than one-dimensional. This is different from a
            `transformer`, which is always applied. By default, the sum of
            absolute values along the second axis is computed.
        """
        DatasetMeasure.__init__(self, **kwargs)

        self.__combiner = combiner

    def __repr__(self, prefixes=None):
        if prefixes is None:
            prefixes = []
        if self.__combiner != SecondAxisSumOfAbs:
            prefixes.append("combiner=%s" % self.__combiner)
        return \
            super(FeaturewiseDatasetMeasure, self).__repr__(prefixes=prefixes)

    def _call(self, dataset):
        """Computes a per-feature measure on a given `Dataset`.

        Behaves like a `DatasetMeasure`, but computes and returns a 1d
        ndarray with one value per feature.
        """
        raise NotImplementedError

    def _postcall(self, dataset, result):
        """Adjusts the per-feature measure for the computed `result`

        TODO: overlaps heavily in what it does with
          CombinedSensitivityAnalyzer, thus this one might make use of
          CombinedSensitivityAnalyzer yoh thinks, and here
          base_sensitivities doesn't sound appropriate.
          MH: There is indeed some overlap, but also significant differences.
          This one operates on a single sensana and combines over the second
          axis, CombinedFeaturewiseDatasetMeasure uses the first axis.
          Additionally, the 'Sensitivity' base class is a
          FeaturewiseDatasetMeasure, which would have to be changed to
          CombinedFeaturewiseDatasetMeasure to deal with stuff like
          SMLRWeights that return multiple sensitivity values by default.
          Not sure if unification of both (and/or removal of functionality
          here) would not lead to an overall more complicated situation,
          without any real gain -- after all this one works ;-)
        """
        result_squeezed = result.squeeze()
        if len(result_squeezed.shape) > 1:
            n_base = result.shape[1]
            """Number of base sensitivities"""
            if self.states.isEnabled('base_sensitivities'):
                b_sensitivities = []
                if not self.states.isKnown('biases'):
                    biases = None
                else:
                    biases = self.biases
                    if len(self.biases) != n_base:
                        raise ValueError, \
                              "Number of biases %d is " % len(self.biases) \
                              + "different from the number of base " \
                              + "sensitivities %d" % n_base
                for i in xrange(n_base):
                    if biases is not None:
                        bias = biases[i]
                    else:
                        bias = None
                    b_sensitivities.append(StaticDatasetMeasure(
                        measure = result[:,i],
                        bias = bias))
                self.base_sensitivities = b_sensitivities

            # After we stored each sensitivity separately,
            # we can apply the combiner
            if self.__combiner is not None:
                result = self.__combiner(result)
        else:
            # remove bogus dimensions
            # XXX we might need to come up with smth better. May be some
            #     naive combiner? :-)
            result = result_squeezed

        # call base class postcall
        result = DatasetMeasure._postcall(self, dataset, result)

        return result

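# Worked example (comments only): assuming SecondAxisSumOfAbs from
# mvpa.misc.transformers computes N.abs(x).sum(axis=1), the default combiner
# collapses a 2-D featurewise result (nfeatures x n_base) to one value per
# feature:
#
#   >>> r = N.array([[ 1., -2.],
#   ...              [-3.,  4.]])
#   >>> SecondAxisSumOfAbs(r)
#   array([ 3.,  7.])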


class StaticDatasetMeasure(DatasetMeasure):
    """A static (assigned) sensitivity measure.

    Since the implementation is generic, it might be per feature or
    for the whole dataset.
    """

    def __init__(self, measure=None, bias=None, *args, **kwargs):
        """Initialize.

        :Parameters:
          measure
            actual sensitivity to be returned
          bias
            optionally available bias
        """
        DatasetMeasure.__init__(self, *args, **kwargs)
        if measure is None:
            raise ValueError, "Sensitivity measure has to be provided"
        self.__measure = measure
        self.__bias = bias

    def _call(self, dataset):
        """Returns assigned sensitivity
        """
        return self.__measure

    # XXX Might need to move into StateVariable?
    bias = property(fget=lambda self: self.__bias)

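# Minimal usage sketch (hypothetical values): a StaticDatasetMeasure simply
# hands back whatever was assigned, so the dataset argument is ignored.
#
#   >>> sm = StaticDatasetMeasure(measure=N.array([0.5, 1.5, -0.5]))
#   >>> sm(None)
#   array([ 0.5,  1.5, -0.5])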


#
# Flavored implementations of FeaturewiseDatasetMeasures

class Sensitivity(FeaturewiseDatasetMeasure):

    _LEGAL_CLFS = []
    """If the Sensitivity is classifier-specific, the allowed classes of
    classifiers should be listed here
    """

    def __init__(self, clf, force_training=True, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf : :class:`Classifier`
            classifier to use.
          force_training : bool
            if True (the default), train the classifier on each call even
            if it was already trained before
        """
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

        _LEGAL_CLFS = self._LEGAL_CLFS
        if len(_LEGAL_CLFS) > 0:
            found = False
            for clf_class in _LEGAL_CLFS:
                if isinstance(clf, clf_class):
                    found = True
                    break
            if not found:
                raise ValueError, \
                      "Classifier %s has to be of allowed class (%s), but is %s" \
                      % (clf, _LEGAL_CLFS, `type(clf)`)

        self.__clf = clf
        """Classifier used to compute sensitivity"""

        self._force_training = force_training
        """Whether to force training of the classifier"""

    def __repr__(self, prefixes=None):
        if prefixes is None:
            prefixes = []
        prefixes.append("clf=%s" % repr(self.clf))
        if not self._force_training:
            prefixes.append("force_training=%s" % self._force_training)
        return super(Sensitivity, self).__repr__(prefixes=prefixes)

    def __call__(self, dataset=None):
        """Train the classifier on `dataset` and then compute the actual
        sensitivity.

        If the classifier is already trained it is possible to extract the
        sensitivities without passing a dataset.
        """
        # local bindings
        clf = self.__clf
        if not clf.trained or self._force_training:
            if dataset is None:
                raise ValueError, \
                      "Training classifier to compute sensitivities requires " \
                      "a dataset."
            if __debug__:
                debug("SA", "Training classifier %s %s" %
                      (`clf`,
                       {False: "since it wasn't yet trained",
                        True: "although it was trained previously"}
                       [clf.trained]))
            clf.train(dataset)

        return FeaturewiseDatasetMeasure.__call__(self, dataset)

    def _setClassifier(self, clf):
        self.__clf = clf


    @property
    def feature_ids(self):
        """Return feature_ids used by the underlying classifier
        """
        return self.__clf._getFeatureIds()


    clf = property(fget=lambda self: self.__clf,
                   fset=_setClassifier)

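# Typical usage sketch ('clf' is a placeholder for a trained PyMVPA
# classifier instance): analyzers are usually obtained from the classifier
# itself, and force_training=False avoids retraining an already trained one.
#
#   >>> sana = clf.getSensitivityAnalyzer(force_training=False)
#   >>> # sens = sana(dataset)   # 1d ndarray, one value per feature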


class CombinedFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """A set of sensitivity analyzers to be merged into a single output"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced by each analyzer")

    # XXX think again about combiners... now we have it in here as
    #     well as in the parent -- FeaturewiseDatasetMeasure...
    # YYY because we don't use the parent's _call. Needs RF
    def __init__(self, analyzers=None,
                 combiner=None, #FirstAxisMean,
                 **kwargs):
        """Initialize CombinedFeaturewiseDatasetMeasure

        :Parameters:
          analyzers : list or None
            List of analyzers to be used. There is no logic to populate
            such a list in __call__, so it must be either provided to
            the constructor or assigned to .analyzers prior to calling
        """
        if analyzers is None:
            analyzers = []

        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self.__analyzers = analyzers
        """List of analyzers to use"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""

    def _call(self, dataset):
        sensitivities = []
        for ind, analyzer in enumerate(self.__analyzers):
            if __debug__:
                debug("SA", "Computing sensitivity for SA#%d:%s" %
                      (ind, analyzer))
            sensitivity = analyzer(dataset)
            sensitivities.append(sensitivity)

        self.sensitivities = sensitivities
        if __debug__:
            debug("SA",
                  "Returning combined using %s sensitivity across %d items" %
                  (self.__combiner, len(sensitivities)))

        if self.__combiner is not None:
            sensitivities = self.__combiner(sensitivities)
        else:
            # assure that we have an ndarray on output
            sensitivities = N.asarray(sensitivities)
        return sensitivities

    def _setAnalyzers(self, analyzers):
        """Set the analyzers
        """
        self.__analyzers = analyzers
        """Analyzers to use"""

    analyzers = property(fget=lambda x: x.__analyzers,
                         fset=_setAnalyzers,
                         doc="Used analyzers")

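# Assembly sketch ('analyzer1'/'analyzer2' are placeholders): merge several
# featurewise measures, averaging across analyzers with FirstAxisMean, which
# is imported above from mvpa.misc.transformers.
#
#   >>> cfdm = CombinedFeaturewiseDatasetMeasure(
#   ...           analyzers=[analyzer1, analyzer2],
#   ...           combiner=FirstAxisMean)
#   >>> # sens = cfdm(dataset)   # combined across analyzers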


class BoostedClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzers to be merged into a single output"""


    # XXX we might like to pass parameters also for combined_analyzer
    @group_kwargs(prefixes=['slave_'], assign=True)
    def __init__(self,
                 clf,
                 analyzer=None,
                 combined_analyzer=None,
                 slave_kwargs={},
                 **kwargs):
        """Initialize Sensitivity Analyzer for `BoostedClassifier`

        :Parameters:
          clf : `BoostedClassifier`
            Classifier to be used
          analyzer : analyzer
            Is used to populate combined_analyzer
          slave_*
            Arguments to pass to the created analyzer if analyzer is None
        """
        Sensitivity.__init__(self, clf, **kwargs)
        if combined_analyzer is None:
            # sanitize kwargs
            kwargs.pop('force_training', None)
            combined_analyzer = CombinedFeaturewiseDatasetMeasure(**kwargs)
        self.__combined_analyzer = combined_analyzer
        """Combined analyzer to use"""

        if analyzer is not None and len(self._slave_kwargs):
            raise ValueError, \
                  "Provide either analyzer or slave_* arguments, not both"
        self.__analyzer = analyzer
        """Analyzer to use for basic classifiers within the boosted classifier"""

    def _call(self, dataset):
        analyzers = []
        # create analyzers
        for clf in self.clf.clfs:
            if self.__analyzer is None:
                analyzer = clf.getSensitivityAnalyzer(**(self._slave_kwargs))
                if analyzer is None:
                    raise ValueError, \
                          "Wasn't able to figure out a basic analyzer for clf %s" % \
                          `clf`
                if __debug__:
                    debug("SA", "Selected analyzer %s for clf %s" % \
                          (`analyzer`, `clf`))
            else:
                # XXX shallow copy should be enough...
                analyzer = copy.copy(self.__analyzer)

            # assign the corresponding classifier
            analyzer.clf = clf
            # if clf was trained already -- don't train again
            if clf.trained:
                analyzer._force_training = False
            analyzers.append(analyzer)

        self.__combined_analyzer.analyzers = analyzers

        # XXX not sure if we don't want to call ._call(dataset) directly to
        #     avoid double application of transformers/combiners; after all
        #     we are just 'proxying' here to combined_analyzer...
        # YOH: decided -- lets call ._call
        return self.__combined_analyzer._call(dataset)

    combined_analyzer = property(fget=lambda x: x.__combined_analyzer)

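# Note on the slave_* convention (sketch, assuming @group_kwargs strips the
# 'slave_' prefix and collects those arguments into self._slave_kwargs):
#
#   >>> bsa = BoostedClassifierSensitivityAnalyzer(
#   ...           boosted_clf,               # placeholder classifier
#   ...           slave_force_training=False)
#
# would hand force_training=False to each per-classifier analyzer created
# in _call() above.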


class ProxyClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzer output just to pass through"""

    @group_kwargs(prefixes=['slave_'], assign=True)
    def __init__(self,
                 clf,
                 analyzer=None,
                 **kwargs):
        """Initialize Sensitivity Analyzer for `ProxyClassifier`
        """
        Sensitivity.__init__(self, clf, **kwargs)

        if analyzer is not None and len(self._slave_kwargs):
            raise ValueError, \
                  "Provide either analyzer or slave_* arguments, not both"

        self.__analyzer = analyzer
        """Analyzer to use for the basic classifier within the proxy classifier"""

    def _call(self, dataset):
        # OPT: local bindings
        clfclf = self.clf.clf
        analyzer = self.__analyzer

        if analyzer is None:
            analyzer = clfclf.getSensitivityAnalyzer(
                **(self._slave_kwargs))
            if analyzer is None:
                raise ValueError, \
                      "Wasn't able to figure out a basic analyzer for clf %s" % \
                      `clfclf`
            if __debug__:
                debug("SA", "Selected analyzer %s for clf %s" % \
                      (analyzer, clfclf))
            # bind to the instance finally
            self.__analyzer = analyzer

        # TODO "remove" unnecessary things below on each call...
        # assign the corresponding classifier
        analyzer.clf = clfclf

        # if clf was trained already -- don't train again
        if clfclf.trained:
            analyzer._force_training = False

        return analyzer._call(dataset)

    analyzer = property(fget=lambda x: x.__analyzer)



class MappedClassifierSensitivityAnalyzer(ProxyClassifierSensitivityAnalyzer):
    """Set sensitivity analyzer output to be reverse-mapped using the mapper
    of the slave classifier"""

    def _call(self, dataset):
        sens = super(MappedClassifierSensitivityAnalyzer, self)._call(dataset)
        # So we have here the case that some sensitivities are given
        # as nfeatures x nclasses, thus we need to take .T for the
        # mapper and revert back afterwards
        # devguide's TODO lists this point to 'discuss'
        sens_mapped = self.clf.maskclf.mapper.reverse(sens.T)
        return sens_mapped.T

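
# Shape sketch (comments only) for the reverse mapping above: a sensitivity
# of shape (nfeatures_in, nclasses) is transposed so the mapper can treat the
# class axis as 'samples':
#
#   (nfeatures_in, nclasses) --.T--> (nclasses, nfeatures_in)
#       --mapper.reverse()--> (nclasses, nfeatures_out)
#       --.T--> (nfeatures_out, nclasses)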