Package mvpa :: Package measures :: Module base
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.measures.base

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Base class for data measures: algorithms that quantify properties of 
 10  datasets. 
 11   
 12  Besides the `DatasetMeasure` base class this module also provides the (abstract) 
 13  `FeaturewiseDatasetMeasure` class. The difference between a general measure and 
 14  the output of the `FeaturewiseDatasetMeasure` is that the latter returns a 1d map 
 15  (one value per feature in the dataset). In contrast there are no restrictions 
 16  on the returned value of `DatasetMeasure` except for that it has to be in some 
 17  iterable container. 
 18  """ 
 19   
 20  __docformat__ = 'restructuredtext' 
 21   
 22  import copy 
 23   
 24  from mvpa.misc.state import StateVariable, Stateful 
 25  from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs 
 26  from mvpa.base.dochelpers import enhancedDocString 
 27   
 28  if __debug__: 
 29      from mvpa.misc import debug 
 30   
 31   
class DatasetMeasure(Stateful):
    """A measure computed from a `Dataset`

    All dataset measures support arbitrary transformation of the measure
    after it has been computed. Transformation are done by processing the
    measure with a functor that is specified via the `transformer` keyword
    argument of the constructor. Upon request, the raw measure (before
    transformations are applied) is stored in the `raw_result` state variable.

    Additionally all dataset measures support the estimation of the
    probabilit(y,ies) of a measure under some distribution. Typically this will
    be the NULL distribution (no signal), that can be estimated with
    permutation tests. If a distribution estimator instance is passed to the
    `null_dist` keyword argument of the constructor the respective
    probabilities are automatically computed and stored in the `null_prob`
    state variable.

    :Developer note:
      All subclasses shall get all necessary parameters via their constructor,
      so it is possible to get the same type of measure for multiple datasets
      by passing them to the __call__() method successively.
    """

    raw_result = StateVariable(enabled=False,
        doc="Computed results before applying any " +
            "transformation algorithm")
    null_prob = StateVariable(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""


    def __init__(self, transformer=None, null_dist=None, *args, **kwargs):
        """Does nothing special.

        :Parameters:
          transformer: Functor
            This functor is called in `__call__()` to perform a final
            processing step on the to be returned dataset measure. If None,
            nothing is called
          null_dist : instance of distribution estimator
            Estimator of the NULL distribution (e.g. via permutation testing)
            against which computed measures are evaluated
        """
        Stateful.__init__(self, **kwargs)

        self.__transformer = transformer
        """Functor to be called in return statement of all subclass __call__()
        methods."""
        self.__null_dist = null_dist
        """Estimator of the NULL distribution; None -- no estimation."""


    __doc__ = enhancedDocString('DatasetMeasure', locals(), Stateful)


    def __call__(self, dataset):
        """Compute measure on a given `Dataset`.

        Each implementation has to handle a single arguments: the source
        dataset.

        Returns the computed measure in some iterable (list-like)
        container applying transformer if such is defined
        """
        result = self._call(dataset)
        result = self._postcall(dataset, result)
        # store the raw (pre-transformer) measure upon request
        self.raw_result = result
        if not self.__transformer is None:
            result = self.__transformer(result)
        return result


    def _call(self, dataset):
        """Actually compute measure on a given `Dataset`.

        Each implementation has to handle a single arguments: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container.
        """
        # BUG FIX: was 'raise NotImplemented' which raises the
        # NotImplemented singleton (a TypeError at runtime), not the
        # intended abstract-method exception
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        # estimate the NULL distribution when functor is given
        if not self.__null_dist is None:
            # we need a matching datameasure instance, but we have to disable
            # the estimation of the null distribution in that child to prevent
            # infinite looping.
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)

            # get probability of result under NULL hypothesis if available
            self.null_prob = self.__null_dist.cdf(result)

        return result


    def __str__(self):
        return "%s(transformer=%s, enable_states=%s)" % \
               (self.__class__.__name__, self.__transformer,
                str(self.states.enabled))
class FeaturewiseDatasetMeasure(DatasetMeasure):
    """A per-feature-measure computed from a `Dataset` (base class).

    Should behave like a DatasetMeasure.
    """

    base_sensitivities = StateVariable(enabled=False,
        doc="Stores basic sensitivities if the sensitivity " +
            "relies on combining multiple ones")


    def __init__(self, combiner=SecondAxisSumOfAbs, *args, **kwargs):
        """Initialize

        :Parameters:
          combiner : Functor
            If _call returned value is 2d -- combines along 2nd
            dimension as well as sets base_sensitivities
            TODO change combiner's default
        """
        DatasetMeasure.__init__(self, *args, **kwargs)

        self.__combiner = combiner
        """Functor to combine 2d results along the second axis."""


    def _call(self, dataset):
        """Computes a per-feature-measure on a given `Dataset`.

        Behaves like a `DatasetMeasure`, but computes and returns a 1d ndarray
        with one value per feature.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Adjusts per-feature-measure for computed `result`


        TODO: overlaps in what it does heavily with
         CombinedSensitivityAnalyzer, thus this one might make use of
         CombinedSensitivityAnalyzer yoh thinks, and here
         base_sensitivities doesn't sound appropriate.
        """
        if len(result.shape) > 1:
            n_base = result.shape[1]
            """Number of base sensitivities"""
            if self.states.isEnabled('base_sensitivities'):
                b_sensitivities = []
                if not self.states.isKnown('biases'):
                    biases = None
                else:
                    biases = self.biases
                    # validate only when biases are actually known
                    if len(self.biases) != n_base:
                        raise ValueError(
                            "Number of biases %d is different" % len(self.biases)
                            + " from number of base sensitivities %d" % n_base)
                for i in range(n_base):
                    if not biases is None:
                        bias = biases[i]
                    else:
                        bias = None
                    # BUG FIX: each base sensitivity must be appended; the
                    # original assignment overwrote the list every iteration,
                    # leaving only the last column's measure
                    b_sensitivities.append(
                        StaticDatasetMeasure(
                            measure=result[:, i],
                            bias=bias))
                self.base_sensitivities = b_sensitivities

            # After we stored each sensitivity separately,
            # we can apply combiner
            result = self.__combiner(result)

        # call base class postcall
        result = DatasetMeasure._postcall(self, dataset, result)

        return result
class StaticDatasetMeasure(DatasetMeasure):
    """A static (assigned) sensitivity measure.

    Since implementation is generic it might be per feature or
    per whole dataset
    """

    def __init__(self, measure=None, bias=None, *args, **kwargs):
        """Initialize.

        :Parameters:
          measure
            actual sensitivity to be returned
          bias
            optionally available bias
        """
        DatasetMeasure.__init__(self, *args, **kwargs)
        # a measure is mandatory -- fail early if it was not supplied
        if measure is None:
            raise ValueError("Sensitivity measure has to be provided")
        self.__measure = measure
        self.__bias = bias

    def _call(self, dataset):
        """Returns assigned sensitivity
        """
        # the dataset is irrelevant -- the measure was fixed at construction
        return self.__measure

    #XXX Might need to move into StateVariable?
    bias = property(fget=lambda self: self.__bias)
240 241 242 243 # 244 # Flavored implementations of FeaturewiseDatasetMeasures 245
class Sensitivity(FeaturewiseDatasetMeasure):
    """Base class for sensitivity analyzers driven by a `Classifier`."""

    def __init__(self, clf, force_training=True, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf : Classifier
            classifier to use
          force_training : Bool
            if classifier was already trained -- do not retrain
        """
        # NOTE: removed a dangling second string statement
        # ('"""Does nothing special."""') which was dead code, and a stale
        # docstring claim restricting clf to LinearSVM subclasses, which the
        # code never enforces
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

        self.__clf = clf
        """Classifier used to computed sensitivity"""

        self._force_training = force_training
        """Either to force it to train"""


    def __repr__(self):
        # repr() replaces deprecated backtick syntax
        return \
            "<%s on %s, force_training=%s>" % \
            (str(self), repr(self.__clf), str(self._force_training))


    def __call__(self, dataset):
        """Train classifier on `dataset` and then compute actual sensitivity.
        """
        if not self.clf.trained or self._force_training:
            if __debug__:
                # FIX: corrected "previousely" typo in debug message
                debug("SA", "Training classifier %s %s" %
                      (repr(self.clf),
                       {False: "since it wasn't yet trained",
                        True: "although it was trained previously"}
                       [self.clf.trained]))
            self.clf.train(dataset)

        return FeaturewiseDatasetMeasure.__call__(self, dataset)


    def _setClassifier(self, clf):
        self.__clf = clf

    clf = property(fget=lambda self: self.__clf,
                   fset=_setClassifier)
294 295 296
class CombinedFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Set sensitivity analyzers to be merged into a single output"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced by each classifier")

    def __init__(self, analyzers=None,
                 combiner=FirstAxisMean,
                 **kwargs):
        """Initialize.

        :Parameters:
          analyzers : list or None
            analyzers whose outputs get combined; None means start empty
          combiner : Functor
            functor used to merge the collected sensitivities
        """
        # FIX: identity comparison with None ('is None', not '== None')
        if analyzers is None:
            analyzers = []

        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self.__analyzers = analyzers
        """List of analyzers to use"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""


    def _call(self, dataset):
        """Run every analyzer on `dataset` and combine their outputs."""
        sensitivities = []
        # enumerate replaces the manual 'ind' counter
        for ind, analyzer in enumerate(self.__analyzers):
            if __debug__:
                debug("SA", "Computing sensitivity for SA#%d:%s" %
                      (ind, analyzer))
            sensitivities.append(analyzer(dataset))

        self.sensitivities = sensitivities
        if __debug__:
            debug("SA", "Returning combined using %s sensitivity across %d items" %
                  (repr(self.__combiner), len(sensitivities)))

        return self.__combiner(sensitivities)


    def _setAnalyzers(self, analyzers):
        """Set the analyzers
        """
        self.__analyzers = analyzers
        """Analyzers to use"""

    analyzers = property(fget=lambda x: x.__analyzers,
                         fset=_setAnalyzers,
                         doc="Used analyzers")
346 347 348
class BoostedClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzers to be merged into a single output"""

    def __init__(self,
                 clf,
                 analyzer=None,
                 combined_analyzer=None,
                 **kwargs):
        """Initialize Sensitivity Analyzer for `BoostedClassifier`

        :Parameters:
          clf : BoostedClassifier
            boosted classifier whose basic classifiers get analyzed
          analyzer : Sensitivity or None
            analyzer to (shallow-)copy for each basic classifier; if None,
            each classifier provides its own via getSensitivityAnalyzer()
          combined_analyzer : CombinedFeaturewiseDatasetMeasure or None
            measure merging the per-classifier sensitivities; a default
            instance is created when None
        """
        Sensitivity.__init__(self, clf, **kwargs)
        if combined_analyzer is None:
            combined_analyzer = CombinedFeaturewiseDatasetMeasure(**kwargs)
        self.__combined_analyzer = combined_analyzer
        """Combined analyzer to use"""

        # BUG FIX: the 'analyzer' argument was silently discarded
        # (self.__analyzer was unconditionally set to None)
        self.__analyzer = analyzer
        """Analyzer to use for basic classifiers within boosted classifier"""


    def _call(self, dataset):
        """Build one analyzer per basic classifier and run the combined one."""
        analyzers = []
        # create analyzers
        for clf in self.clf.clfs:
            if self.__analyzer is None:
                analyzer = clf.getSensitivityAnalyzer()
                if analyzer is None:
                    raise ValueError(
                        "Wasn't able to figure basic analyzer for clf %s" %
                        repr(clf))
                if __debug__:
                    debug("SA", "Selected analyzer %s for clf %s" %
                          (repr(analyzer), repr(clf)))
            else:
                # XXX shallow copy should be enough...
                analyzer = copy.copy(self.__analyzer)

            # assign corresponding classifier
            analyzer.clf = clf
            # if clf was trained already - don't train again
            if clf.trained:
                analyzer._force_training = False
            analyzers.append(analyzer)

        self.__combined_analyzer.analyzers = analyzers

        return self.__combined_analyzer(dataset)

    combined_analyzer = property(fget=lambda x: x.__combined_analyzer)
398 399
class ProxyClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzer output just to pass through"""

    def __init__(self,
                 clf,
                 analyzer=None,
                 **kwargs):
        """Initialize Sensitivity Analyzer for `ProxyClassifier`

        :Parameters:
          clf : ProxyClassifier
            proxy classifier whose wrapped classifier gets analyzed
          analyzer : Sensitivity or None
            analyzer to use for the wrapped classifier; if None, it is
            obtained lazily from the wrapped classifier in _call()
        """
        Sensitivity.__init__(self, clf, **kwargs)

        # BUG FIX: the 'analyzer' argument was silently discarded
        # (self.__analyzer was unconditionally set to None)
        self.__analyzer = analyzer
        """Analyzer to use for basic classifiers within boosted classifier"""


    def _call(self, dataset):
        """Delegate the sensitivity computation to the wrapped classifier."""
        if self.__analyzer is None:
            self.__analyzer = self.clf.clf.getSensitivityAnalyzer()
            if self.__analyzer is None:
                raise ValueError(
                    "Wasn't able to figure basic analyzer for clf %s" %
                    repr(self.clf.clf))
            if __debug__:
                debug("SA", "Selected analyzer %s for clf %s" %
                      (repr(self.__analyzer), repr(self.clf.clf)))

        # TODO "remove" unnecessary things below on each call...
        # assign corresponding classifier
        self.__analyzer.clf = self.clf.clf

        # if clf was trained already - don't train again
        if self.clf.clf.trained:
            self.__analyzer._force_training = False

        return self.__analyzer._call(dataset)

    analyzer = property(fget=lambda x: x.__analyzer)