1
2
3
4
5
6
7
8
9 """Base class for data measures: algorithms that quantify properties of
10 datasets.
11
12 Besides the `DatasetMeasure` base class this module also provides the
13 (abstract) `FeaturewiseDatasetMeasure` class. The difference between a general
14 measure and the output of the `FeaturewiseDatasetMeasure` is that the latter
15 returns a 1d map (one value per feature in the dataset). In contrast there are
no restrictions on the returned value of `DatasetMeasure` except that it
has to be in some iterable container.
18
19 """
20
21 __docformat__ = 'restructuredtext'
22
23 import numpy as N
24 import mvpa.misc.copy as copy
25
26 from mvpa.misc.state import StateVariable, Stateful
27 from mvpa.misc.args import group_kwargs
28 from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs
29 from mvpa.base.dochelpers import enhancedDocString
30 from mvpa.base import externals
31 from mvpa.clfs.stats import autoNullDist
32
33 if __debug__:
34 from mvpa.base import debug
38 """A measure computed from a `Dataset`
39
40 All dataset measures support arbitrary transformation of the measure
41 after it has been computed. Transformation are done by processing the
42 measure with a functor that is specified via the `transformer` keyword
43 argument of the constructor. Upon request, the raw measure (before
44 transformations are applied) is stored in the `raw_result` state variable.
45
46 Additionally all dataset measures support the estimation of the
probability (or probabilities) of a measure under some distribution. Typically this will
48 be the NULL distribution (no signal), that can be estimated with
49 permutation tests. If a distribution estimator instance is passed to the
50 `null_dist` keyword argument of the constructor the respective
51 probabilities are automatically computed and stored in the `null_prob`
52 state variable.
53
54 :Developer note:
55 All subclasses shall get all necessary parameters via their constructor,
56 so it is possible to get the same type of measure for multiple datasets
57 by passing them to the __call__() method successively.
58 """
59
60 raw_result = StateVariable(enabled=False,
61 doc="Computed results before applying any " +
62 "transformation algorithm")
63 null_prob = StateVariable(enabled=True)
64 """Stores the probability of a measure under the NULL hypothesis"""
65 null_t = StateVariable(enabled=False)
66 """Stores the t-score corresponding to null_prob under assumption
67 of Normal distribution"""
68
69 - def __init__(self, transformer=None, null_dist=None, **kwargs):
70 """Does nothing special.
71
72 :Parameter:
73 transformer: Functor
74 This functor is called in `__call__()` to perform a final
75 processing step on the to be returned dataset measure. If None,
76 nothing is called
77 null_dist : instance of distribution estimator
78 """
79 Stateful.__init__(self, **kwargs)
80
81 self.__transformer = transformer
82 """Functor to be called in return statement of all subclass __call__()
83 methods."""
84 null_dist_ = autoNullDist(null_dist)
85 if __debug__:
86 debug('SA', 'Assigning null_dist %s whenever original given was %s'
87 % (null_dist_, null_dist))
88 self.__null_dist = null_dist_
89
90
91 __doc__ = enhancedDocString('DatasetMeasure', locals(), Stateful)
92
93
95 """Compute measure on a given `Dataset`.
96
97 Each implementation has to handle a single arguments: the source
98 dataset.
99
100 Returns the computed measure in some iterable (list-like)
101 container applying transformer if such is defined
102 """
103 result = self._call(dataset)
104 result = self._postcall(dataset, result)
105 return result
106
107
108 - def _call(self, dataset):
109 """Actually compute measure on a given `Dataset`.
110
111 Each implementation has to handle a single arguments: the source
112 dataset.
113
114 Returns the computed measure in some iterable (list-like) container.
115 """
116 raise NotImplemented
117
118
    def _postcall(self, dataset, result):
        """Some postprocessing on the result

        Stores the raw result (state variable `raw_result`), applies the
        optional transformer, and -- if a NULL-distribution estimator was
        assigned -- computes `null_prob` (and optionally `null_t`) for the
        transformed result.
        """
        self.raw_result = result
        if not self.__transformer is None:
            if __debug__:
                debug("SA_", "Applying transformer %s" % self.__transformer)
            result = self.__transformer(result)

        # estimate the NULL distribution when an estimator was assigned
        if not self.__null_dist is None:
            if __debug__:
                debug("SA_", "Estimating NULL distribution using %s"
                      % self.__null_dist)

            # Fit the estimator on a copy of this measure with its own
            # null_dist disabled -- otherwise fitting would recursively
            # trigger NULL estimation again.
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)

            if self.states.isEnabled('null_t'):
                # request probability under NULL hypothesis together with
                # the flag telling which tail each value belongs to
                null_prob, null_right_tail = \
                       self.__null_dist.p(result, return_tails=True)
                self.null_prob = null_prob

                # t-score conversion needs scipy's normal distribution
                externals.exists('scipy', raiseException=True)
                from scipy.stats import norm

                # NOTE(review): this tail -> cdf mapping presumably belongs
                # in the distribution class itself, not here -- confirm
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = N.abs(null_prob)
                elif tail == 'right':
                    acdf = 1.0 - N.abs(null_prob)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - N.clip(N.abs(null_prob), 0, 0.5)
                else:
                    raise RuntimeError, 'Unhandled tail %s' % tail

                # clip away exact 0/1 so norm.ppf cannot return +/- inf
                clip = 1e-16
                null_t = norm.ppf(N.clip(acdf, clip, 1.0 - clip))
                # values in the left tail get a negative t-score
                null_t[~null_right_tail] *= -1.0
                self.null_t = null_t
            else:
                # only the probability was requested -- no tail information
                self.null_prob = self.__null_dist.p(result)

        return result
179
180
182 prefixes = prefixes[:]
183 if self.__transformer is not None:
184 prefixes.append("transformer=%s" % self.__transformer)
185 if self.__null_dist is not None:
186 prefixes.append("null_dist=%s" % self.__null_dist)
187 return super(DatasetMeasure, self).__repr__(prefixes=prefixes)
188
189
190 @property
191 - def null_dist(self): return self.__null_dist
192
195 """A per-feature-measure computed from a `Dataset` (base class).
196
197 Should behave like a DatasetMeasure.
198 """
199
200 base_sensitivities = StateVariable(enabled=False,
201 doc="Stores basic sensitivities if the sensitivity " +
202 "relies on combining multiple ones")
203
204
205
206
207
208
209
210
211
212
213
214
216 """Initialize
217
218 :Parameters:
219 combiner : Functor
220 The combiner is only applied if the computed featurewise dataset
221 measure is more than one-dimensional. This is different from a
222 `transformer`, which is always applied. By default, the sum of
223 absolute values along the second axis is computed.
224 """
225 DatasetMeasure.__init__(self, **kwargs)
226
227 self.__combiner = combiner
228
236
237
238 - def _call(self, dataset):
239 """Computes a per-feature-measure on a given `Dataset`.
240
241 Behaves like a `DatasetMeasure`, but computes and returns a 1d ndarray
242 with one value per feature.
243 """
244 raise NotImplementedError
245
246
247 - def _postcall(self, dataset, result):
248 """Adjusts per-feature-measure for computed `result`
249
250
251 TODO: overlaps in what it does heavily with
252 CombinedSensitivityAnalyzer, thus this one might make use of
253 CombinedSensitivityAnalyzer yoh thinks, and here
254 base_sensitivities doesn't sound appropriate.
255 MH: There is indeed some overlap, but also significant differences.
256 This one operates on a single sensana and combines over second
257 axis, CombinedFeaturewiseDatasetMeasure uses first axis.
258 Additionally, 'Sensitivity' base class is
259 FeaturewiseDatasetMeasures which would have to be changed to
260 CombinedFeaturewiseDatasetMeasure to deal with stuff like
261 SMLRWeights that return multiple sensitivity values by default.
262 Not sure if unification of both (and/or removal of functionality
263 here does not lead to an overall more complicated situation,
264 without any real gain -- after all this one works ;-)
265 """
266 rsshape = result.squeeze().shape
267 if len(result.squeeze().shape)>1:
268 n_base = result.shape[1]
269 """Number of base sensitivities"""
270 if self.states.isEnabled('base_sensitivities'):
271 b_sensitivities = []
272 if not self.states.isKnown('biases'):
273 biases = None
274 else:
275 biases = self.biases
276 if len(self.biases) != n_base:
277 raise ValueError, \
278 "Number of biases %d is " % len(self.biases) \
279 + "different from number of base sensitivities" \
280 + "%d" % n_base
281 for i in xrange(n_base):
282 if not biases is None:
283 bias = biases[i]
284 else:
285 bias = None
286 b_sensitivities = StaticDatasetMeasure(
287 measure = result[:,i],
288 bias = bias)
289 self.base_sensitivities = b_sensitivities
290
291
292
293 if self.__combiner is not None:
294 result = self.__combiner(result)
295 else:
296
297
298
299 result = result.squeeze()
300
301
302 result = DatasetMeasure._postcall(self, dataset, result)
303
304 return result
305
309 """A static (assigned) sensitivity measure.
310
311 Since implementation is generic it might be per feature or
312 per whole dataset
313 """
314
315 - def __init__(self, measure=None, bias=None, *args, **kwargs):
316 """Initialize.
317
318 :Parameters:
319 measure
320 actual sensitivity to be returned
321 bias
322 optionally available bias
323 """
324 DatasetMeasure.__init__(self, *args, **kwargs)
325 if measure is None:
326 raise ValueError, "Sensitivity measure has to be provided"
327 self.__measure = measure
328 self.__bias = bias
329
330 - def _call(self, dataset):
331 """Returns assigned sensitivity
332 """
333 return self.__measure
334
335
336 bias = property(fget=lambda self:self.__bias)
337
338
339
340
341
342
343 -class Sensitivity(FeaturewiseDatasetMeasure):
344
345 _LEGAL_CLFS = []
346 """If Sensitivity is classifier specific, classes of classifiers
347 should be listed in the list
348 """
349
350 - def __init__(self, clf, force_training=True, **kwargs):
351 """Initialize the analyzer with the classifier it shall use.
352
353 :Parameters:
354 clf : :class:`Classifier`
355 classifier to use.
356 force_training : Bool
357 if classifier was already trained -- do not retrain
358 """
359
360 """Does nothing special."""
361 FeaturewiseDatasetMeasure.__init__(self, **kwargs)
362
363 _LEGAL_CLFS = self._LEGAL_CLFS
364 if len(_LEGAL_CLFS) > 0:
365 found = False
366 for clf_class in _LEGAL_CLFS:
367 if isinstance(clf, clf_class):
368 found = True
369 break
370 if not found:
371 raise ValueError, \
372 "Classifier %s has to be of allowed class (%s), but is %s" \
373 % (clf, _LEGAL_CLFS, `type(clf)`)
374
375 self.__clf = clf
376 """Classifier used to computed sensitivity"""
377
378 self._force_training = force_training
379 """Either to force it to train"""
380
382 if prefixes is None:
383 prefixes = []
384 prefixes.append("clf=%s" % repr(self.clf))
385 if not self._force_training:
386 prefixes.append("force_training=%s" % self._force_training)
387 return super(Sensitivity, self).__repr__(prefixes=prefixes)
388
389
391 """Train classifier on `dataset` and then compute actual sensitivity.
392
393 If the classifier is already trained it is possible to extract the
394 sensitivities without passing a dataset.
395 """
396
397 clf = self.__clf
398 if not clf.trained or self._force_training:
399 if dataset is None:
400 raise ValueError, \
401 "Training classifier to compute sensitivities requires " \
402 "a dataset."
403 if __debug__:
404 debug("SA", "Training classifier %s %s" %
405 (`clf`,
406 {False: "since it wasn't yet trained",
407 True: "although it was trained previousely"}
408 [clf.trained]))
409 clf.train(dataset)
410
411 return FeaturewiseDatasetMeasure.__call__(self, dataset)
412
413
416
417
418 @property
420 """Return feature_ids used by the underlying classifier
421 """
422 return self.__clf._getFeatureIds()
423
424
425 clf = property(fget=lambda self:self.__clf,
426 fset=_setClassifier)
427
431 """Set sensitivity analyzers to be merged into a single output"""
432
433 sensitivities = StateVariable(enabled=False,
434 doc="Sensitivities produced by each classifier")
435
436
437
438
439 - def __init__(self, analyzers=None,
440 combiner=None,
441 **kwargs):
442 """Initialize CombinedFeaturewiseDatasetMeasure
443
444 :Parameters:
445 analyzers : list or None
446 List of analyzers to be used. There is no logic to populate
447 such a list in __call__, so it must be either provided to
448 the constructor or assigned to .analyzers prior calling
449 """
450 if analyzers is None:
451 analyzers = []
452
453 FeaturewiseDatasetMeasure.__init__(self, **kwargs)
454 self.__analyzers = analyzers
455 """List of analyzers to use"""
456
457 self.__combiner = combiner
458 """Which functor to use to combine all sensitivities"""
459
460
461 - def _call(self, dataset):
482
483
485 """Set the analyzers
486 """
487 self.__analyzers = analyzers
488 """Analyzers to use"""
489
490 analyzers = property(fget=lambda x:x.__analyzers,
491 fset=_setAnalyzers,
492 doc="Used analyzers")
493
497 """Set sensitivity analyzers to be merged into a single output"""
498
499
500
501 @group_kwargs(prefixes=['slave_'], assign=True)
502 - def __init__(self,
503 clf,
504 analyzer=None,
505 combined_analyzer=None,
506 slave_kwargs={},
507 **kwargs):
508 """Initialize Sensitivity Analyzer for `BoostedClassifier`
509
510 :Parameters:
511 clf : `BoostedClassifier`
512 Classifier to be used
513 analyzer : analyzer
514 Is used to populate combined_analyzer
515 slave_*
516 Arguments to pass to created analyzer if analyzer is None
517 """
518 Sensitivity.__init__(self, clf, **kwargs)
519 if combined_analyzer is None:
520
521 kwargs.pop('force_training', None)
522 combined_analyzer = CombinedFeaturewiseDatasetMeasure(**kwargs)
523 self.__combined_analyzer = combined_analyzer
524 """Combined analyzer to use"""
525
526 if analyzer is not None and len(self._slave_kwargs):
527 raise ValueError, \
528 "Provide either analyzer of slave_* arguments, not both"
529 self.__analyzer = analyzer
530 """Analyzer to use for basic classifiers within boosted classifier"""
531
532
533 - def _call(self, dataset):
564
565 combined_analyzer = property(fget=lambda x:x.__combined_analyzer)
566
569 """Set sensitivity analyzer output just to pass through"""
570
571 @group_kwargs(prefixes=['slave_'], assign=True)
572 - def __init__(self,
573 clf,
574 analyzer=None,
575 **kwargs):
576 """Initialize Sensitivity Analyzer for `BoostedClassifier`
577 """
578 Sensitivity.__init__(self, clf, **kwargs)
579
580 if analyzer is not None and len(self._slave_kwargs):
581 raise ValueError, \
582 "Provide either analyzer of slave_* arguments, not both"
583
584 self.__analyzer = analyzer
585 """Analyzer to use for basic classifiers within boosted classifier"""
586
587
588 - def _call(self, dataset):
615
616 analyzer = property(fget=lambda x:x.__analyzer)
617
620 """Set sensitivity analyzer output be reverse mapped using mapper of the
621 slave classifier"""
622
623 - def _call(self, dataset):
631