9 """Base class for data measures: algorithms that quantify properties of
10 datasets.
11
12 Besides the `DatasetMeasure` base class this module also provides the (abstract)
13 `FeaturewiseDatasetMeasure` class. The difference between a general measure and
14 the output of the `FeaturewiseDatasetMeasure` is that the latter returns a 1d map
15 (one value per feature in the dataset). In contrast there are no restrictions
16 on the returned value of `DatasetMeasure` except for that it has to be in some
17 iterable container.
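
For example, for a dataset with ``nfeatures`` features, a featurewise measure
yields a vector of that length (the measure name below is a hypothetical
placeholder, not a class from this module)::

  fmeasure = SomeFeaturewiseMeasure()
  sens = fmeasure(dataset)   # 1d, len(sens) == dataset.nfeatures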
"""

__docformat__ = 'restructuredtext'

import copy

from mvpa.misc.state import StateVariable, Stateful
from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs
from mvpa.base.dochelpers import enhancedDocString

if __debug__:
    from mvpa.misc import debug


33 """A measure computed from a `Dataset`
34
35 All dataset measures support arbitrary transformation of the measure
36 after it has been computed. Transformation are done by processing the
37 measure with a functor that is specified via the `transformer` keyword
38 argument of the constructor. Upon request, the raw measure (before
39 transformations are applied) is stored in the `raw_result` state variable.
40
41 Additionally all dataset measures support the estimation of the
42 probabilit(y,ies) of a measure under some distribution. Typically this will
43 be the NULL distribution (no signal), that can be estimated with
44 permutation tests. If a distribution estimator instance is passed to the
45 `null_dist` keyword argument of the constructor the respective
46 probabilities are automatically computed and stored in the `null_prob`
47 state variable.
48
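    A hypothetical usage sketch (the measure, transformer and estimator names
    below are illustrative placeholders, not classes defined in this module)::

      measure = SomeDatasetMeasure(transformer=SomeTransformer,
                                   null_dist=SomeNullDistEstimator(),
                                   enable_states=['raw_result'])
      result = measure(dataset)    # transformed measure
      raw = measure.raw_result     # measure before the transformer was applied
      p = measure.null_prob        # probability under the estimated NULL
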
    :Developer note:
      All subclasses shall get all necessary parameters via their constructor,
      so it is possible to get the same type of measure for multiple datasets
      by passing them to the __call__() method successively.
    """

    raw_result = StateVariable(enabled=False,
        doc="Computed results before applying any " +
            "transformation algorithm")
    null_prob = StateVariable(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""

    def __init__(self, transformer=None, null_dist=None, *args, **kwargs):
        """Does nothing special.

        :Parameters:
          transformer : Functor
            This functor is called in `__call__()` to perform a final
            processing step on the computed dataset measure before it is
            returned. If None, nothing is called.
          null_dist : instance of distribution estimator
            Estimator used to compute the probability of the measure under
            the NULL distribution (see the class documentation).
        """
        Stateful.__init__(self, **kwargs)

        self.__transformer = transformer
        """Functor to be called in return statement of all subclass __call__()
        methods."""
        self.__null_dist = null_dist


    __doc__ = enhancedDocString('DatasetMeasure', locals(), Stateful)


    def __call__(self, dataset):
        """Compute measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container,
        applying the transformer if one is defined.
        """
        result = self._call(dataset)
        result = self._postcall(dataset, result)
        self.raw_result = result
        if self.__transformer is not None:
            result = self.__transformer(result)
        return result


    def _call(self, dataset):
        """Actually compute measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        if self.__null_dist is not None:
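            # Use a copy of this measure for fitting the distribution and
            # strip its estimator first -- otherwise evaluating the copy
            # inside fit() would trigger another fit() recursively.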
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)
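
            # Probability of the observed result under the estimated NULL
            # distribution.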
            self.null_prob = self.__null_dist.cdf(result)

        return result


    def __repr__(self):
        return "%s(transformer=%s, enable_states=%s)" % \
               (self.__class__.__name__, self.__transformer,
                str(self.states.enabled))


class FeaturewiseDatasetMeasure(DatasetMeasure):
    """A per-feature-measure computed from a `Dataset` (base class).

    Should behave like a `DatasetMeasure`.
    """

    base_sensitivities = StateVariable(enabled=False,
        doc="Stores basic sensitivities if the sensitivity " +
            "relies on combining multiple ones")

    def __init__(self, combiner=SecondAxisSumOfAbs, *args, **kwargs):
146 """Initialize
147
148 :Parameters:
149 combiner : Functor
150 If _call returned value is 2d -- combines along 2nd
151 dimension as well as sets base_sensitivities
152 TODO change combiner's default
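
            Assuming the default combiner sums absolute values along the
            second axis, a 2d result of shape (nfeatures, nbases) is reduced
            to roughly (with numpy imported as ``N``)::

              combined[f] == N.abs(result[f, :]).sum()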
        """
        DatasetMeasure.__init__(self, *args, **kwargs)

        self.__combiner = combiner


    def _call(self, dataset):
        """Computes a per-feature-measure on a given `Dataset`.

        Behaves like a `DatasetMeasure`, but computes and returns a 1d ndarray
        with one value per feature.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Adjusts per-feature-measure for the computed `result`.

        TODO: this heavily overlaps with what CombinedSensitivityAnalyzer
        does, so it might make use of CombinedSensitivityAnalyzer (yoh
        thinks); also `base_sensitivities` doesn't sound appropriate here.
        """
        if len(result.shape) > 1:
            n_base = result.shape[1]
            """Number of base sensitivities"""
            if self.states.isEnabled('base_sensitivities'):
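                # Wrap each column of the 2d result into a
                # StaticDatasetMeasure, so the individual base sensitivities
                # (and their biases, if available) remain accessible.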
                b_sensitivities = []
                if not self.states.isKnown('biases'):
                    biases = None
                else:
                    biases = self.biases
                    if len(self.biases) != n_base:
                        raise ValueError(
                            "Number of biases %d is different from the "
                            "number of base sensitivities %d"
                            % (len(self.biases), n_base))
                for i in xrange(n_base):
                    if biases is not None:
                        bias = biases[i]
                    else:
                        bias = None
                    b_sensitivities.append(
                        StaticDatasetMeasure(measure=result[:, i],
                                             bias=bias))
                self.base_sensitivities = b_sensitivities
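            # Reduce the 2d result to one value per feature using the
            # combiner.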
            result = self.__combiner(result)

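        # Fall back to the base class post-processing (NULL-distribution
        # estimation).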
        result = DatasetMeasure._postcall(self, dataset, result)

        return result


212 """A static (assigned) sensitivity measure.
213
214 Since implementation is generic it might be per feature or
215 per whole dataset
216 """
217
    def __init__(self, measure=None, bias=None, *args, **kwargs):
        """Initialize.

        :Parameters:
          measure
            actual sensitivity to be returned
          bias
            optionally available bias
        """
        DatasetMeasure.__init__(self, *args, **kwargs)
        if measure is None:
            raise ValueError("Sensitivity measure has to be provided")
        self.__measure = measure
        self.__bias = bias

    def _call(self, dataset):
        """Returns the assigned sensitivity.
        """
        return self.__measure

    bias = property(fget=lambda self: self.__bias)


class Sensitivity(FeaturewiseDatasetMeasure):

    def __init__(self, clf, force_training=True, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf : Classifier
            classifier to use. Only classifiers sub-classed from
            `LinearSVM` may be used.
          force_training : Bool
            if True (the default), the classifier is (re)trained on the
            dataset even if it was already trained; if False, an already
            trained classifier is not retrained
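
        A hypothetical usage sketch (the classifier class name below is a
        placeholder, not something defined in this module)::

          sana = SomeClassifier().getSensitivityAnalyzer()
          sens = sana(dataset)   # trains the classifier, one value per feature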
        """
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

        self.__clf = clf
        """Classifier used to compute sensitivity"""

        self._force_training = force_training
        """Whether to force training of the classifier"""

    def __repr__(self):
        return "<%s on %s, force_training=%s>" % \
               (str(self), `self.__clf`, str(self._force_training))

    def __call__(self, dataset):
        """Train classifier on `dataset` and then compute actual sensitivity.
        """
        if not self.clf.trained or self._force_training:
            if __debug__:
                debug("SA", "Training classifier %s %s" %
                      (`self.clf`,
                       {False: "since it wasn't yet trained",
                        True: "although it was trained previously"}
                       [self.clf.trained]))
            self.clf.train(dataset)

        return FeaturewiseDatasetMeasure.__call__(self, dataset)


    def _setClassifier(self, clf):
        self.__clf = clf

    clf = property(fget=lambda self: self.__clf,
                   fset=_setClassifier)


    """Set sensitivity analyzers to be merged into a single output"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced by each classifier")

    def _call(self, dataset):
338 """Set the analyzers
339 """
340 self.__analyzers = analyzers
341 """Analyzers to use"""
342
343 analyzers = property(fget=lambda x:x.__analyzers,
344 fset=_setAnalyzers,
345 doc="Used analyzers")
346
347

    """Set sensitivity analyzers to be merged into a single output"""

    def __init__(self,
                 clf,
                 analyzer=None,
                 combined_analyzer=None,
                 **kwargs):

    def _call(self, dataset):

    combined_analyzer = property(fget=lambda x: x.__combined_analyzer)


401 """Set sensitivity analyzer output just to pass through"""
402
403 - def __init__(self,
404 clf,
405 analyzer=None,
406 **kwargs):
407 """Initialize Sensitivity Analyzer for `BoostedClassifier`
408 """
409 Sensitivity.__init__(self, clf, **kwargs)
410
411 self.__analyzer = None
412 """Analyzer to use for basic classifiers within boosted classifier"""
413
414
    def _call(self, dataset):
        if self.__analyzer is None:
            self.__analyzer = self.clf.clf.getSensitivityAnalyzer()
            if self.__analyzer is None:
                raise ValueError(
                    "Wasn't able to figure basic analyzer for clf %s"
                    % `self.clf.clf`)
            if __debug__:
                debug("SA", "Selected analyzer %s for clf %s" %
                      (`self.__analyzer`, `self.clf.clf`))
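        # Point the chosen analyzer at the classifier wrapped by this
        # (pass-through) classifier.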
        self.__analyzer.clf = self.clf.clf
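        # If the wrapped classifier is already trained there is no need to
        # force retraining in the analyzer.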
        if self.clf.clf.trained:
            self.__analyzer._force_training = False

        return self.__analyzer._call(dataset)

    analyzer = property(fget=lambda x: x.__analyzer)