Package mvpa :: Package measures :: Module anova
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.measures.anova

 1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
 2  #ex: set sts=4 ts=4 sw=4 et: 
 3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
 4  # 
 5  #   See COPYING file distributed along with the PyMVPA package for the 
 6  #   copyright and license terms. 
 7  # 
 8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
 9  """FeaturewiseDatasetMeasure performing a univariate ANOVA.""" 
10   
11  __docformat__ = 'restructuredtext' 
12   
13  import numpy as N 
14   
15  from mvpa.measures.base import FeaturewiseDatasetMeasure 
16   
17  # TODO: Extend with access to functionality from scipy.stats? 
18  # For binary: 
 19  #  2-sample Kolmogorov-Smirnov might be interesting 
20  #   (scipy.stats.ks_2samp) to judge if two conditions are derived 
21  #   from different distributions (take it as 'activity' vs 'rest'), 
22  # 
23  # For binary+multiclass: 
24  #  kruskal-wallis H-test (scipy.stats.kruskal) 
25  #  GLM: scipy.stats.glm 
26  # 
 27  # and maybe some others 
28   
class OneWayAnova(FeaturewiseDatasetMeasure):
    """Univariate one-way ANOVA as a `FeaturewiseDatasetMeasure`.

    For every feature an F-score is computed as the ratio of the variance
    between groups to the variance within groups.  A group is the set of
    samples that share one unique label.

    The raw F-scores are returned as a sensitivity map; no statistical
    test is carried out on them.  F-scores lie in the range [0,inf], and a
    larger value indicates a higher sensitivity.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base class.

        :Parameters:
          kwargs
            Passed unchanged to `FeaturewiseDatasetMeasure.__init__`.
        """
        # the base class has to be set up before anything else
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

    def _call(self, dataset):
        """Compute the F-score of every feature.

        :Parameters:
          dataset
            Object providing ``uniquelabels``, ``labels`` and ``samples``;
            ``samples`` is indexed with the boolean mask
            ``labels == label`` to select the samples of one group.

        :Returns:
          Array holding, per feature, the variance of the group means
          divided by the mean of the within-group variances.  Entries whose
          variance of group means is zero are left at zero.
        """
        # One (mean, variance) pair per group, both taken along axis 0 of
        # the [samples x features] block belonging to that group.
        group_stats = []
        for label in dataset.uniquelabels:
            members = dataset.samples[dataset.labels == label]
            group_stats.append((members.mean(axis=0), members.var(axis=0)))

        # variance of the group means (becomes the result, updated in-place)
        fscores = N.array([mean for mean, _ in group_stats]).var(axis=0)
        # mean of the within-group variances
        within = N.array([var for _, var in group_stats]).mean(axis=0)

        # XXX Features without any variance inside the groups may cause
        # problems here.  They could be caught and given a zero F-score to
        # discard them, but zero variance is at least theoretically
        # possible.  N.nan_to_num() would be another option, yet it might
        # hide the problem.  It is therefore left to the user to deal with
        # such features prior to any analysis.

        # Divide only where the between-group variance is non-zero: an
        # invariant feature keeps its 0 instead of turning into NaN.
        nonzero = fscores.nonzero()
        fscores[nonzero] /= within[nonzero]

        return fscores
81