Package mvpa :: Package featsel :: Module base
[hide private]
[frames | no frames]

Source Code for Module mvpa.featsel.base

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Feature selection base classes and helpers.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  from mvpa.featsel.helpers import FractionTailSelector 
 14  from mvpa.misc.state import StateVariable, Stateful 
 15   
 16  if __debug__: 
 17      from mvpa.misc import debug 
 18   
class FeatureSelection(Stateful):
    """Abstract parent of every feature selection functor.

    Concrete subclasses implement `__call__` to pick a subset of the
    features of a dataset.  The ids of the chosen features can be exposed
    through the `selected_ids` state variable, which is declared disabled
    by default.
    """

    selected_ids = StateVariable(enabled=False)

    def __init__(self, **kwargs):
        """Initialize the feature selection.

        :Parameters:
          kwargs
            Passed on unchanged to `Stateful.__init__`.
        """
        # Nothing specific to set up here: delegate to the parent class.
        Stateful.__init__(self, **kwargs)

    def __call__(self, dataset, testdataset=None):
        """Run the feature selection (to be implemented by subclasses).

        :Parameters:
          dataset : Dataset
            dataset used to select features
          testdataset : Dataset
            dataset that might be used to compute a stopping criterion

        Implementations return a tuple holding the dataset reduced to the
        selected features and, if a test dataset was given, the
        correspondingly reduced test dataset.  Derived classes must provide
        their own interface to any further information relevant to the
        selection process (e.g. mask, elimination step in RFE, etc).

        :Raises:
          NotImplementedError
            Always, since this base class performs no selection itself.
        """
        raise NotImplementedError
class SensitivityBasedFeatureSelection(FeatureSelection):
    """Feature elimination driven by a sensitivity map.

    A `FeaturewiseDatasetMeasure` is used to compute sensitivity maps given
    a certain dataset.  These sensitivity maps are in turn handed to a
    feature selector which decides which features are kept; all other
    features are discarded.
    """

    sensitivity = StateVariable(enabled=False)

    def __init__(self,
                 sensitivity_analyzer,
                 feature_selector=None,
                 **kwargs
                 ):
        """Initialize feature selection

        :Parameters:
          sensitivity_analyzer : FeaturewiseDatasetMeasure
            sensitivity analyzer to come up with sensitivity
          feature_selector : Functor
            Given a sensitivity map it has to return the ids of those
            features that should be kept.  If None (default), a fresh
            `FractionTailSelector(0.05)` is created for this instance.
          kwargs
            Passed on unchanged to `FeatureSelection.__init__`.
        """

        # base init first
        FeatureSelection.__init__(self, **kwargs)

        # Build the default selector per instance.  A selector constructed
        # in the signature would be evaluated once at definition time and
        # the very same object would be shared by every instance relying
        # on the default.
        if feature_selector is None:
            feature_selector = FractionTailSelector(0.05)

        self.__sensitivity_analyzer = sensitivity_analyzer
        """Sensitivity analyzer to use once"""

        self.__feature_selector = feature_selector
        """Functor which takes care about removing some features."""

    def __call__(self, dataset, testdataset=None):
        """Select the most important features

        :Parameters:
          dataset : Dataset
            used to compute sensitivity maps
          testdataset : Dataset
            optional dataset to select features on

        Returns a tuple `(wdataset, wtestdataset)`: `dataset` reduced to the
        selected features and `testdataset` reduced to the same features,
        or None in the second position if no `testdataset` was given.

        The computed sensitivity map is assigned to the `sensitivity` state
        and the sorted ids of the selected features to `selected_ids`.
        """

        # Compute the sensitivity map and expose it via the state variable
        sensitivity = self.__sensitivity_analyzer(dataset)
        self.sensitivity = sensitivity

        # Select features to preserve
        selected_ids = self.__feature_selector(sensitivity)

        if __debug__:
            debug("FS_", "Sensitivity: %s Selected ids: %s" %
                  (sensitivity, selected_ids))

        # Create a dataset only with selected features
        wdataset = dataset.selectFeatures(selected_ids)

        if testdataset is not None:
            wtestdataset = testdataset.selectFeatures(selected_ids)
        else:
            wtestdataset = None

        # NOTE(review): the original comment here stated that this order
        # differs from the one used in RFE; RFE is not visible in this
        # module, so confirm before relying on it.
        results = (wdataset, wtestdataset)

        # WARNING: THIS MUST BE THE LAST THING TO DO ON selected_ids
        # The ids are sorted via their own sort() method (in place for
        # lists and arrays), i.e. only after they were used above in the
        # order the selector produced them.
        selected_ids.sort()
        self.selected_ids = selected_ids

        # dataset with selected features is returned
        return results
class FeatureSelectionPipeline(FeatureSelection):
    """Feature elimination through a list of `FeatureSelection` instances.

    Given a list of feature selections it applies them in turn: the
    datasets returned by one step are the input of the next step.
    """

    nfeatures = StateVariable(
        doc="Number of features before each step in pipeline")
    # TODO: may be we should also append resultant number of features?

    def __init__(self,
                 feature_selections,
                 **kwargs
                 ):
        """Initialize feature selection pipeline

        :Parameters:
          feature_selections : list of FeatureSelection
            selections which to use. Order matters
          kwargs
            Passed on unchanged to `FeatureSelection.__init__`.
        """
        # base init first
        FeatureSelection.__init__(self, **kwargs)

        self.__feature_selections = feature_selections
        """Selectors to use in turn"""

    def __call__(self, dataset, testdataset=None, **kwargs):
        """Invocation of the feature selection

        :Parameters:
          dataset : Dataset
            dataset handed to the first selection of the pipeline
          testdataset : Dataset
            optional test dataset passed along through every step
          kwargs
            Passed on unchanged to every feature selection in the pipeline.

        Returns the tuple `(wdataset, wtestdataset)` as produced by the
        last selection in the pipeline, or the input datasets themselves
        if the pipeline is empty.
        """
        wdataset = dataset
        wtestdataset = testdataset

        self.selected_ids = None

        # Number of features at each step (before running selection)
        self.nfeatures = []

        for fs in self.__feature_selections:

            # enable selected_ids state if it was requested from this class
            fs.states._changeTemporarily(
                enable_states=["selected_ids"], other=self)
            try:
                if self.states.isEnabled("nfeatures"):
                    self.nfeatures.append(wdataset.nfeatures)

                if __debug__:
                    debug('FSPL', 'Invoking %s on (%s, %s)' %
                          (fs, wdataset, wtestdataset))
                wdataset, wtestdataset = fs(wdataset, wtestdataset, **kwargs)

                if self.states.isEnabled("selected_ids"):
                    # Identity test on purpose: once ids are stored they
                    # are indexed like an array below, and `== None` on an
                    # array is an elementwise comparison that cannot be
                    # used as an `if` condition.
                    if self.selected_ids is None:
                        self.selected_ids = fs.selected_ids
                    else:
                        # Index the ids accumulated so far with the ids
                        # reported by this step (presumably relative to the
                        # already reduced dataset -- TODO confirm).
                        self.selected_ids = self.selected_ids[fs.selected_ids]
            finally:
                # Undo the temporary state change even if the step raised,
                # so a failing step is not left with altered states.
                fs.states._resetEnabledTemporarily()

        return (wdataset, wtestdataset)

    feature_selections = property(fget=lambda self: self.__feature_selections,
                                  doc="List of `FeatureSelections`")