Package mvpa :: Package featsel :: Module base
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.featsel.base

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Base classes and helpers for feature selection.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  from mvpa.featsel.helpers import FractionTailSelector 
 14  from mvpa.misc.state import StateVariable, Stateful 
 15   
 16  if __debug__: 
 17      from mvpa.base import debug 
 18   
class FeatureSelection(Stateful):
    """Common base of all feature selection functors.

    A feature selection is a callable object which receives a dataset
    (and optionally a test dataset) and hands back the dataset(s) reduced
    to the selected features. This class only defines the interface --
    the selection itself has to be implemented by derived classes.
    """

    # State meant to be filled by derived classes with the ids of the
    # selected features; it is disabled by default.
    selected_ids = StateVariable(enabled=False)

    def __init__(self, **kwargs):
        """Initialize the base class.

        :Parameters:
          kwargs
            All keyword arguments are handed over to `Stateful`.
        """
        Stateful.__init__(self, **kwargs)

    def __call__(self, dataset, testdataset=None):
        """Invocation of the feature selection

        :Parameters:
          dataset : Dataset
            dataset used to select features
          testdataset : Dataset
            dataset that might be used to compute a stopping criterion

        Returns a tuple with the dataset containing the selected features.
        If present, the tuple also contains the selected features of the
        test dataset. Derived classes must provide an interface to access
        other information relevant to the feature selection process
        (e.g. mask, elimination step (in RFE), etc).

        The base implementation is abstract and always raises
        `NotImplementedError`.
        """
        raise NotImplementedError()
49 50 51
class SensitivityBasedFeatureSelection(FeatureSelection):
    """Feature elimination driven by a sensitivity map.

    A `FeaturewiseDatasetMeasure` computes a sensitivity map for a given
    dataset. A feature selector then decides, based on that map, which
    features are kept; all other features are discarded.
    """

    # Sensitivity map computed during the last call; disabled by default.
    sensitivity = StateVariable(enabled=False)

    # NOTE(review): the default `FractionTailSelector(0.05)` is created once,
    # when this method is defined, so every instance relying on the default
    # shares the very same selector object -- confirm this is intended.
    def __init__(self,
                 sensitivity_analyzer,
                 feature_selector=FractionTailSelector(0.05),
                 **kwargs
                 ):
        """Initialize feature selection

        :Parameters:
          sensitivity_analyzer : FeaturewiseDatasetMeasure
            sensitivity analyzer to come up with sensitivity
          feature_selector : Functor
            Given a sensitivity map it has to return the ids of those
            features that should be kept.
          kwargs
            Handed over to `FeatureSelection`.
        """
        # the base class has to be set up before anything else
        FeatureSelection.__init__(self, **kwargs)

        # measure called on the dataset to obtain the sensitivity map
        self.__sensitivity_analyzer = sensitivity_analyzer

        # functor turning a sensitivity map into ids of features to keep
        self.__feature_selector = feature_selector

    def __call__(self, dataset, testdataset=None):
        """Select the most important features

        :Parameters:
          dataset : Dataset
            used to compute sensitivity maps
          testdataset : Dataset
            optional dataset to select features on

        Returns a tuple `(dataset, testdataset)` holding new datasets
        reduced to the selected features. The second item is `None` if no
        `testdataset` was given.
        """
        # compute the sensitivity map and expose it via the state variable
        sens_map = self.__sensitivity_analyzer(dataset)
        self.sensitivity = sens_map

        # ids of the features which survive the selection
        keep_ids = self.__feature_selector(sens_map)

        if __debug__:
            debug("FS_", "Sensitivity: %s Selected ids: %s" %
                  (sens_map, keep_ids))

        # new dataset(s) limited to the selected features
        reduced = dataset.selectFeatures(keep_ids)
        reduced_test = None
        if testdataset is not None:
            reduced_test = testdataset.selectFeatures(keep_ids)

        # WARNING: sorting works in place, hence it MUST remain the last
        #          operation performed on the ids, i.e. it has to happen
        #          after they were used to select features above
        keep_ids.sort()
        self.selected_ids = keep_ids

        # NOTE(review): the original comment hinted that this order of the
        # results differs from the one used in RFE -- TODO confirm
        return (reduced, reduced_test)

    # read-only access from outside
    sensitivity_analyzer = property(
        fget=lambda self: self.__sensitivity_analyzer,
        doc="Measure which was used to do selection")
135 136
class FeatureSelectionPipeline(FeatureSelection):
    """Feature elimination through the list of FeatureSelection's.

    Given a list of FeatureSelections it applies them in turn: the
    dataset(s) returned by one selection are the input of the next one.
    """

    nfeatures = StateVariable(
        doc="Number of features before each step in pipeline")
    # TODO: may be we should also append resultant number of features?

    def __init__(self,
                 feature_selections,
                 **kwargs
                 ):
        """Initialize feature selection pipeline

        :Parameters:
          feature_selections : list of FeatureSelection
            selections which to use. Order matters
          kwargs
            Handed over to `FeatureSelection`.
        """
        # base init first
        FeatureSelection.__init__(self, **kwargs)

        # selectors to use in turn
        self.__feature_selections = feature_selections

    def __call__(self, dataset, testdataset=None, **kwargs):
        """Invocation of the feature selection

        :Parameters:
          dataset : Dataset
            dataset handed to the first selection of the pipeline
          testdataset : Dataset
            optional test dataset passed through the pipeline alongside
            `dataset`
          kwargs
            Passed to every feature selection of the pipeline.

        Returns a tuple `(dataset, testdataset)` as returned by the last
        feature selection of the pipeline. With an empty pipeline the
        input arguments are returned unchanged.
        """
        wdataset = dataset
        wtestdataset = testdataset

        self.selected_ids = None

        # number of features at each step (before running selection)
        self.nfeatures = []

        for fs in self.__feature_selections:

            # enable selected_ids state if it was requested from this class
            fs.states._changeTemporarily(
                enable_states=["selected_ids"], other=self)
            try:
                if self.states.isEnabled("nfeatures"):
                    self.nfeatures.append(wdataset.nfeatures)

                if __debug__:
                    debug('FSPL', 'Invoking %s on (%s, %s)' %
                          (fs, wdataset, wtestdataset))
                wdataset, wtestdataset = fs(wdataset, wtestdataset, **kwargs)

                if self.states.isEnabled("selected_ids"):
                    # Identity test on purpose: after the first step the
                    # stored ids are indexed with the ids of the next step
                    # (see below), i.e. they may be an array, for which
                    # `== None` is evaluated elementwise and has no
                    # well-defined truth value.
                    if self.selected_ids is None:
                        self.selected_ids = fs.selected_ids
                    else:
                        # ids of a later step refer to the already reduced
                        # dataset -- map them back through the ids
                        # accumulated so far
                        self.selected_ids = self.selected_ids[fs.selected_ids]
            finally:
                # undo the temporary state change even if the step failed,
                # so the selector is not left with altered states
                fs.states._resetEnabledTemporarily()

        return (wdataset, wtestdataset)

    feature_selections = property(
        fget=lambda self: self.__feature_selections,
        doc="List of `FeatureSelections`")
200