
Source Code for Module mvpa.featsel.ifs

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Incremental feature search (IFS).

Very similar to recursive feature elimination (RFE), but instead of beginning
with all features and stripping some sequentially, start with an empty feature
set and include important features successively.
"""
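# In pseudocode the two strategies contrast as follows (a sketch, not part
# of this module's API):
#
#   RFE: selected = all features; repeatedly drop the least important ones
#   IFS: selected = [];           repeatedly add the most promising candidates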

__docformat__ = 'restructuredtext'

from mvpa.misc.copy import copy

from mvpa.featsel.base import FeatureSelection
from mvpa.featsel.helpers import NBackHistoryStopCrit, \
                                 FixedNElementTailSelector, \
                                 BestDetector

from mvpa.misc.state import StateVariable

if __debug__:
    from mvpa.base import debug

class IFS(FeatureSelection):
    """Incremental feature search.

    A scalar `DatasetMeasure` is computed multiple times on variations of a
    certain dataset. These measures are in turn used to incrementally select
    important features. Starting with an empty feature set, the dataset
    measure is first computed for each single feature. A number of features
    is selected based on the resulting data measure map (using an
    `ElementSelector`).

    Next the dataset measure is computed again using each remaining feature
    in addition to the already selected feature set. Again the
    `ElementSelector` is used to select more features.

    For each feature selection the transfer error on some test dataset is
    computed. This procedure is repeated until a given `StoppingCriterion`
    is reached.
    """

    errors = StateVariable()
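    # A minimal usage sketch (illustrative only): `OneWayAnova`, `SMLR` and
    # `TransferError` are assumed to be provided by other PyMVPA modules and
    # are not defined here.
    #
    #   from mvpa.measures.anova import OneWayAnova
    #   from mvpa.clfs.smlr import SMLR
    #   from mvpa.clfs.transerror import TransferError
    #
    #   ifs = IFS(OneWayAnova(),
    #             TransferError(SMLR()),
    #             feature_selector=FixedNElementTailSelector(
    #                 5, tail='upper', mode='select'))
    #   seltrain, seltest = ifs(trainingdataset, testdataset)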
    def __init__(self,
                 data_measure,
                 transfer_error,
                 bestdetector=BestDetector(),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector()),
                 feature_selector=FixedNElementTailSelector(1,
                                                            tail='upper',
                                                            mode='select'),
                 **kwargs
                 ):
        """Initialize incremental feature search

        :Parameters:
          data_measure : DatasetMeasure
            Computed for each candidate feature selection.
          transfer_error : TransferError
            Computed against a test dataset for each incremental feature
            set.
          bestdetector : Functor
            Given a list of error values it has to return a boolean that
            signals whether the latest error value is the total minimum.
          stopping_criterion : Functor
            Given a list of error values it has to return whether the
            criterion is fulfilled.
          feature_selector : Functor
            Given a data measure map it has to return the ids of the
            features that should be selected in the current iteration.
        """
        # base classes' init first
        FeatureSelection.__init__(self, **kwargs)

        self.__data_measure = data_measure
        self.__transfer_error = transfer_error
        self.__feature_selector = feature_selector
        self.__bestdetector = bestdetector
        self.__stopping_criterion = stopping_criterion
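    # For illustration, any callable over the history of transfer errors can
    # act as a stopping criterion or best-detector. A hypothetical
    # alternative that simply stops after ten evaluated feature sets:
    #
    #   ifs = IFS(data_measure, transfer_error,
    #             stopping_criterion=lambda errors: len(errors) >= 10)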
    def __call__(self, dataset, testdataset):
        """Proceed and select features incrementally, adding the most
        important ones successively.

        :Parameters:
          `dataset` : `Dataset`
            used to select features and train classifiers to determine the
            transfer error.
          `testdataset` : `Dataset`
            used to test the trained classifier on a certain feature set
            to determine the transfer error.

        Returns a tuple with the dataset containing the feature subset of
        `dataset` that had the lowest transfer error of all tested sets until
        the stopping criterion was reached. The tuple also contains a dataset
        with the corresponding features from the `testdataset`.
        """
        errors = []
        """Computed error for each tested feature set."""

        # feature candidates are all features in the pattern object
        candidates = range(dataset.nfeatures)

        # initially empty list of selected features
        selected = []

        # results in here please
        results = None

        # as long as there are candidates left
        # the loop will most likely be broken earlier if the stopping
        # criterion is reached
        while len(candidates):
            # measures for all candidates
            measures = []

            # for all possible candidates
            for i, candidate in enumerate(candidates):
                if __debug__:
                    debug('IFSC', "Tested %i" % i, cr=True)

                # take the new candidate and all already selected features
                # select a new temporary feature subset from the dataset
                # XXX assume MappedDataset and issue plain=True ??
                tmp_dataset = \
                    dataset.selectFeatures(selected + [candidate])

                # compute data measure on this feature set
                measures.append(self.__data_measure(tmp_dataset))

            # select promising feature candidates (staging)
            # IDs are only applicable to the current set of feature candidates
            tmp_staging_ids = self.__feature_selector(measures)

            # translate into real candidate ids
            staging_ids = [candidates[i] for i in tmp_staging_ids]

            # mark them as selected and remove from candidates
            selected += staging_ids
            for i in staging_ids:
                candidates.remove(i)

            # compute transfer error for the new set
            # XXX assume MappedDataset and issue plain=True ??
            error = self.__transfer_error(testdataset.selectFeatures(selected),
                                          dataset.selectFeatures(selected))
            errors.append(error)

            # check if it is time to stop and whether we got the best result
            stop = self.__stopping_criterion(errors)
            isthebest = self.__bestdetector(errors)

            if __debug__:
                debug('IFSC',
                      "nselected %i; error: %.4f " \
                      "best/stop=%d/%d\n" \
                      % (len(selected), errors[-1], isthebest, stop),
                      cr=True, lf=True)

            if isthebest:
                # do copy to survive later selections
                results = copy(selected)

            # leave the loop when the criterion is reached
            if stop:
                break

        # charge state
        self.errors = errors

        # best dataset ever is returned
        return dataset.selectFeatures(results), \
               testdataset.selectFeatures(results)
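# Worked illustration (hypothetical values) of the candidate-id translation
# performed inside __call__: the feature selector returns positions within
# the current `candidates` list, which must be mapped back to dataset
# feature ids.
#
#   >>> candidates = [0, 2, 3, 7]
#   >>> tmp_staging_ids = [1, 3]        # positions within `candidates`
#   >>> [candidates[i] for i in tmp_staging_ids]
#   [2, 7]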