1
2
3
4
5
6
7
8
9 """Incremental feature search (IFS).
10
Very similar to Recursive feature elimination (RFE), but instead of beginning
with all features and stripping some sequentially, start with an empty feature
set and include important features successively.
14 """
15
16 __docformat__ = 'restructuredtext'
17
18 from mvpa.misc.copy import copy
19
20 from mvpa.featsel.base import FeatureSelection
21 from mvpa.featsel.helpers import NBackHistoryStopCrit, \
22 FixedNElementTailSelector, \
23 BestDetector
24
25 from mvpa.misc.state import StateVariable
26
27 if __debug__:
28 from mvpa.base import debug
29
30
31 -class IFS(FeatureSelection):
32 """Incremental feature search.
33
34 A scalar `DatasetMeasure` is computed multiple times on variations of a
35 certain dataset. These measures are in turn used to incrementally select
36 important features. Starting with an empty feature set the dataset measure
37 is first computed for each single feature. A number of features is selected
38 based on the resulting data measure map (using an `ElementSelector`).
39
40 Next the dataset measure is computed again using each feature in addition
41 to the already selected feature set. Again the `ElementSelector` is used to
42 select more features.
43
    For each feature selection the transfer error on some test dataset is
    computed. This procedure is repeated until a given `StoppingCriterion`
    is reached.
47 """
48
49 errors = StateVariable()
50
61 """Initialize incremental feature search
62
63 :Parameter:
64 data_measure : DatasetMeasure
65 Computed for each candidate feature selection.
66 transfer_error : TransferError
67 Compute against a test dataset for each incremental feature
68 set.
69 bestdetector : Functor
70 Given a list of error values it has to return a boolean that
71 signals whether the latest error value is the total minimum.
72 stopping_criterion : Functor
73 Given a list of error values it has to return whether the
74 criterion is fulfilled.
75 """
76
77 FeatureSelection.__init__(self, **kwargs)
78
79 self.__data_measure = data_measure
80 self.__transfer_error = transfer_error
81 self.__feature_selector = feature_selector
82 self.__bestdetector = bestdetector
83 self.__stopping_criterion = stopping_criterion
84
85
def __call__(self, dataset, testdataset):
    """Incrementally select features, keeping the best-scoring subset.

    Starting with an empty selection, each remaining candidate feature is
    evaluated by the data measure together with the features selected so
    far. The feature selector then picks the candidates to add, and the
    transfer error of the enlarged selection is recorded. This repeats
    until no candidates remain or the stopping criterion fires.

    :Parameters:
      `dataset`: `Dataset`
        used to select features and train classifiers to determine the
        transfer error.
      `testdataset`: `Dataset`
        used to test the trained classifier on a certain feature set
        to determine the transfer error.

    Returns a tuple with the dataset containing the feature subset of
    `dataset` that had the lowest transfer error of all tested sets until
    the stopping criterion was reached. The tuple also contains a dataset
    with the corresponding features from the `testdataset`.

    The list of transfer errors (one per tested feature set) is stored in
    the `errors` state variable.
    """
    # transfer error computed for each tested feature set
    errors = []

    # feature ids still available for selection; must be a real list
    # because entries are removed in place below (a Python 3 `range`
    # object has no `remove()`)
    candidates = list(range(dataset.nfeatures))

    # feature ids selected so far, in order of selection
    selected = []

    # feature ids of the best selection seen so far
    # NOTE(review): stays None if the loop never runs (no features) or the
    # best detector never fires -- confirm `selectFeatures(None)` is
    # acceptable in that case.
    results = None

    while candidates:
        # one measure per remaining candidate, in `candidates` order
        measures = []

        for ntested, candidate in enumerate(candidates):
            if __debug__:
                debug('IFSC', "Tested %i" % ntested, cr=True)

            # evaluate the candidate together with the current selection
            tmp_dataset = dataset.selectFeatures(selected + [candidate])
            measures.append(self.__data_measure(tmp_dataset))

        # the selector returns positions within `measures`; translate them
        # back into the feature ids of the corresponding candidates
        tmp_staging_ids = self.__feature_selector(measures)
        staging_ids = [candidates[pos] for pos in tmp_staging_ids]

        # move the staged features from the candidate pool to the selection
        selected += staging_ids
        for feature_id in staging_ids:
            candidates.remove(feature_id)

        # transfer error of the enlarged feature set
        error = self.__transfer_error(testdataset.selectFeatures(selected),
                                      dataset.selectFeatures(selected))
        errors.append(error)

        # both functors are evaluated on the full error history
        stop = self.__stopping_criterion(errors)
        isthebest = self.__bestdetector(errors)

        if __debug__:
            debug('IFSC',
                  "nselected %i; error: %.4f " \
                  "best/stop=%d/%d\n" \
                  % (len(selected), errors[-1], isthebest, stop),
                  cr=True, lf=True)

        if isthebest:
            # snapshot the selection; `selected` keeps growing afterwards
            results = list(selected)

        if stop:
            break

    # expose the error history via the state variable
    self.errors = errors

    # best feature subset applied to both the training and the test dataset
    return dataset.selectFeatures(results), \
           testdataset.selectFeatures(results)
180