1
2
3
4
5
6
7
8
9 """Collection of classifiers to ease the exploration.
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from sets import Set
15 import operator
16
17
18 from mvpa.datasets.splitter import OddEvenSplitter
19
20
21 from mvpa.clfs.base import FeatureSelectionClassifier, SplitClassifier, \
22 MulticlassClassifier
23 from mvpa.clfs.smlr import SMLR
24 from mvpa.clfs.knn import kNN
25 from mvpa.clfs.gpr import GPR
26
27
28 from mvpa.clfs.transerror import TransferError
29 from mvpa.base import externals
30 from mvpa.measures.anova import OneWayAnova
31 from mvpa.misc.transformers import Absolute
32 from mvpa.featsel.rfe import RFE
33 from mvpa.clfs.smlr import SMLRWeights
34 from mvpa.featsel.helpers import FractionTailSelector, \
35 FixedNElementTailSelector, RangeElementSelector, \
36 FixedErrorThresholdStopCrit
37 from mvpa.clfs.transerror import ConfusionBasedError
38 from mvpa.featsel.base import SensitivityBasedFeatureSelection
39
40 _KNOWN_INTERNALS=[ 'knn', 'binary', 'svm', 'linear',
41 'smlr', 'does_feature_selection', 'has_sensitivity',
42 'multiclass', 'non-linear', 'kernel-based', 'lars',
43 'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr' ]
class Warehouse(object):
    """Class to keep known instantiated classifiers

    Should provide easy ways to select classifiers of needed kind:
    clfs['linear', 'svm'] should return all linear SVMs
    clfs['linear', 'multiclass'] should return all linear classifiers
    capable of doing multiclass classification

    A tag prefixed with '!' is negated: clfs['svm', '!linear'] returns the
    items tagged 'svm' which are not tagged 'linear'.  clfs[:] returns
    every registered item.
    """

    def __init__(self, known_tags):
        """Create an empty warehouse.

        :Parameters:
          known_tags
            Iterable of all tags that registered items may carry and
            that may be used in queries.
        """
        self.__known_tags = set(known_tags)
        self.__items = []
        self.__keys = set()

    def __getitem__(self, *args):
        """Return the list of registered items matching all given tags.

        :Parameters:
          args
            Tag strings, optionally prefixed with '!' to require the
            absence of the tag.  A bare slice (``[:]``) selects everything.

        :Returns:
          List of matching items in registration order.

        :Raises ValueError:
          If any requested tag is not among the known tags.
        """
        # A multi-tag subscript such as clfs['a', 'b'] arrives as one tuple
        if isinstance(args[0], tuple):
            args = args[0]

        # clfs[:] means "no constraints"
        if args == (slice(None),):
            args = []

        # Validate the tags themselves, ignoring the negation marker
        unknown = set(arg.lstrip('!') for arg in args) - self.__known_tags
        if unknown:
            raise ValueError(
                "Unknown internals %s requested. Known are %s"
                % (list(unknown), list(self.__known_tags)))

        def satisfies(internals):
            """Check a single item's tags against every requested tag."""
            for arg in args:
                if arg.startswith('!'):
                    if arg[1:] in internals:
                        return False
                elif arg not in internals:
                    return False
            return True

        return [item for item in self.__items
                if satisfies(item._clf_internals)]

    def __iadd__(self, item):
        """Register an item, or every element of a list/tuple of items.

        Only lists and tuples are expanded.  Strings are deliberately not
        treated as sequences: every character of a string is itself a
        string, so recursing into one would never terminate.

        :Parameters:
          item
            Object exposing a non-empty ``_clf_internals`` collection of
            known tags, or a list/tuple of such objects (may be nested).

        :Returns:
          The warehouse itself, as required for ``+=``.

        :Raises ValueError:
          If the item has no ``_clf_internals``, an empty one, or one
          containing tags which are not known to the warehouse.
        """
        if isinstance(item, (list, tuple)):
            # Elements are registered one by one; a failure part-way leaves
            # the previously accepted elements registered.
            for item_ in item:
                self.__iadd__(item_)
            return self

        if not hasattr(item, '_clf_internals'):
            raise ValueError(
                "Cannot register %s which has no _clf_internals defined"
                % (item,))
        if len(item._clf_internals) == 0:
            raise ValueError(
                "Cannot register %s which has empty _clf_internals"
                % (item,))

        clf_internals = set(item._clf_internals)
        unknown = clf_internals - self.__known_tags
        if unknown:
            # sorted() gives a stable, readable listing in the message
            raise ValueError(
                'Unknown clf internal(s) %s' % sorted(unknown))

        self.__items.append(item)
        self.__keys |= clf_internals
        return self
109
110 @property
113
116
117 @property
120
# Module-wide registry of ready-made classifiers; it accepts only the
# tags listed in _KNOWN_INTERNALS.
clfs = Warehouse(known_tags=_KNOWN_INTERNALS)

# SMLR at three different lm values, all with the "C" implementation
clfs += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
    SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
]

# Wrap the first item tagged 'smlr' into a MulticlassClassifier and
# derive the wrapper's description from the wrapped item's one.
clfs += [
    MulticlassClassifier(
        clfs['smlr'][0],
        descr='Pairs+maxvote multiclass on ' + clfs['smlr'][0].descr),
]
140
# libsvm-backed SVMs are registered only if the external is available
if externals.exists('libsvm'):
    from mvpa.clfs import libsvm

    # Machines created without an explicit kernel_type (described as "Lin").
    # NOTE(review): C=-10.0 is described as "10*def", so a negative C
    # presumably scales the default value -- confirm against libsvm.SVM.
    clfs += [
        libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
        libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
        libsvm.SVM(svm_impl='NU_SVC', descr="libsvm.LinNuSVM(nu=def)",
                   probability=1),
    ]

    # Machines with an explicit RBF or polynomial kernel
    clfs += [
        libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
        libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                   descr="libsvm.RbfNuSVM(nu=def)"),
        libsvm.SVM(kernel_type='poly', descr='libsvm.PolySVM()',
                   probability=1),
    ]
160
# shogun-backed SVMs are registered only if the external is available
if externals.exists('shogun'):
    from mvpa.clfs import sg

    # Implementations for which no instances are created below
    bad_classifiers = ['mpd', 'gpbt', 'gmnp', 'svrlight', 'krr']

    # 'gnpp' is skipped as well unless 'sg_fixedcachesize' is available
    if not externals.exists('sg_fixedcachesize'):
        bad_classifiers += ['gnpp']

    for impl in sg.svm.known_svm_impl:
        if impl not in bad_classifiers:
            # Three machines without explicit kernel_type, differing in C
            clfs += [
                sg.SVM(descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
                sg.SVM(C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl,
                       svm_impl=impl),
                sg.SVM(C=1.0, descr="sg.LinSVM(C=1)/%s" % impl,
                       svm_impl=impl),
            ]
            # One RBF-kernel machine per implementation
            clfs += [
                sg.SVM(kernel_type='RBF', descr="sg.RbfSVM()/%s" % impl,
                       svm_impl=impl),
            ]
197
198
199
# The default SVM names are star-imported only when at least one item
# tagged both 'svm' and 'linear' has been registered above.
# NOTE(review): presumably this is where LinearCSVMC / RbfCSVMC used
# further down come from -- confirm against mvpa.clfs.svm.
if clfs['svm', 'linear']:
    from mvpa.clfs.svm import *
203
204
# LARS models are registered only if the external is available
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS

    for model in lars.known_models:
        # Register the instance directly.  Binding it to the name `lars`
        # would rebind the module alias to a classifier instance, so any
        # later access to `lars.known_models` would break.
        clfs += LARS(descr="LARS(%s)" % model, model_type=model)
212
213
214
# Single instances are registered directly (no list wrapper needed)

# k-nearest-neighbour classifier with k=5
clfs += kNN(k=5, descr="kNN(k=5)")

# GPR with its default settings
clfs += GPR(descr="GPR()")
219
220
# Classifiers trained on the features picked via SMLR(lm=1.0) weights.
# Each registration builds its own SMLR and its own selector instance.

clfs += FeatureSelectionClassifier(
    LinearCSVMC(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C")),
        RangeElementSelector(mode='select')),
    descr="LinSVM on SMLR(lm=1) non-0")

# NOTE(review): this registration repeats the previous one verbatim
# (same arguments, same descr), so two indistinguishable entries end up
# in the warehouse -- confirm whether a different lm was intended.
clfs += FeatureSelectionClassifier(
    LinearCSVMC(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C")),
        RangeElementSelector(mode='select')),
    descr="LinSVM on SMLR(lm=1) non-0")

clfs += FeatureSelectionClassifier(
    RbfCSVMC(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C")),
        RangeElementSelector(mode='select')),
    descr="RbfSVM on SMLR(lm=1) non-0")

clfs += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C")),
        RangeElementSelector(mode='select')),
    descr="kNN on SMLR(lm=1) non-0")
256
# Classifiers trained on features picked from the upper tail of
# OneWayAnova: either a 0.05 fraction or a fixed count of 50 elements.

clfs += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper')),
    descr="kNN on 5%(ANOVA)")

clfs += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FixedNElementTailSelector(50, mode='select', tail='upper')),
    descr="kNN on 50(ANOVA)")

clfs += FeatureSelectionClassifier(
    LinearCSVMC(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper')),
    descr="LinSVM on 5%(ANOVA)")

clfs += FeatureSelectionClassifier(
    LinearCSVMC(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FixedNElementTailSelector(50, mode='select', tail='upper')),
    descr="LinSVM on 50(ANOVA)")
288
# First registered item tagged both 'linear' and 'svm'.  The very same
# instance serves below as the wrapped classifier and as the source of
# the sensitivity analyzer.
sample_linear_svm = clfs['linear', 'svm'][0]

# Upper 0.05 fraction of the analyzer's output (transformer=Absolute)
clfs += FeatureSelectionClassifier(
    sample_linear_svm,
    SensitivityBasedFeatureSelection(
        sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
        FractionTailSelector(0.05, mode='select', tail='upper')),
    descr="LinSVM on 5%(SVM)")

# Upper 50 elements of the analyzer's output (transformer=Absolute)
clfs += FeatureSelectionClassifier(
    sample_linear_svm,
    SensitivityBasedFeatureSelection(
        sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
        FixedNElementTailSelector(50, mode='select', tail='upper')),
    descr="LinSVM on 50(SVM)")
306
307
308
309
310
311
# Stand-alone classifier instances; neither is added to `clfs` here.
# NOTE(review): presumably building blocks for RFE-based classifiers
# defined elsewhere -- confirm before relying on them.

# SplitClassifier wrapped around a fresh LinearCSVMC
rfesvm_split = SplitClassifier(LinearCSVMC())

# Plain LinearCSVMC with default arguments
rfesvm = LinearCSVMC()
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392