1
2
3
4
5
6
7
8
9 """Collection of classifiers to ease the exploration.
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from sets import Set
15 import operator
16
17
18 from mvpa.clfs.meta import FeatureSelectionClassifier, SplitClassifier, \
19 MulticlassClassifier
20 from mvpa.clfs.smlr import SMLR
21 from mvpa.clfs.knn import kNN
22 from mvpa.clfs.gnb import GNB
23 from mvpa.clfs.kernel import KernelLinear, KernelSquaredExponential
24
25
26 from mvpa.base import externals, cfg
27 from mvpa.measures.anova import OneWayAnova
28 from mvpa.misc.transformers import Absolute
29 from mvpa.clfs.smlr import SMLRWeights
30 from mvpa.featsel.helpers import FractionTailSelector, \
31 FixedNElementTailSelector, RangeElementSelector
32
33 from mvpa.featsel.base import SensitivityBasedFeatureSelection
34
35 _KNOWN_INTERNALS = [ 'knn', 'binary', 'svm', 'linear',
36 'smlr', 'does_feature_selection', 'has_sensitivity',
37 'multiclass', 'non-linear', 'kernel-based', 'lars',
38 'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr',
39 'notrain2predict', 'ridge', 'blr', 'gnpp', 'enet', 'glmnet',
40 'gnb']
# NOTE(review): this class was recovered from a listing in which indentation
# and several header lines (the ``class`` statement and five ``def`` lines)
# were lost.  ``Warehouse``, ``__getitem__`` and ``__iadd__`` follow from how
# the object is used (``Warehouse(known_tags=...)``, ``wh[...]``, ``wh += x``
# and the explicit ``self.__iadd__`` call); the accessor names ``internals``,
# ``listing`` and ``items`` are assumed from their docstrings -- confirm
# against callers before relying on them.
class Warehouse(object):
    """Class to keep known instantiated classifiers

    Should provide easy ways to select classifiers of needed kind:
    clfswh['linear', 'svm'] should return all linear SVMs
    clfswh['linear', 'multiclass'] should return all linear classifiers
    capable of doing multiclass classification

    A tag prefixed with '!' excludes the items carrying that tag, and
    clfswh[:] returns every registered item.
    """

    def __init__(self, known_tags=None, matches=None):
        """Initialize warehouse

        :Parameters:
          known_tags : list of basestring
            List of known tags
          matches : dict
            Optional dictionary of additional matches. E.g. since any
            regression can be used as a binary classifier,
            matches={'binary':['regression']}, would allow to provide
            regressions also if 'binary' was requested
        """
        # An omitted tag list means "no tags known yet"; normalise it here
        # instead of relying on how the set type treats None.
        if known_tags is None:
            known_tags = []
        if matches is None:
            matches = {}
        # Kept as a (copied) set; module-level code extends it later on.
        self._known_tags = Set(known_tags)
        self.__items = []
        self.__keys = Set()
        self.__matches = matches

    def __getitem__(self, *args):
        """Select registered items by tag(s)

        :Parameters:
          args
            A single tag, a tuple of tags (``wh['a', 'b']``) or a full
            slice (``wh[:]``).  A tag starting with '!' is a negative
            constraint.

        :Returns:
          list of the registered items, in registration order, which
          satisfy all requested tags

        :Raises ValueError:
          if any requested tag (ignoring a leading '!') is not among the
          known tags
        """
        # wh['a', 'b'] arrives as one tuple argument -- unwrap it
        if isinstance(args[0], tuple):
            args = args[0]

        # wh[:] means "no constraints at all"
        if args == (slice(None),):
            args = []

        # Reject tags the warehouse was never told about
        dargs = Set([str(x).lstrip('!') for x in args]).difference(
            self._known_tags)
        if len(dargs) > 0:
            raise ValueError("Unknown internals %s requested. Known are %s" %
                             (list(dargs), list(self._known_tags)))

        return [item for item in self.__items
                if self._satisfies(item, args)]

    def _satisfies(self, item, tags):
        """Tell whether `item` fulfils every tag constraint in `tags`"""
        for tag in tags:
            if tag.startswith('!'):
                # negative constraint: the item must not carry the tag
                if tag[1:] in item._clf_internals:
                    return False
                continue
            # positive constraint: the tag itself or any registered
            # alternative from `matches` is good enough
            for candidate in [tag] + self.__matches.get(tag, []):
                if candidate in item._clf_internals:
                    break
            else:
                return False
        return True

    def __iadd__(self, item):
        """Register an item, or every item of a sequence of items

        :Raises ValueError:
          if an item has no ``_clf_internals``, has empty
          ``_clf_internals``, or carries a tag unknown to the warehouse
        """
        if operator.isSequenceType(item):
            # register the elements one by one (recursively)
            for item_ in item:
                self.__iadd__(item_)
            return self

        if not hasattr(item, '_clf_internals'):
            raise ValueError("Cannot register %s " % item +
                             "which has no _clf_internals defined")
        if len(item._clf_internals) == 0:
            raise ValueError("Cannot register %s " % item +
                             "which has empty _clf_internals")

        clf_internals = Set(item._clf_internals)
        if not clf_internals.issubset(self._known_tags):
            raise ValueError('Unknown clf internal(s) %s' %
                             clf_internals.difference(self._known_tags))

        self.__items.append(item)
        # remember which tags are actually in use
        self.__keys |= clf_internals
        return self

    @property
    def internals(self):
        """Known internal tags of the classifiers
        """
        return self.__keys

    def listing(self):
        """Listing (description + internals) of registered items
        """
        return [(x.descr, x._clf_internals) for x in self.__items]

    @property
    def items(self):
        """Registered items
        """
        return self.__items
149
# Two separate registries built over the same tag vocabulary.  Each
# Warehouse copies the tags into its own set, so extending the known tags of
# one registry later does not affect the other.
clfswh, regrswh = (Warehouse(known_tags=_KNOWN_INTERNALS),
                   Warehouse(known_tags=_KNOWN_INTERNALS))
152
153
154
155
156
157
158
# SMLR classifiers (C implementation) with two different ``lm`` settings.
clfswh += [
    SMLR(descr="SMLR(lm=0.1)", lm=0.1, implementation="C"),
    SMLR(descr="SMLR(lm=1.0)", lm=1.0, implementation="C"),
]

# Multiclass wrapper around the first registered item tagged 'smlr'; its
# description is derived from the description of the wrapped classifier.
_first_smlr = clfswh['smlr'][0]
clfswh += [
    MulticlassClassifier(
        _first_smlr,
        descr='Pairs+maxvote multiclass on ' + _first_smlr.descr),
]
170
# Classifiers and regressions backed by libsvm; registered only when the
# external dependency is reported as available.
if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm

    # The names of the libsvm implementations become legal classifier tags.
    clfswh._known_tags.union_update(
        libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())

    # Variants described as linear SVMs, all with probability estimates on.
    # NOTE(review): judging by the descr, a negative C presumably stands for
    # a multiple of the default C -- confirm against the SVM implementation.
    clfswh += [
        libsvm.SVM(probability=1, descr="libsvm.LinSVM(C=def)"),
        libsvm.SVM(probability=1, descr="libsvm.LinSVM(C=10*def)",
                   C=-10.0),
        libsvm.SVM(probability=1, descr="libsvm.LinSVM(C=1)",
                   C=1.0),
        libsvm.SVM(probability=1, descr="libsvm.LinNuSVM(nu=def)",
                   svm_impl='NU_SVC'),
    ]

    # Variants with an explicitly requested non-default kernel.
    clfswh += [
        libsvm.SVM(descr="libsvm.RbfSVM()", kernel_type='RBF'),
        libsvm.SVM(descr="libsvm.RbfNuSVM(nu=def)", kernel_type='RBF',
                   svm_impl='NU_SVC'),
        libsvm.SVM(descr='libsvm.PolySVM()', kernel_type='poly',
                   probability=1),
    ]

    # Regressions: their implementation names must be made known to the
    # regression warehouse before the instances can be registered.
    regrswh._known_tags.union_update(['EPSILON_SVR', 'NU_SVR'])
    regrswh += [
        libsvm.SVM(regression=True, svm_impl='EPSILON_SVR',
                   descr='libsvm epsilon-SVR'),
        libsvm.SVM(regression=True, svm_impl='NU_SVR',
                   descr='libsvm nu-SVR'),
    ]
198
# Classifiers and regressions backed by shogun; registered only when the
# external dependency is reported as available.
if externals.exists('shogun'):
    from mvpa.clfs import sg

    # The shogun implementation names become legal classifier tags.
    clfswh._known_tags.union_update(sg.SVM._KNOWN_IMPLEMENTATIONS)

    # Implementations for which no classifier instances get registered.
    bad_classifiers = ['mpd', 'gpbt', 'gmnp', 'svrlight', 'krr']
    if not externals.exists('sg_fixedcachesize'):
        # 'gnpp' is skipped as well unless this capability is present
        bad_classifiers += ['gnpp']

    for impl in sg.SVM._KNOWN_IMPLEMENTATIONS:
        if impl not in bad_classifiers:
            # variants described as linear SVMs
            clfswh += [
                sg.SVM(svm_impl=impl,
                       descr="sg.LinSVM(C=def)/%s" % impl),
                sg.SVM(svm_impl=impl, C=-10.0,
                       descr="sg.LinSVM(C=10*def)/%s" % impl),
                sg.SVM(svm_impl=impl, C=1.0,
                       descr="sg.LinSVM(C=1)/%s" % impl),
            ]
            # RBF kernel variant
            clfswh += [
                sg.SVM(svm_impl=impl, kernel_type='RBF',
                       descr="sg.RbfSVM()/%s" % impl),
            ]

    # Regressions: 'libsvr' always, 'krr' only if shogun provides it.
    _optional_regressions = []
    if externals.exists('shogun.krr'):
        _optional_regressions.append('krr')
    for impl in ['libsvr'] + _optional_regressions:
        # the implementation name has to be a known tag before registering
        regrswh._known_tags.union_update([impl])
        regrswh += [
            sg.SVM(regression=True, svm_impl=impl,
                   descr='sg.LinSVMR()/%s' % impl),
        ]
257
# Pull the names exported by mvpa.clfs.svm into this module, but only if at
# least one item tagged both 'svm' and 'linear' has been registered.
if clfswh['svm', 'linear']:
    from mvpa.clfs.svm import *
261
262
# LARS: one classifier and one regression per known model type, registered
# only when the external dependency is reported as available.
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS

    for model in lars.known_models:
        # classifier flavour
        lars_clf = LARS(model_type=model, descr="LARS(%s)" % model)
        clfswh += lars_clf

        # regression flavour of the same model
        lars_regr = LARS(model_type=model, regression=True,
                         descr="_LARS(%s, regression=True)" % model)
        regrswh += lars_regr
275
276
277
278
279
280
281
282
283
284
285
286
# GLMNET: one classifier and one regression, registered only when the
# external dependency is reported as available.
if externals.exists('glmnet'):
    from mvpa.clfs.glmnet import GLMNET_C, GLMNET_R

    clfswh += GLMNET_C(descr="GLMNET_C()")
    regrswh += GLMNET_R(descr="GLMNET_R()")
291
292
# Plain kNN classifiers.
clfswh += kNN(k=5, descr="kNN(k=5)")
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

# kNN on features picked by a RangeElementSelector from SMLR weights.
clfswh += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C")),
        RangeElementSelector(mode='select'),
    ),
    descr="kNN on SMLR(lm=1) non-0",
)

# kNN on the upper 5% tail of the one-way ANOVA measure.
clfswh += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper'),
    ),
    descr="kNN on 5%(ANOVA)",
)

# kNN on the 50 upper-tail features of the one-way ANOVA measure.
clfswh += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FixedNElementTailSelector(50, mode='select', tail='upper'),
    ),
    descr="kNN on 50(ANOVA)",
)
319
320
321
# GNB classifiers: default settings, common variance, and uniform prior.
clfswh += GNB(descr="GNB()")
clfswh += GNB(descr="GNB(common_variance=True)", common_variance=True)
clfswh += GNB(descr="GNB(prior='uniform')", prior='uniform')

# GNB on the upper 5% tail of the one-way ANOVA measure.
clfswh += FeatureSelectionClassifier(
    GNB(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper'),
    ),
    descr="GNB on 5%(ANOVA)",
)
332
333
334
# GPR with a linear and with a squared-exponential kernel; registered only
# when scipy is reported as available.
if externals.exists('scipy'):
    from mvpa.clfs.gpr import GPR

    clfswh += GPR(descr="GPR(kernel='linear')",
                  kernel=KernelLinear())
    clfswh += GPR(descr="GPR(kernel='sqexp')",
                  kernel=KernelSquaredExponential())
341
342
# BLR classifier.
# NOTE(review): indentation was lost in this listing; the registration is
# assumed to be unconditional (module top level) rather than part of the
# preceding scipy-dependent block -- confirm.
from mvpa.clfs.blr import BLR

clfswh += BLR(descr="BLR()")
345
346
347
348
# Feature-selecting classifiers built on top of a linear SVM; only set up if
# at least one item tagged both 'linear' and 'svm' has been registered.
if clfswh['linear', 'svm']:

    # Take the first linear SVM of the configured backend ('libsvm' unless
    # the 'svm'/'backend' configuration setting says otherwise).
    # NOTE(review): the guard above does not ensure that the selected
    # backend itself contributed a linear SVM; if it did not, the [0]
    # lookup fails -- confirm this is acceptable.
    _svm_backend = cfg.get('svm', 'backend', default='libsvm').lower()
    linearSVMC = clfswh['linear', 'svm', _svm_backend][0]

    # Linear SVM on features picked by a RangeElementSelector from
    # SMLR(lm=0.1) weights.
    clfswh += FeatureSelectionClassifier(
        linearSVMC.clone(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=0.1, implementation="C")),
            RangeElementSelector(mode='select'),
        ),
        descr="LinSVM on SMLR(lm=0.1) non-0",
    )

    # Same, with SMLR(lm=1.0) weights.
    clfswh += FeatureSelectionClassifier(
        linearSVMC.clone(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select'),
        ),
        descr="LinSVM on SMLR(lm=1) non-0",
    )

    # RbfCSVMC on features picked from SMLR(lm=1.0) weights.
    clfswh += FeatureSelectionClassifier(
        RbfCSVMC(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select'),
        ),
        descr="RbfSVM on SMLR(lm=1) non-0",
    )

    # Linear SVM on the upper 5% tail of the one-way ANOVA measure.
    clfswh += FeatureSelectionClassifier(
        linearSVMC.clone(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper'),
        ),
        descr="LinSVM on 5%(ANOVA)",
    )

    # Linear SVM on the 50 upper-tail features of the one-way ANOVA measure.
    clfswh += FeatureSelectionClassifier(
        linearSVMC.clone(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper'),
        ),
        descr="LinSVM on 50(ANOVA)",
    )

    # Linear SVM on the upper 5% tail of the SVM's own sensitivities
    # (passed through the Absolute transformer).
    clfswh += FeatureSelectionClassifier(
        linearSVMC.clone(),
        SensitivityBasedFeatureSelection(
            linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper'),
        ),
        descr="LinSVM on 5%(SVM)",
    )

    # Linear SVM on the 50 upper-tail features of the SVM's own
    # sensitivities.
    clfswh += FeatureSelectionClassifier(
        linearSVMC.clone(),
        SensitivityBasedFeatureSelection(
            linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(50, mode='select', tail='upper'),
        ),
        descr="LinSVM on 50(SVM)",
    )

    # NOTE(review): the two names below are created but not registered in
    # any warehouse within the visible code; they are assumed to belong to
    # this conditional block because they depend on names that only exist
    # when a linear SVM is available -- confirm.
    rfesvm_split = SplitClassifier(linearSVMC)

    rfesvm = LinearCSVMC()
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507