1
2
3
4
5
6
7
8
9 """Collection of classifiers to ease the exploration.
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from sets import Set
15 import operator
16
17
18 from mvpa.datasets.splitter import OddEvenSplitter
19
20
21 from mvpa.clfs.base import FeatureSelectionClassifier, SplitClassifier, \
22 MulticlassClassifier
23 from mvpa.clfs.smlr import SMLR
24 from mvpa.clfs.knn import kNN
25 from mvpa.clfs.kernel import KernelLinear, KernelSquaredExponential
26
27
28 from mvpa.clfs.transerror import TransferError
29 from mvpa.base import externals, cfg
30 from mvpa.measures.anova import OneWayAnova
31 from mvpa.misc.transformers import Absolute
32 from mvpa.featsel.rfe import RFE
33 from mvpa.clfs.smlr import SMLRWeights
34 from mvpa.featsel.helpers import FractionTailSelector, \
35 FixedNElementTailSelector, RangeElementSelector, \
36 FixedErrorThresholdStopCrit
37 from mvpa.clfs.transerror import ConfusionBasedError
38 from mvpa.featsel.base import SensitivityBasedFeatureSelection
39
# Tags accepted in a classifier's `_clf_internals`; both warehouses in this
# module are created with this list as their initial set of known tags.
_KNOWN_INTERNALS = [
    'knn',
    'binary',
    'svm',
    'linear',
    'smlr',
    'does_feature_selection',
    'has_sensitivity',
    'multiclass',
    'non-linear',
    'kernel-based',
    'lars',
    'regression',
    'libsvm',
    'sg',
    'meta',
    'retrainable',
    'gpr',
    'notrain2predict',
    'ridge',
    'blr',
    'gnpp',
]
47 """Class to keep known instantiated classifiers
48
49 Should provide easy ways to select classifiers of needed kind:
50 clfs['linear', 'svm'] should return all linear SVMs
51 clfs['linear', 'multiclass'] should return all linear classifiers
52 capable of doing multiclass classification
53 """
54
55 - def __init__(self, known_tags=None, matches={}):
56 """Initialize warehouse
57
58 :Parameters:
59 known_tags : list of basestring
60 List of known tags
61 matches : dict
62 Optional dictionary of additional matches. E.g. since any
63 regression can be used as a binary classifier,
64 matches={'binary':['regression']}, would allow to provide
65 regressions also if 'binary' was requested
66 """
67 self._known_tags = Set(known_tags)
68 self.__items = []
69 self.__keys = Set()
70 self.__matches = matches
71
73 if isinstance(args[0], tuple):
74 args = args[0]
75
76
77 if args == (slice(None),):
78 args = []
79
80
81 dargs = Set([x.lstrip('!') for x in args]).difference(
82 self._known_tags)
83
84 if len(dargs)>0:
85 raise ValueError, "Unknown internals %s requested. Known are %s" % \
86 (list(dargs), list(self._known_tags))
87
88
89 result = []
90
91 for item in self.__items:
92 good = True
93
94 for arg in args:
95
96 if arg.startswith('!'):
97 if (arg[1:] in item._clf_internals):
98 good = False
99 break
100 else:
101 continue
102
103 found = False
104 for arg in [arg] + self.__matches.get(arg, []):
105 if (arg in item._clf_internals):
106 found = True
107 break
108 good = found
109 if not good:
110 break
111 if good:
112 result.append(item)
113 return result
114
116 if operator.isSequenceType(item):
117 for item_ in item:
118 self.__iadd__(item_)
119 else:
120 if not hasattr(item, '_clf_internals'):
121 raise ValueError, "Cannot register %s " % item + \
122 "which has no _clf_internals defined"
123 if len(item._clf_internals) == 0:
124 raise ValueError, "Cannot register %s " % item + \
125 "which has empty _clf_internals"
126 clf_internals = Set(item._clf_internals)
127 if clf_internals.issubset(self._known_tags):
128 self.__items.append(item)
129 self.__keys |= clf_internals
130 else:
131 raise ValueError, 'Unknown clf internal(s) %s' % \
132 clf_internals.difference(self._known_tags)
133 return self
134
135 @property
138
141
142 @property
145
# Registries filled in by the remainder of this module: one for
# classifiers, one for regressions.
clfs = Warehouse(known_tags=_KNOWN_INTERNALS)
regrs = Warehouse(known_tags=_KNOWN_INTERNALS)

# SMLR with two settings of `lm`
clfs += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
]

# Multiclass wrapper around the first item registered with the 'smlr' tag
clfs += [
    MulticlassClassifier(
        clfs['smlr'][0],
        descr='Pairs+maxvote multiclass on ' + clfs['smlr'][0].descr),
]
166
if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm

    # names of libsvm implementations become legal tags for classifiers
    clfs._known_tags.union_update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())

    # SVMs described as linear.
    # NOTE(review): per the descr strings a negative C presumably requests a
    # multiple of the default C -- confirm against the SVM implementation.
    clfs += [
        libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        libsvm.SVM(C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
        libsvm.SVM(C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
        libsvm.SVM(svm_impl='NU_SVC', descr="libsvm.LinNuSVM(nu=def)",
                   probability=1),
    ]

    # SVMs with RBF and polynomial kernels
    clfs += [
        libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
        libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                   descr="libsvm.RbfNuSVM(nu=def)"),
        libsvm.SVM(kernel_type='poly', descr='libsvm.PolySVM()',
                   probability=1),
    ]

    # regressions, together with the tags they carry
    regrs._known_tags.union_update(['EPSILON_SVR', 'NU_SVR'])
    regrs += [
        libsvm.SVM(svm_impl='EPSILON_SVR', descr='libsvm epsilon-SVR',
                   regression=True),
        libsvm.SVM(svm_impl='NU_SVR', descr='libsvm nu-SVR',
                   regression=True),
    ]
194
if externals.exists('shogun'):
    from mvpa.clfs import sg

    # names of shogun implementations become legal tags for classifiers
    clfs._known_tags.union_update(sg.SVM._KNOWN_IMPLEMENTATIONS)

    # implementations which are not registered as classifiers below
    bad_classifiers = [
        'mpd',
        'gpbt',
        'gmnp',
        'svrlight',
        'krr',
    ]
    if not externals.exists('sg_fixedcachesize'):
        # additionally skipped unless 'sg_fixedcachesize' is available
        bad_classifiers.append('gnpp')

    for impl in sg.SVM._KNOWN_IMPLEMENTATIONS:
        if impl not in bad_classifiers:
            # SVMs described as linear, at three settings of C
            clfs += [
                sg.SVM(descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
                sg.SVM(C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl,
                       svm_impl=impl),
                sg.SVM(C=1.0, descr="sg.LinSVM(C=1)/%s" % impl,
                       svm_impl=impl),
            ]
            # RBF kernel
            clfs += [
                sg.SVM(kernel_type='RBF', descr="sg.RbfSVM()/%s" % impl,
                       svm_impl=impl),
            ]

    # regressions; each implementation name is added as a tag first
    for impl in ['libsvr', 'krr']:
        regrs._known_tags.add(impl)
        regrs += [
            sg.SVM(svm_impl=impl, descr='sg.LinSVMR()/%s' % impl,
                   regression=True),
        ]
244
# Only when at least one linear SVM got registered above.
# NOTE(review): LinearCSVMC and RbfCSVMC used further below are presumably
# provided by this star import -- confirm against mvpa.clfs.svm.
if clfs['svm', 'linear']:
    from mvpa.clfs.svm import *
248
249
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS

    # One LARS classifier per known model type.  The classifier is
    # registered directly instead of being assigned to `lars`, so the
    # module alias is no longer rebound to a classifier instance.
    for model in lars.known_models:
        clfs += LARS(descr="LARS(%s)" % model, model_type=model)
257
258
259
# kNN: plain, and on top of three sensitivity-based feature selections.
# Registered as one list; the warehouse adds sequence items one by one,
# in order.
clfs += [
    kNN(k=5, descr="kNN(k=5)"),

    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0"),

    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)"),

    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)"),
]
285
286
287
# GPR is only imported and registered when scipy is available
if externals.exists('scipy'):
    from mvpa.clfs.gpr import GPR

    clfs += [
        GPR(kernel=KernelLinear(), descr="GPR(kernel='linear')"),
        GPR(kernel=KernelSquaredExponential(), descr="GPR(kernel='sqexp')"),
    ]
293
294
# BLR
from mvpa.clfs.blr import BLR

clfs += [BLR(descr="BLR()")]
297
298
299
300
if len(clfs['linear', 'svm']) > 0:
    # Prefer a linear SVM of the configured backend.  The guard above only
    # guarantees that *some* linear SVM is registered, so fall back to the
    # first one available instead of failing with IndexError when the
    # configured backend contributed none.
    linearSVMC = (clfs['linear', 'svm',
                       cfg.get('svm', 'backend', default='libsvm').lower()]
                  or clfs['linear', 'svm'])[0]

    # SVMs on features selected via SMLR weights
    clfs += FeatureSelectionClassifier(
        linearSVMC,
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=0.1, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="LinSVM on SMLR(lm=0.1) non-0")

    clfs += FeatureSelectionClassifier(
        linearSVMC,
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="LinSVM on SMLR(lm=1) non-0")

    clfs += FeatureSelectionClassifier(
        RbfCSVMC(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="RbfSVM on SMLR(lm=1) non-0")

    # linear SVM on features selected via one-way ANOVA
    clfs += FeatureSelectionClassifier(
        linearSVMC,
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5%(ANOVA)")

    clfs += FeatureSelectionClassifier(
        linearSVMC,
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="LinSVM on 50(ANOVA)")

    # linear SVM on features selected via its own sensitivity analyzer
    clfs += FeatureSelectionClassifier(
        linearSVMC,
        SensitivityBasedFeatureSelection(
            linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5%(SVM)")

    clfs += FeatureSelectionClassifier(
        linearSVMC,
        SensitivityBasedFeatureSelection(
            linearSVMC.getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="LinSVM on 50(SVM)")

    # NOTE(review): neither object below is registered in `clfs` by the
    # code visible here; they are only bound to module-level names.
    rfesvm_split = SplitClassifier(linearSVMC)

    rfesvm = LinearCSVMC()
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452