Source Code for Module mvpa.clfs.warehouse

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Collection of classifiers to ease the exploration.
"""

__docformat__ = 'restructuredtext'

from sets import Set
import operator

# Data
from mvpa.datasets.splitter import OddEvenSplitter

# Define sets of classifiers
from mvpa.clfs.base import FeatureSelectionClassifier, SplitClassifier, \
     MulticlassClassifier
from mvpa.clfs.smlr import SMLR
from mvpa.clfs.knn import kNN
from mvpa.clfs.gpr import GPR

# Helpers
from mvpa.clfs.transerror import TransferError
from mvpa.base import externals
from mvpa.measures.anova import OneWayAnova
from mvpa.misc.transformers import Absolute
from mvpa.featsel.rfe import RFE
from mvpa.clfs.smlr import SMLRWeights
from mvpa.featsel.helpers import FractionTailSelector, \
     FixedNElementTailSelector, RangeElementSelector, \
     FixedErrorThresholdStopCrit
from mvpa.clfs.transerror import ConfusionBasedError
from mvpa.featsel.base import SensitivityBasedFeatureSelection

_KNOWN_INTERNALS = ['knn', 'binary', 'svm', 'linear',
        'smlr', 'does_feature_selection', 'has_sensitivity',
        'multiclass', 'non-linear', 'kernel-based', 'lars',
        'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr']
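# For illustration (this comment block is not in the original source):
# a classifier advertises its properties by listing a subset of the tags
# above in its ``_clf_internals`` attribute. ``MyLinearSVM`` below is a
# hypothetical example, not a real PyMVPA class:
#
#     class MyLinearSVM(Classifier):
#         _clf_internals = ['svm', 'linear', 'binary', 'has_sensitivity']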
class Warehouse(object):
    """Class to keep known instantiated classifiers

    Should provide easy ways to select classifiers of the needed kind:
    clfs['linear', 'svm'] should return all linear SVMs
    clfs['linear', 'multiclass'] should return all linear classifiers
    capable of doing multiclass classification
    """

    def __init__(self, known_tags=None):
        self.__known_tags = Set(known_tags)
        self.__items = []
        self.__keys = Set()

    def __getitem__(self, *args):
        if isinstance(args[0], tuple):
            args = args[0]

        # so we explicitly handle [:]
        if args == (slice(None),):
            args = []

        # let's remove the optional modifier '!'
        dargs = Set([x.lstrip('!') for x in args]).difference(
            self.__known_tags)

        if len(dargs) > 0:
            raise ValueError, "Unknown internals %s requested. Known are %s" % \
                  (list(dargs), list(self.__known_tags))

        # dummy implementation for now
        result = []
        for item in self.__items:
            good = True
            for arg in args:
                # '!tag' excludes items carrying the tag;
                # a bare tag requires it
                if (arg.startswith('!') and \
                    (arg[1:] in item._clf_internals)) or \
                   (not arg.startswith('!') and \
                    (not (arg in item._clf_internals))):
                    good = False
                    break
            if good:
                result.append(item)
        return result

    def __iadd__(self, item):
        if operator.isSequenceType(item):
            for item_ in item:
                self.__iadd__(item_)
        else:
            if not hasattr(item, '_clf_internals'):
                raise ValueError, "Cannot register %s " % item + \
                      "which has no _clf_internals defined"
            if len(item._clf_internals) == 0:
                raise ValueError, "Cannot register %s " % item + \
                      "which has empty _clf_internals"
            clf_internals = Set(item._clf_internals)
            if clf_internals.issubset(self.__known_tags):
                self.__items.append(item)
                self.__keys |= clf_internals
            else:
                raise ValueError, 'Unknown clf internal(s) %s' % \
                      clf_internals.difference(self.__known_tags)
        return self

    @property
    def internals(self):
        return self.__keys

    def listing(self):
        return [(x.descr, x._clf_internals) for x in self.__items]

    @property
    def items(self):
        return self.__items
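# A minimal usage sketch (added for illustration, not part of the original
# module): exercise the query interface with a hypothetical DummyClf.
# Tags combine conjunctively, and a '!' prefix excludes a tag.
if __name__ == '__main__':
    class DummyClf(object):
        _clf_internals = ['linear', 'svm']
        descr = 'dummy linear SVM'
    _wh = Warehouse(known_tags=['linear', 'svm', 'knn'])
    _wh += DummyClf()
    assert len(_wh['linear', 'svm']) == 1   # both tags present -> match
    assert len(_wh['!knn']) == 1            # negation: not tagged 'knn'
    assert len(_wh['knn']) == 0             # required tag missing
    assert len(_wh[:]) == 1                 # [:] returns everything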
clfs = Warehouse(known_tags=_KNOWN_INTERNALS)

# NB:
#  - Nu-classifiers are turned off since for the Haxby dataset the
#    default nu is an infeasible one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and its results should be the same as the C
#    version's
#
clfs += [ SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
          SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
          SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
          #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
          #SMLR(implementation="Python", descr="SMLR(Python)")
          ]

clfs += \
    [ MulticlassClassifier(clfs['smlr'][0],
                           descr='Pairs+maxvote multiclass on ' + \
                           clfs['smlr'][0].descr) ]

if externals.exists('libsvm'):
    from mvpa.clfs import libsvm
    # NB: a negative C scales the default C by |C|, hence "C=10*def"
    clfs += [libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
             libsvm.SVM(
                 C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
             libsvm.SVM(
                 C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
             libsvm.SVM(svm_impl='NU_SVC',
                        descr="libsvm.LinNuSVM(nu=def)", probability=1)
             ]
    clfs += [libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
             libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                        descr="libsvm.RbfNuSVM(nu=def)"),
             libsvm.SVM(kernel_type='poly',
                        descr='libsvm.PolySVM()', probability=1),
             #libsvm.svm.SVM(kernel_type='sigmoid',
             #               svm_impl='C_SVC',
             #               descr='libsvm.SigmoidSVM()'),
             ]

if externals.exists('shogun'):
    from mvpa.clfs import sg
    # Some classifiers are not yet ready to be used out-of-the-box in
    # PyMVPA, thus we don't populate the warehouse with their instances
    bad_classifiers = [
        'mpd',  # used to segfault, now fails to train on the test cases
                # and on XOR; also described as "for educational
                # purposes", thus shouldn't be used for real data ;-)
                # Should be a drop-in replacement for lightsvm
        'gpbt', # fails to train for testAnalyzerWithSplitClassifier;
                # 'retraining' doesn't work either -- fails to generalize
        'gmnp', # would fail with 'assertion Cache_Size > 2' if
                # shogun < 0.6.3; also refuses to train
        'svrlight', # fails to 'generalize' as a binary classifier
                    # after 'binning'
        'krr',  # fails to generalize
        ]
    if not externals.exists('sg_fixedcachesize'):
        # would fail with 'assertion Cache_Size > 2' if shogun < 0.6.3
        bad_classifiers.append('gnpp')

    for impl in sg.svm.known_svm_impl:
        # skip implementations listed in bad_classifiers above
        if impl in bad_classifiers:
            continue
        clfs += [
            sg.SVM(
                descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl,
                svm_impl=impl),
            sg.SVM(
                C=1.0, descr="sg.LinSVM(C=1)/%s" % impl, svm_impl=impl),
            ]
        clfs += [
            sg.SVM(kernel_type='RBF', descr="sg.RbfSVM()/%s" % impl,
                   svm_impl=impl),
            #sg.SVM(kernel_type='RBF', descr="sg.RbfSVM(gamma=0.1)/%s" % impl,
            #       svm_impl=impl, gamma=0.1),
            #sg.SVM(descr="sg.SigmoidSVM()/%s" % impl,
            #       svm_impl=impl, kernel_type="sigmoid"),
            ]


if len(clfs['svm', 'linear']) > 0:
    # if any SVM implementation is known, import the default ones
    from mvpa.clfs.svm import *

# LARS from R via RPy
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS
    for model in lars.known_models:
        # XXX create a proper repository of classifiers!
        lars_clf = LARS(descr="LARS(%s)" % model, model_type=model)
        clfs += lars_clf
        # clfs += MulticlassClassifier(lars_clf,
        #                              descr='Multiclass %s' % lars_clf.descr)

# kNN
clfs += kNN(k=5, descr="kNN(k=5)")

# GPR
clfs += GPR(descr="GPR()")

# "Interesting" classifiers
clfs += \
    FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C")),
           RangeElementSelector(mode='select')),
        descr="LinSVM on SMLR(lm=1) non-0")

clfs += \
    FeatureSelectionClassifier(
        RbfCSVMC(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C")),
           RangeElementSelector(mode='select')),
        descr="RbfSVM on SMLR(lm=1) non-0")

clfs += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C")),
           RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

clfs += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfs += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")

clfs += \
    FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5%(ANOVA)")

clfs += \
    FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="LinSVM on 50(ANOVA)")

sample_linear_svm = clfs['linear', 'svm'][0]

clfs += \
    FeatureSelectionClassifier(
        sample_linear_svm,
        SensitivityBasedFeatureSelection(
           sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5%(SVM)")

clfs += \
    FeatureSelectionClassifier(
        sample_linear_svm,
        SensitivityBasedFeatureSelection(
           sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="LinSVM on 50(SVM)")


# SVM with unbiased RFE -- transfer error to the other splits, or in
# other terms, leave-one-out error on the same dataset.
# Has to be bound outside of the RFE definition since both the analyzer
# and the error should use the same instance.
rfesvm_split = SplitClassifier(LinearCSVMC()) #clfs['LinearSVMC'][0]

# "Almost" classical RFE.
# If this works, it would differ only in that our transfer_error is
# based on internal splitting, the classifier used within RFE is a split
# classifier, and its sensitivities get averaged across splits
#

#clfs += \
#  FeatureSelectionClassifier(
#    clf = LinearCSVMC(), #clfs['LinearSVMC'][0],  # we train LinearSVM
#    feature_selection = RFE(             # on features selected via RFE
#        # based on sensitivity of a clf which does splitting internally
#        sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(),
#        transfer_error=ConfusionBasedError(
#           rfesvm_split,
#           confusion_state="training_confusions"),
#           # and whose internal error we use
#        feature_selector=FractionTailSelector(
#                           0.2, mode='discard', tail='lower'),
#                           # remove 20% of features at each step
#        update_sensitivity=True),
#        # update sensitivity at each step
#    descr='LinSVM+RFE(splits_avg)' )
#
#clfs += \
#  FeatureSelectionClassifier(
#    clf = LinearCSVMC(), #clfs['LinearSVMC'][0],  # we train LinearSVM
#    feature_selection = RFE(             # on features selected via RFE
#        # based on sensitivity of a clf which does splitting internally
#        sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(),
#        transfer_error=ConfusionBasedError(
#           rfesvm_split,
#           confusion_state="training_confusions"),
#           # and whose internal error we use
#        feature_selector=FractionTailSelector(
#                           0.2, mode='discard', tail='lower'),
#                           # remove 20% of features at each step
#        update_sensitivity=False),
#        # do not update sensitivity at each step
#    descr='LinSVM+RFE(splits_avg,static)' )

rfesvm = LinearCSVMC()

# This classifier will do RFE while taking the transfer error to the
# testing set of that split. The resultant classifier is a voting
# classifier on top of all splits; let's see what that would do ;-)
#clfs += \
#  SplitClassifier(                      # which does splitting internally
#   FeatureSelectionClassifier(
#    clf = LinearCSVMC(),
#    feature_selection = RFE(            # on features selected via RFE
#        sensitivity_analyzer=\
#            rfesvm.getSensitivityAnalyzer(transformer=Absolute),
#        transfer_error=TransferError(rfesvm),
#        stopping_criterion=FixedErrorThresholdStopCrit(0.05),
#        feature_selector=FractionTailSelector(
#                           0.2, mode='discard', tail='lower'),
#                           # remove 20% of features at each step
#        update_sensitivity=True)),
#        # update sensitivity at each step
#   descr='LinSVM+RFE(N-Fold)')
#
#
#clfs += \
#  SplitClassifier(                      # which does splitting internally
#   FeatureSelectionClassifier(
#    clf = LinearCSVMC(),
#    feature_selection = RFE(            # on features selected via RFE
#        sensitivity_analyzer=\
#            rfesvm.getSensitivityAnalyzer(transformer=Absolute),
#        transfer_error=TransferError(rfesvm),
#        stopping_criterion=FixedErrorThresholdStopCrit(0.05),
#        feature_selector=FractionTailSelector(
#                           0.2, mode='discard', tail='lower'),
#                           # remove 20% of features at each step
#        update_sensitivity=True)),
#        # update sensitivity at each step
#   splitter = OddEvenSplitter(),
#   descr='LinSVM+RFE(OddEven)')
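# Illustrative addition (not part of the original module): when run as a
# script, report what ended up in the warehouse and the tag vocabulary
# it knows, using only the Warehouse API defined above.
if __name__ == '__main__':
    print "Known tags: %s" % ", ".join(clfs.internals)
    for descr, internals in clfs.listing():
        print "  %s: %s" % (descr, internals)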