Package mvpa :: Package clfs :: Module _svmbase
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs._svmbase

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Common to all SVM implementations functionality. For internal use only""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14   
 15  from copy import deepcopy 
 16   
 17  from mvpa.misc import warning 
 18  from mvpa.clfs.base import Classifier 
 19  from mvpa.misc.param import Parameter 
 20   
 21  if __debug__: 
 22      from mvpa.misc import debug 
 23   
class _SVM(Classifier):
    """Support Vector Machine Classifier.

    Base class for all external SVM implementations.

    Derived classes should define:

    * _KERNELS: map(dict) should define assignment to a tuple containing
      implementation kernel type, list of parameters adherent to the
      kernel, and sensitivity analyzer e.g.::

        _KERNELS = {
             'linear': (shogun.Kernel.LinearKernel, (), LinearSVMWeights),
             'rbf' :   (shogun.Kernel.GaussianKernel, ('gamma',), None),
             ...
             }

    """

    # Enforce presence of the 'params' and 'kernel_params' collections
    # on every instance (consumed by the Classifier machinery)
    _ATTRIBUTE_COLLECTIONS = ['params', 'kernel_params']

    # Pool of every parameter any SVM backend might expose.  __init__
    # deepcopies the subset named by the derived class's _KNOWN_PARAMS /
    # _KNOWN_KERNEL_PARAMS into self.params / self.kernel_params.
    _SVM_PARAMS = {
        'C' : Parameter(-1.0, descr='Trade-off parameter. High C -- rigid margin SVM'),
        'nu' : Parameter(0.5, min=0.0, max=1.0, descr='Fraction of datapoints within the margin'),
        'cache_size': Parameter(100, descr='Size of the kernel cache, specified in megabytes'),
        'coef0': Parameter(0.5, descr='Offset coefficient in polynomial and sigmoid kernels'),
        'degree': Parameter(3, descr='Degree of polynomial kernel'),
        'tube_epsilon': Parameter(0.1, descr='Epsilon in epsilon-insensitive loss function of epsilon-SVM regression (SVR)'),
        'gamma': Parameter(0, descr='Scaling (width in RBF) within non-linear kernels'),
        'tau': Parameter(1e-6, descr='TAU parameter of KRR regression in shogun'),
        'max_shift': Parameter(10.0, min=0.0, descr='Maximal shift for SGs GaussianShiftKernel'),
        'shift_step': Parameter(1.0, min=0.0, descr='Shift step for SGs GaussianShiftKernel'),
        'probability': Parameter(0, descr='Flag to signal either probability estimate is obtained within LibSVM'),
        'shrinking': Parameter(1, descr='Either shrinking is to be conducted'),
        # NOTE(review): descr='???' -- meaning of the two entries below is
        # undocumented here; presumably LibSVM per-class weights -- confirm
        'weight_label': Parameter([], descr='???'),
        'weight': Parameter([], descr='???'),
        # For some reason setting epsilon to 1e-5 slowed things down a bit
        # in comparison to how it was before (in yoh/master) by up to 20%;
        # not clear why -- may be related to the 1e-3 default within _svm.py?
        'epsilon': Parameter(5e-5,
                             min=1e-10,
                             descr='Tolerance of termination criterium')
        }

    # Base tags for all SVMs; __init__ copies this per-instance and
    # extends it with linear/non-linear specifics
    _clf_internals = [ 'svm', 'kernel-based' ]
72 - def __init__(self, kernel_type='linear', **kwargs):
73 """Init base class of SVMs. *Not to be publicly used* 74 75 :Parameters: 76 kernel_type : basestr 77 String must be a valid key for cls._KERNELS 78 79 TODO: handling of parameters might migrate to be generic for 80 all classifiers. SVMs are choosen to be testbase for that 81 functionality to see how well it would fit. 82 """ 83 84 kernel_type = kernel_type.lower() 85 self._kernel_type_literal = kernel_type 86 if not kernel_type in self._KERNELS: 87 raise ValueError, "Unknown kernel " + kernel_type 88 89 # Add corresponding kernel parameters to 'known' depending on what 90 # kernel was chosen 91 if self._KERNELS[kernel_type][1] is not None: 92 # XXX need to do only if it is a class variable 93 self._KNOWN_KERNEL_PARAMS = \ 94 self._KNOWN_KERNEL_PARAMS[:] + list(self._KERNELS[kernel_type][1]) 95 96 # Assign per-instance _clf_internals 97 self._clf_internals = self._clf_internals[:] 98 if kernel_type == 'linear': 99 self._clf_internals += [ 'linear', 'has_sensitivity' ] 100 else: 101 self._clf_internals += [ 'non-linear' ] 102 103 # pop out all args from **kwargs which are known to be SVM parameters 104 _args = {} 105 for param in self._KNOWN_KERNEL_PARAMS + self._KNOWN_PARAMS: 106 if param in kwargs: 107 _args[param] = kwargs.pop(param) 108 109 try: 110 Classifier.__init__(self, **kwargs) 111 except TypeError, e: 112 if "__init__() got an unexpected keyword argument " in e.args[0]: 113 # TODO: make it even more specific -- if that argument is listed 114 # within _SVM_PARAMS 115 e.args = tuple( [e.args[0] + 116 "\n Given SVM instance knows following parameters: %s" % 117 self._KNOWN_PARAMS + 118 ", and kernel parameters: %s" % 119 self._KNOWN_KERNEL_PARAMS] + list(e.args)[1:]) 120 raise e 121 122 for paramfamily, paramset in ( (self._KNOWN_PARAMS, self.params), 123 (self._KNOWN_KERNEL_PARAMS, self.kernel_params)): 124 for paramname in paramfamily: 125 if not (paramname in self._SVM_PARAMS): 126 raise ValueError, "Unknown parameter %s" % paramname + \ 127 ". 
Known SVM params are: %s" % self._SVM_PARAMS.keys() 128 param = deepcopy(self._SVM_PARAMS[paramname]) 129 param.name = paramname 130 if paramname in _args: 131 param.value = _args[paramname] 132 # XXX might want to set default to it -- not just value 133 134 paramset.add(param) 135 136 # Some postchecks 137 if self.params.isKnown('weight') and self.params.isKnown('weight_label'): 138 if not len(self.weight_label) == len(self.weight): 139 raise ValueError, "Lenghts of 'weight' and 'weight_label' lists" \ 140 "must be equal." 141 142 self._kernel_type = self._KERNELS[kernel_type][0] 143 if __debug__: 144 debug("SVM", "Initialized %s with kernel %s:%s" % 145 (id(self), kernel_type, self._kernel_type))
146 147
148 - def __repr__(self):
149 """Definition of the object summary over the object 150 """ 151 res = "%s(kernel_type='%s'" % (self.__class__.__name__, self._kernel_type_literal) 152 sep = ", " 153 for col in [self.params, self.kernel_params]: 154 for k in col.names: 155 # list only params with not default values 156 if col[k].isDefault: continue 157 res += "%s%s=%s" % (sep, k, col[k].value) 158 #sep = ', ' 159 for name, invert in ( ('enable', False), ('disable', True) ): 160 states = self.states._getEnabled(nondefault=False, invert=invert) 161 if len(states): 162 res += sep + "%s_states=%s" % (name, str(states)) 163 164 res += ")" 165 return res
166 167
168 - def _getDefaultC(self, data):
169 """Compute default C 170 171 TODO: for non-linear SVMs 172 """ 173 174 if self._kernel_type_literal == 'linear': 175 datasetnorm = N.mean(N.sqrt(N.sum(data*data, axis=1))) 176 value = 1.0/(datasetnorm*datasetnorm) 177 if __debug__: 178 debug("SVM", "Default C computed to be %f" % value) 179 else: 180 warning("TODO: Computation of default C is not yet implemented" + 181 " for non-linear SVMs. Assigning 1.0") 182 value = 1.0 183 184 return value
185 186
187 - def _getDefaultGamma(self, dataset):
188 """Compute default Gamma 189 190 TODO: unify bloody libsvm interface so it makes use of this function. 191 Now it is computed within SVMModel.__init__ 192 """ 193 194 if self.kernel_params.isKnown('gamma'): 195 value = 1.0 / len(dataset.uniquelabels) 196 if __debug__: 197 debug("SVM", "Default Gamma is computed to be %f" % value) 198 else: 199 raise RuntimeError, "Shouldn't ask for default Gamma here" 200 201 return value
202
203 - def getSensitivityAnalyzer(self, **kwargs):
204 """Returns an appropriate SensitivityAnalyzer.""" 205 sana = self._KERNELS[self._kernel_type_literal][2] 206 if sana is not None: 207 return sana(self, **kwargs) 208 else: 209 raise NotImplementedError, \ 210 "Sensitivity analyzers for kernel %s is TODO" % \ 211 self._kernel_type_literal
212