Source Code for Module mvpa.clfs._svmbase

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Functionality common to all SVM implementations. For internal use only."""

__docformat__ = 'restructuredtext'

import numpy as N

from mvpa.misc.copy import deepcopy

from mvpa.base import warning
from mvpa.clfs.base import Classifier
from mvpa.misc.param import Parameter
from mvpa.misc.transformers import SecondAxisSumOfAbs

if __debug__:
    from mvpa.base import debug

class _SVM(Classifier):
    """Support Vector Machine Classifier.

    Base class for all external SVM implementations.
    """

    """
    Derived classes should define:

    * _KERNELS: a dict mapping each kernel name to a tuple of the
      implementation kernel type, the list of parameters specific to that
      kernel, and the sensitivity analyzer, e.g.::

        _KERNELS = {
             'linear': (shogun.Kernel.LinearKernel, (), LinearSVMWeights),
             'rbf' :   (shogun.Kernel.GaussianKernel, ('gamma',), None),
             ...
             }

    * _KNOWN_IMPLEMENTATIONS: a dict mapping each implementation name to a
      tuple of the SVM implementation, the list of parameters specific to
      that implementation, additional internals, and a description, e.g.::

        _KNOWN_IMPLEMENTATIONS = {
          'C_SVC' : (svm.svmc.C_SVC, ('C',),
                     ('binary', 'multiclass'), 'C-SVM classification'),
          ...
          }

    """

    _ATTRIBUTE_COLLECTIONS = ['params', 'kernel_params'] # enforce presence of params collections

    _SVM_PARAMS = {
        'C' : Parameter(-1.0, descr='Trade-off parameter. High C -- rigid margin SVM'),
        'nu' : Parameter(0.5, min=0.0, max=1.0, descr='Fraction of datapoints within the margin'),
        'cache_size': Parameter(100, descr='Size of the kernel cache, specified in megabytes'),
        'coef0': Parameter(0.5, descr='Offset coefficient in polynomial and sigmoid kernels'),
        'degree': Parameter(3, descr='Degree of polynomial kernel'),
        # init the parameter interface
        'tube_epsilon': Parameter(0.01, descr='Epsilon in epsilon-insensitive loss function of epsilon-SVM regression (SVR)'),
        'gamma': Parameter(0, descr='Scaling (width in RBF) within non-linear kernels'),
        'tau': Parameter(1e-6, descr='TAU parameter of KRR regression in shogun'),
        'max_shift': Parameter(10, min=0.0, descr='Maximal shift for SGs GaussianShiftKernel'),
        'shift_step': Parameter(1, min=0.0, descr='Shift step for SGs GaussianShiftKernel'),
        'probability': Parameter(0, descr='Flag to signal whether a probability estimate is obtained within LibSVM'),
        'scale': Parameter(1.0, descr='Scale factor for linear kernel. (0 triggers automagic rescaling by SG)'),
        'shrinking': Parameter(1, descr='Whether shrinking is to be conducted'),
        'weight_label': Parameter([], allowedtype='[int]', descr='???'),
        'weight': Parameter([], allowedtype='[double]', descr='Custom weights per label'),
        # For some reason setting epsilon to 1e-5 slowed things down a bit
        # in comparison to how it was before (in yoh/master) by up to 20%... not clear why
        # may be related to 1e-3 default within _svm.py?
        'epsilon': Parameter(5e-5,
                             min=1e-10,
                             descr='Tolerance of termination criterion')
        }


    _clf_internals = [ 'svm', 'kernel-based' ]

    def __init__(self, kernel_type='linear', **kwargs):
88 """Init base class of SVMs. *Not to be publicly used* 89 90 :Parameters: 91 kernel_type : basestr 92 String must be a valid key for cls._KERNELS 93 94 TODO: handling of parameters might migrate to be generic for 95 all classifiers. SVMs are choosen to be testbase for that 96 functionality to see how well it would fit. 97 """ 98 99 # Check if requested implementation is known 100 svm_impl = kwargs.get('svm_impl', None) 101 if not svm_impl in self._KNOWN_IMPLEMENTATIONS: 102 raise ValueError, \ 103 "Unknown SVM implementation '%s' is requested for %s." \ 104 "Known are: %s" % (svm_impl, self.__class__, 105 self._KNOWN_IMPLEMENTATIONS.keys()) 106 self._svm_impl = svm_impl 107 108 # Check the kernel 109 kernel_type = kernel_type.lower() 110 if not kernel_type in self._KERNELS: 111 raise ValueError, "Unknown kernel " + kernel_type 112 self._kernel_type_literal = kernel_type 113 114 impl, add_params, add_internals, descr = \ 115 self._KNOWN_IMPLEMENTATIONS[svm_impl] 116 117 # Add corresponding parameters to 'known' depending on the 118 # implementation chosen 119 if add_params is not None: 120 self._KNOWN_PARAMS = \ 121 self._KNOWN_PARAMS[:] + list(add_params) 122 123 # Add corresponding kernel parameters to 'known' depending on what 124 # kernel chosen 125 if self._KERNELS[kernel_type][1] is not None: 126 self._KNOWN_KERNEL_PARAMS = \ 127 self._KNOWN_KERNEL_PARAMS[:] + list(self._KERNELS[kernel_type][1]) 128 129 # Assign per-instance _clf_internals 130 self._clf_internals = self._clf_internals[:] 131 132 # Add corresponding internals 133 if add_internals is not None: 134 self._clf_internals += list(add_internals) 135 self._clf_internals.append(svm_impl) 136 137 if kernel_type == 'linear': 138 self._clf_internals += [ 'linear', 'has_sensitivity' ] 139 else: 140 self._clf_internals += [ 'non-linear' ] 141 142 # pop out all args from **kwargs which are known to be SVM parameters 143 _args = {} 144 for param in self._KNOWN_KERNEL_PARAMS + self._KNOWN_PARAMS + ['svm_impl']: 145 if param in kwargs: 146 _args[param] = kwargs.pop(param) 147 148 try: 149 Classifier.__init__(self, **kwargs) 150 except TypeError, e: 151 if "__init__() got an unexpected keyword argument " in e.args[0]: 152 # TODO: make it even more specific -- if that argument is listed 153 # within _SVM_PARAMS 154 e.args = tuple( [e.args[0] + 155 "\n Given SVM instance of class %s knows following parameters: %s" % 156 (self.__class__, self._KNOWN_PARAMS) + 157 ", and kernel parameters: %s" % 158 self._KNOWN_KERNEL_PARAMS] + list(e.args)[1:]) 159 raise e 160 161 # populate collections and add values from arguments 162 for paramfamily, paramset in ( (self._KNOWN_PARAMS, self.params), 163 (self._KNOWN_KERNEL_PARAMS, self.kernel_params)): 164 for paramname in paramfamily: 165 if not (paramname in self._SVM_PARAMS): 166 raise ValueError, "Unknown parameter %s" % paramname + \ 167 ". 
Known SVM params are: %s" % self._SVM_PARAMS.keys() 168 param = deepcopy(self._SVM_PARAMS[paramname]) 169 param.name = paramname 170 if paramname in _args: 171 param.value = _args[paramname] 172 # XXX might want to set default to it -- not just value 173 174 paramset.add(param) 175 176 # tune up C if it has one and non-linear classifier is used 177 if self.params.isKnown('C') and kernel_type != "linear" \ 178 and self.params['C'].isDefault: 179 if __debug__: 180 debug("SVM_", "Assigning default C value to be 1.0 for SVM " 181 "%s with non-linear kernel" % self) 182 self.params['C'].default = 1.0 183 184 # Some postchecks 185 if self.params.isKnown('weight') and self.params.isKnown('weight_label'): 186 if not len(self.weight_label) == len(self.weight): 187 raise ValueError, "Lenghts of 'weight' and 'weight_label' lists" \ 188 "must be equal." 189 190 self._kernel_type = self._KERNELS[kernel_type][0] 191 if __debug__: 192 debug("SVM", "Initialized %s with kernel %s:%s" % 193 (self, kernel_type, self._kernel_type))
194 195
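    # A minimal usage sketch of the parameter handling above -- not part of the
    # original module; the import path and argument values are assumptions for
    # illustration only. Any keyword listed in _SVM_PARAMS and enabled by the
    # chosen implementation/kernel is popped from **kwargs and routed into the
    # params/kernel_params collections:
    #
    #   from mvpa.clfs.svm import SVM          # assumed concrete backend subclass
    #   clf = SVM(kernel_type='rbf', svm_impl='C_SVC', C=1.0, gamma=0.1)
    #   clf.params['C'].value                  # -> 1.0
    #   clf.kernel_params['gamma'].value       # -> 0.1
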
    def __repr__(self):
        """String summary of the object
        """
        res = "%s(kernel_type='%s', svm_impl='%s'" % \
              (self.__class__.__name__, self._kernel_type_literal,
               self._svm_impl)
        sep = ", "
        for col in [self.params, self.kernel_params]:
            for k in col.names:
                # list only params with non-default values
                if col[k].isDefault: continue
                res += "%s%s=%s" % (sep, k, col[k].value)
                #sep = ', '
        for name, invert in ( ('enable', False), ('disable', True) ):
            states = self.states._getEnabled(nondefault=False, invert=invert)
            if len(states):
                res += sep + "%s_states=%s" % (name, str(states))

        res += ")"
        return res

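    # Given the formatting above, repr() of a (hypothetical) subclass instance
    # with a non-default C would look roughly like:
    #
    #   SVM(kernel_type='rbf', svm_impl='C_SVC', C=1.0)
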
    def _getDefaultC(self, data):
        """Compute default C

        TODO: for non-linear SVMs
        """

        if self._kernel_type_literal == 'linear':
            datasetnorm = N.mean(N.sqrt(N.sum(data*data, axis=1)))
            value = 1.0/(datasetnorm*datasetnorm)
            if __debug__:
                debug("SVM", "Default C computed to be %f" % value)
        else:
            warning("TODO: Computation of default C is not yet implemented" +
                    " for non-linear SVMs. Assigning 1.0")
            value = 1.0

        return value

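    # Worked example of the heuristic above (a sketch using numpy only; the
    # variable names are illustrative): C defaults to the inverse of the
    # squared mean sample norm.
    #
    #   data = N.array([[3., 4.], [0., 2.]])        # two samples
    #   norms = N.sqrt(N.sum(data*data, axis=1))    # -> [ 5.,  2.]
    #   datasetnorm = N.mean(norms)                 # -> 3.5
    #   1.0 / (datasetnorm*datasetnorm)             # -> ~0.0816
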
    def _getDefaultGamma(self, dataset):
        """Compute default Gamma

        TODO: unify bloody libsvm interface so it makes use of this function.
        Now it is computed within SVMModel.__init__
        """

        if self.kernel_params.isKnown('gamma'):
            value = 1.0 / len(dataset.uniquelabels)
            if __debug__:
                debug("SVM", "Default Gamma is computed to be %f" % value)
        else:
            raise RuntimeError, "Shouldn't ask for default Gamma here"

        return value

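    # Worked example of the default above: gamma is the inverse of the number
    # of unique labels in the dataset, so for a binary (two-class) dataset
    # gamma defaults to 1.0 / 2 = 0.5, and for a four-class dataset to 0.25.
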
    def getSensitivityAnalyzer(self, **kwargs):
        """Returns an appropriate SensitivityAnalyzer."""
        sana = self._KERNELS[self._kernel_type_literal][2]
        if sana is not None:
            kwargs.setdefault('combiner', SecondAxisSumOfAbs)
            return sana(self, **kwargs)
        else:
            raise NotImplementedError, \
                  "Sensitivity analyzer for kernel %s is TODO" % \
                  self._kernel_type_literal
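    # Sketch of how the lookup above is typically used -- hypothetical instance
    # and dataset, shown for illustration only. For a linear kernel the third
    # element of the _KERNELS entry supplies the analyzer class, and per-feature
    # sensitivities from multiple binary problems are combined by the default
    # SecondAxisSumOfAbs combiner set above:
    #
    #   sana = clf.getSensitivityAnalyzer()   # clf uses kernel_type='linear'
    #   sensitivities = sana(dataset)         # one combined weight per feature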