1
2
3
4
5
6
7
8
9 """Common to all SVM implementations functionality. For internal use only"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from copy import deepcopy
16
17 from mvpa.misc import warning
18 from mvpa.clfs.base import Classifier
19 from mvpa.misc.param import Parameter
20
21 if __debug__:
22 from mvpa.misc import debug
23
class _SVM(Classifier):
    """Support Vector Machine Classifier.

    Base class for all external SVM implementations.

    Derived classes should define:

    * _KERNELS: map(dict) should define assignment to a tuple containing
      implementation kernel type, list of parameters adherent to the
      kernel, and sensitivity analyzer e.g.::

        _KERNELS = {
             'linear': (shogun.Kernel.LinearKernel, (), LinearSVMWeights),
             'rbf' :   (shogun.Kernel.GaussianKernel, ('gamma',), None),
             ...
             }

    """

    # Collections (see mvpa.misc.param) through which parameter values are
    # exposed on each instance: generic SVM params and kernel-specific params.
    _ATTRIBUTE_COLLECTIONS = ['params', 'kernel_params']

    # Catalog of every SVM parameter known to any backend.  Each concrete
    # implementation picks the subset it supports via _KNOWN_PARAMS /
    # _KNOWN_KERNEL_PARAMS; deep copies of these templates are placed into
    # the instance collections in __init__.
    _SVM_PARAMS = {
        'C' : Parameter(-1.0, descr='Trade-off parameter. High C -- rigid margin SVM'),
        'nu' : Parameter(0.5, min=0.0, max=1.0, descr='Fraction of datapoints within the margin'),
        'cache_size': Parameter(100, descr='Size of the kernel cache, specified in megabytes'),
        'coef0': Parameter(0.5, descr='Offset coefficient in polynomial and sigmoid kernels'),
        'degree': Parameter(3, descr='Degree of polynomial kernel'),
        'tube_epsilon': Parameter(0.1, descr='Epsilon in epsilon-insensitive loss function of epsilon-SVM regression (SVR)'),
        'gamma': Parameter(0, descr='Scaling (width in RBF) within non-linear kernels'),
        'tau': Parameter(1e-6, descr='TAU parameter of KRR regression in shogun'),
        'max_shift': Parameter(10.0, min=0.0, descr='Maximal shift for SGs GaussianShiftKernel'),
        'shift_step': Parameter(1.0, min=0.0, descr='Shift step for SGs GaussianShiftKernel'),
        'probability': Parameter(0, descr='Flag to signal either probability estimate is obtained within LibSVM'),
        'shrinking': Parameter(1, descr='Either shrinking is to be conducted'),
        'weight_label': Parameter([], descr='???'),
        'weight': Parameter([], descr='???'),
        'epsilon': Parameter(5e-5,
                    min=1e-10,
                    descr='Tolerance of termination criterium')
        }
71
72 - def __init__(self, kernel_type='linear', **kwargs):
73 """Init base class of SVMs. *Not to be publicly used*
74
75 :Parameters:
76 kernel_type : basestr
77 String must be a valid key for cls._KERNELS
78
79 TODO: handling of parameters might migrate to be generic for
80 all classifiers. SVMs are choosen to be testbase for that
81 functionality to see how well it would fit.
82 """
83
84 kernel_type = kernel_type.lower()
85 self._kernel_type_literal = kernel_type
86 if not kernel_type in self._KERNELS:
87 raise ValueError, "Unknown kernel " + kernel_type
88
89
90
91 if self._KERNELS[kernel_type][1] is not None:
92
93 self._KNOWN_KERNEL_PARAMS = \
94 self._KNOWN_KERNEL_PARAMS[:] + list(self._KERNELS[kernel_type][1])
95
96
97 self._clf_internals = self._clf_internals[:]
98 if kernel_type == 'linear':
99 self._clf_internals += [ 'linear', 'has_sensitivity' ]
100 else:
101 self._clf_internals += [ 'non-linear' ]
102
103
104 _args = {}
105 for param in self._KNOWN_KERNEL_PARAMS + self._KNOWN_PARAMS:
106 if param in kwargs:
107 _args[param] = kwargs.pop(param)
108
109 try:
110 Classifier.__init__(self, **kwargs)
111 except TypeError, e:
112 if "__init__() got an unexpected keyword argument " in e.args[0]:
113
114
115 e.args = tuple( [e.args[0] +
116 "\n Given SVM instance knows following parameters: %s" %
117 self._KNOWN_PARAMS +
118 ", and kernel parameters: %s" %
119 self._KNOWN_KERNEL_PARAMS] + list(e.args)[1:])
120 raise e
121
122 for paramfamily, paramset in ( (self._KNOWN_PARAMS, self.params),
123 (self._KNOWN_KERNEL_PARAMS, self.kernel_params)):
124 for paramname in paramfamily:
125 if not (paramname in self._SVM_PARAMS):
126 raise ValueError, "Unknown parameter %s" % paramname + \
127 ". Known SVM params are: %s" % self._SVM_PARAMS.keys()
128 param = deepcopy(self._SVM_PARAMS[paramname])
129 param.name = paramname
130 if paramname in _args:
131 param.value = _args[paramname]
132
133
134 paramset.add(param)
135
136
137 if self.params.isKnown('weight') and self.params.isKnown('weight_label'):
138 if not len(self.weight_label) == len(self.weight):
139 raise ValueError, "Lenghts of 'weight' and 'weight_label' lists" \
140 "must be equal."
141
142 self._kernel_type = self._KERNELS[kernel_type][0]
143 if __debug__:
144 debug("SVM", "Initialized %s with kernel %s:%s" %
145 (id(self), kernel_type, self._kernel_type))
146
147
149 """Definition of the object summary over the object
150 """
151 res = "%s(kernel_type='%s'" % (self.__class__.__name__, self._kernel_type_literal)
152 sep = ", "
153 for col in [self.params, self.kernel_params]:
154 for k in col.names:
155
156 if col[k].isDefault: continue
157 res += "%s%s=%s" % (sep, k, col[k].value)
158
159 for name, invert in ( ('enable', False), ('disable', True) ):
160 states = self.states._getEnabled(nondefault=False, invert=invert)
161 if len(states):
162 res += sep + "%s_states=%s" % (name, str(states))
163
164 res += ")"
165 return res
166
167
169 """Compute default C
170
171 TODO: for non-linear SVMs
172 """
173
174 if self._kernel_type_literal == 'linear':
175 datasetnorm = N.mean(N.sqrt(N.sum(data*data, axis=1)))
176 value = 1.0/(datasetnorm*datasetnorm)
177 if __debug__:
178 debug("SVM", "Default C computed to be %f" % value)
179 else:
180 warning("TODO: Computation of default C is not yet implemented" +
181 " for non-linear SVMs. Assigning 1.0")
182 value = 1.0
183
184 return value
185
186
188 """Compute default Gamma
189
190 TODO: unify bloody libsvm interface so it makes use of this function.
191 Now it is computed within SVMModel.__init__
192 """
193
194 if self.kernel_params.isKnown('gamma'):
195 value = 1.0 / len(dataset.uniquelabels)
196 if __debug__:
197 debug("SVM", "Default Gamma is computed to be %f" % value)
198 else:
199 raise RuntimeError, "Shouldn't ask for default Gamma here"
200
201 return value
202
204 """Returns an appropriate SensitivityAnalyzer."""
205 sana = self._KERNELS[self._kernel_type_literal][2]
206 if sana is not None:
207 return sana(self, **kwargs)
208 else:
209 raise NotImplementedError, \
210 "Sensitivity analyzers for kernel %s is TODO" % \
211 self._kernel_type_literal
212