1
2
3
4
5
6
7
8
9 """Python interface to the SWIG-wrapped libsvm"""
10
11 __docformat__ = 'restructuredtext'
12
13
14 from math import exp, fabs
15 import re, copy
16
17 import numpy as N
18
19 from mvpa.clfs.libsvm import svmc
20 from mvpa.clfs.libsvm.svmc import C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, \
21 NU_SVR, LINEAR, POLY, RBF, SIGMOID, \
22 PRECOMPUTED
23
24 if __debug__:
25 from mvpa.misc import debug
def intArray(seq):
    """Copy the items of `seq` into a newly allocated svmc int array.

    The array is created with `svmc.new_int(len(seq))`, filled position by
    position through `svmc.int_setitem`, and returned. Nothing in this
    function releases the array again.
    """
    array = svmc.new_int(len(seq))
    for index, item in enumerate(seq):
        svmc.int_setitem(array, index, item)
    return array
35
def doubleArray(seq):
    """Copy the items of `seq` into a newly allocated svmc double array.

    The array is created with `svmc.new_double(len(seq))`, filled position
    by position through `svmc.double_setitem`, and returned. Nothing in
    this function releases the array again.
    """
    array = svmc.new_double(len(seq))
    for index, item in enumerate(seq):
        svmc.double_setitem(array, index, item)
    return array
45
def freeIntArray(x):
    """Release an svmc int array through `svmc.delete_int`.

    Nothing is done when `x` is None or equal to the string 'NULL'
    (presumably how the SWIG layer reports a null pointer -- confirm).
    """
    # Identity test for None: `!=` would go through the object's own
    # comparison method, which is not what is meant here.
    if x is not None and x != 'NULL':
        svmc.delete_int(x)
50
def freeDoubleArray(x):
    """Release an svmc double array through `svmc.delete_double`.

    Nothing is done when `x` is None or equal to the string 'NULL'
    (presumably how the SWIG layer reports a null pointer -- confirm).
    """
    # Identity test for None: `!=` would go through the object's own
    # comparison method, which is not what is meant here.
    if x is not None and x != 'NULL':
        svmc.delete_double(x)
55
def intArray2List(x, n):
    """Return items 0..n-1 of the svmc int array `x` as a Python list.

    Each item is read with `svmc.int_getitem(x, i)`. A list is always
    returned (empty when `n` is 0), independent of what `map` yields on
    the running interpreter.
    """
    return [svmc.int_getitem(x, i) for i in range(n)]
59
def doubleArray2List(x, n):
    """Return items 0..n-1 of the svmc double array `x` as a Python list.

    Each item is read with `svmc.double_getitem(x, i)`. A list is always
    returned (empty when `n` is 0), independent of what `map` yields on
    the running interpreter.
    """
    return [svmc.double_getitem(x, i) for i in range(n)]
63
66 """
67 SVMParameter class safe to be deepcopied.
68 """
69
70 default_parameters = {
71 'svm_type' : C_SVC,
72 'kernel_type' : RBF,
73 'degree' : 3,
74 'gamma' : 0,
75 'coef0' : 0,
76 'nu' : 0.5,
77 'cache_size' : 100,
78 'C' : 1,
79 'eps' : 1e-3,
80 'p' : 0.1,
81 'shrinking' : 1,
82 'nr_weight' : 0,
83 'weight_label' : [],
84 'weight' : [],
85 'probability' : 0
86 }
87
89 """Internal class to to avoid memory leaks returning away svmc's params"""
90
92 self.param = svmc.new_svm_parameter()
93 for attr, val in params.items():
94
95 if attr == 'weight_label':
96
97 val = intArray(val)
98
99
100 elif attr == 'weight':
101
102 val = doubleArray(val)
103
104
105
106 set_func = getattr(svmc, 'svm_parameter_%s_set' % (attr))
107 set_func(self.param, val)
108
110 if __debug__:
111 debug('CLF_', 'Destroying libsvm._SVMCParameter %s' % str(self))
112 freeIntArray(svmc.svm_parameter_weight_label_get(self.param))
113 freeDoubleArray(svmc.svm_parameter_weight_get(self.param))
114 svmc.delete_svm_parameter(self.param)
115
116
118 self._orig_params = kw
119 self.untrain()
120
def untrain(self):
    """Reset the parameter state of this object.

    `self._params` becomes a fresh dict holding `default_parameters`
    overlaid with the values kept in `self._orig_params`; the cached svmc
    parameter object and its recompute flag are reset.
    """
    # no svmc parameters computed yet, hence nothing to recompute
    self.__svmc_recompute = False
    self.__svmc_params = None

    # start from a shallow copy of the defaults, then apply the overrides
    params = {}
    params.update(self.default_parameters)
    params.update(**self._orig_params)
    self._params = params
127
130
132 return "SVMParameter: %s" % `self._params`
133
138
143
145 if not self.__svmc_params is None:
146 del self.__svmc_params
147 self.__svmc_params = None
148
149 @property
157
162
164 """Not exactly proper one -- if lists are svmc_recompute, would fail anyways"""
165 self.__svmc_recompute = True
166 self._params[key] = value
167
168 @classmethod
170 for key in cls.default_parameters.keys():
171 exec "%s.%s = property(fget=%s, fset=%s)" % \
172 (cls.__name__, key,
173 "lambda self:self._params['%s']" % key,
174 "lambda self,val:self._setParameter('%s', val)" % key)
175
176
177 SVMParameter._register_properties()
def convert2SVMNode(x):
    """Convert a sequence or mapping to an SVMNode array.

    For a dict (or dict subclass) the keys, in sorted order, are used as
    node indices; for any other sequence the positions 0..len(x)-1 are
    used. The array is allocated with one extra trailing node set to
    index -1 / value 0 (the end-of-vector marker in libsvm's node format).

    Returns the svmc node array; nothing in this function releases it.

    Raises TypeError when `x` is neither a mapping nor a sequence.
    """
    import operator

    if isinstance(x, dict):
        indices = sorted(x)
    elif operator.isSequenceType(x):
        # positions are already in ascending order
        indices = range(len(x))
    else:
        raise TypeError("data must be a mapping or a sequence")

    n_nodes = len(indices)
    data = svmc.svm_node_array(n_nodes + 1)
    svmc.svm_node_array_set(data, n_nodes, -1, 0)

    for pos, key in enumerate(indices):
        svmc.svm_node_array_set(data, pos, key, x[key])
    return data
206
211 assert len(y) == len(x)
212 self.prob = prob = svmc.new_svm_problem()
213 self.size = size = len(y)
214
215 self.y_array = y_array = svmc.new_double(size)
216 for i in range(size):
217 svmc.double_setitem(y_array, i, y[i])
218
219 self.x_matrix = x_matrix = svmc.svm_node_matrix(size)
220 self.data = []
221 self.maxlen = 0
222 for i in range(size):
223 data = convert2SVMNode(x[i])
224 self.data.append(data)
225 svmc.svm_node_matrix_set(x_matrix, i, data)
226 if type(x[i]) == dict:
227 if (len(x[i]) > 0):
228 self.maxlen = max(self.maxlen, max(x[i].keys()))
229 else:
230 self.maxlen = max(self.maxlen, len(x[i]))
231
232 svmc.svm_problem_l_set(prob, size)
233 svmc.svm_problem_y_set(prob, y_array)
234 svmc.svm_problem_x_set(prob, x_matrix)
235
236
238 return "<SVMProblem: size = %s>" % (self.size)
239
240
242 if __debug__:
243 debug('CLF_', 'Destroying libsvm.SVMProblem %s' % `self`)
244
245 svmc.delete_svm_problem(self.prob)
246 svmc.delete_double(self.y_array)
247 for i in range(self.size):
248 svmc.svm_node_array_destroy(self.data[i])
249 svmc.svm_node_matrix_destroy(self.x_matrix)
250
255 if arg2 == None:
256
257 filename = arg1
258 self.model = svmc.svm_load_model(filename)
259 else:
260
261 prob, param = arg1, arg2
262 self.prob = prob
263 if param.gamma == 0:
264 param.gamma = 1.0/prob.maxlen
265 msg = svmc.svm_check_parameter(prob.prob, param.param)
266 if msg:
267 raise ValueError, msg
268 self.model = svmc.svm_train(prob.prob, param.param)
269
270
271 self.nr_class = svmc.svm_get_nr_class(self.model)
272 self.svm_type = svmc.svm_get_svm_type(self.model)
273
274 intarr = svmc.new_int(self.nr_class)
275 svmc.svm_get_labels(self.model, intarr)
276 self.labels = intArray2List(intarr, self.nr_class)
277 svmc.delete_int(intarr)
278
279 self.probability = svmc.svm_check_probability_model(self.model)
280
281
283 """
284 Print string representation of the model or easier comprehension
285 and some statistics
286 """
287 ret = '<SVMModel:'
288 try:
289 ret += ' type = %s, ' % `self.svm_type`
290 ret += ' number of classes = %d (%s), ' \
291 % ( self.nr_class, `self.labels` )
292 except:
293 pass
294 return ret+'>'
295
296
298 data = convert2SVMNode(x)
299 ret = svmc.svm_predict(self.model, data)
300 svmc.svm_node_array_destroy(data)
301 return ret
302
303
306
307
309 if self.svm_type == NU_SVR \
310 or self.svm_type == EPSILON_SVR \
311 or self.svm_type == ONE_CLASS:
312 raise TypeError, "Unable to get label from a SVR/ONE_CLASS model"
313 return self.labels
314
315
316
317
318
319
320
322
def predictValuesRaw(self, x):
    """Return the raw decision values of the model for sample `x`.

    `x` is converted with `convert2SVMNode`, and a buffer of
    nr_class*(nr_class-1)//2 doubles (one per unordered pair of classes)
    is filled by `svmc.svm_predict_values`. The buffer content is
    returned as a Python list.

    Both the node array and the double buffer are released even when the
    prediction call raises.
    """
    n = self.nr_class * (self.nr_class - 1) // 2
    data = convert2SVMNode(x)
    try:
        dblarr = svmc.new_double(n)
        try:
            svmc.svm_predict_values(self.model, data, dblarr)
            return doubleArray2List(dblarr, n)
        finally:
            svmc.delete_double(dblarr)
    finally:
        svmc.svm_node_array_destroy(data)
331
332
334 v = self.predictValuesRaw(x)
335 if self.svm_type == NU_SVR \
336 or self.svm_type == EPSILON_SVR \
337 or self.svm_type == ONE_CLASS:
338 return v[0]
339 else:
340 count = 0
341 d = {}
342 for i in range(len(self.labels)):
343 for j in range(i+1, len(self.labels)):
344 d[self.labels[i], self.labels[j]] = v[count]
345 d[self.labels[j], self.labels[i]] = -v[count]
346 count += 1
347 return d
348
349
351
352 if self.svm_type == NU_SVR or self.svm_type == EPSILON_SVR:
353 raise TypeError, "call get_svr_probability or get_svr_pdf " \
354 "for probability output of regression"
355 elif self.svm_type == ONE_CLASS:
356 raise TypeError, "probability not supported yet for one-class " \
357 "problem"
358
359 if not self.probability:
360 raise TypeError, "model does not support probabiliy estimates"
361
362
363 data = convert2SVMNode(x)
364 dblarr = svmc.new_double(self.nr_class)
365 pred = svmc.svm_predict_probability(self.model, data, dblarr)
366 pv = doubleArray2List(dblarr, self.nr_class)
367 svmc.delete_double(dblarr)
368 svmc.svm_node_array_destroy(data)
369 p = {}
370 for i in range(len(self.labels)):
371 p[self.labels[i]] = pv[i]
372 return pred, p
373
374
376
def getSVRProbability(self):
    """Return the result of `svmc.svm_get_svr_probability` for this model.

    Raises TypeError when that call reports 0, which this wrapper takes
    to mean that the model is not a regression model or carries no
    probability information.
    """
    sigma = svmc.svm_get_svr_probability(self.model)
    if sigma != 0:
        return sigma
    raise TypeError("not a regression model or probability "
                    "information not available")
382
383
385
386 sigma = self.getSVRProbability()
387 return lambda z: exp(-fabs(z)/sigma)/(2*sigma)
388
389
def save(self, filename):
    """Hand `filename` and this object's model to `svmc.svm_save_model`."""
    model = self.model
    svmc.svm_save_model(filename, model)
392
393
395 if __debug__:
396 debug('CLF_', 'Destroying libsvm.SVMModel %s' % (`self`))
397
398 try:
399 svmc.svm_destroy_model(self.model)
400 except:
401
402
403 pass
404
405
def getTotalNSV(self):
    """Return `svmc.svm_model_l_get(self.model)`.

    Presumably the total number of support vectors in the model (it is
    used elsewhere in this file as the support-vector count) -- confirm
    against libsvm's `svm_model.l`.
    """
    total = svmc.svm_model_l_get(self.model)
    return total
408
409
411 """Returns a list with the number of support vectors per class.
412 """
413 return [ svmc.int_getitem(svmc.svm_model_nSV_get( self.model ), i)
414 for i in range( self.nr_class ) ]
415
416
418 """Returns an array with the all support vectors.
419
420 array( nSV x <nFeatures>)
421 """
422 return svmc.svm_node_matrix2numpy_array(
423 svmc.svm_model_SV_get(self.model),
424 self.getTotalNSV(),
425 self.prob.maxlen)
426
427
429 """Return coefficients for SVs... Needs to be used directly with caution!
430
431 Summary on what is happening in libsvm internals with sv_coef
432
433 svm_model's sv_coef (especially) are "cleverly" packed into a matrix
434 nr_class - 1 x #SVs_total which stores
435 coefficients for
436 nr_class x (nr_class-1) / 2
437 binary classifiers' SV coefficients.
438
439 For classifier i-vs-j
440 General packing rule can be described as:
441
442 i-th row contains sv_coefficients for SVs of class i it took
443 in all i-vs-j or j-vs-i classifiers.
444
445 Another useful excerpt from svm.cpp is
446
447 // classifier (i,j): coefficients with
448 // i are in sv_coef[j-1][nz_start[i]...],
449 // j are in sv_coef[i][nz_start[j]...]
450
451 It can also be described as j-th column lists coefficients for SV # j which
452 belongs to some class C, which it took (if it was an SV, ie != 0)
453 in classifiers i vs C (iff i<C), or C vs i+1 (iff i>C)
454
455 This way no byte of storage is wasted but imho such setup is quite convolved
456 """
457 return svmc.doubleppcarray2numpy_array(
458 svmc.svm_model_sv_coef_get(self.model),
459 self.nr_class - 1,
460 self.getTotalNSV())
461
462
464 """Return constant(s) in decision function(s) (if multi-class)"""
465 return doubleArray2List(svmc.svm_model_rho_get(self.model),
466 self.nr_class * (self.nr_class-1)/2)
467