Package mvpa :: Package clfs :: Package libsvm :: Module _svm
[hide private]
[frames | no frames]

Source Code for Module mvpa.clfs.libsvm._svm

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Python interface to the SWIG-wrapped libsvm""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13   
 14  from math import exp, fabs 
 15  import re, copy 
 16   
 17  import numpy as N 
 18   
 19  from mvpa.clfs.libsvm import svmc 
 20  from mvpa.clfs.libsvm.svmc import C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, \ 
 21                                    NU_SVR, LINEAR, POLY, RBF, SIGMOID, \ 
 22                                    PRECOMPUTED 
 23   
 24  if __debug__: 
 25      from mvpa.misc import debug 
def intArray(seq):
    """Copy the items of `seq` into a freshly allocated svmc int array.

    :Parameters:
      seq
        Sized iterable of values accepted by ``svmc.int_setitem``.

    :Returns:
      The array handle produced by ``svmc.new_int``.  Nothing in this
      function releases it; `freeIntArray` is the matching deallocator.
    """
    array = svmc.new_int(len(seq))
    for position, value in enumerate(seq):
        svmc.int_setitem(array, position, value)
    return array
35
def doubleArray(seq):
    """Copy the items of `seq` into a freshly allocated svmc double array.

    :Parameters:
      seq
        Sized iterable of values accepted by ``svmc.double_setitem``.

    :Returns:
      The array handle produced by ``svmc.new_double``.  Nothing in this
      function releases it; `freeDoubleArray` is the matching deallocator.
    """
    array = svmc.new_double(len(seq))
    for position, value in enumerate(seq):
        svmc.double_setitem(array, position, value)
    return array
45
def freeIntArray(x):
    """Release an svmc int array, ignoring "no array" markers.

    :Parameters:
      x
        Array handle to pass to ``svmc.delete_int``.  ``None`` and the
        string ``'NULL'`` are treated as "nothing to free" and skipped.
    """
    # Identity test for None: avoids invoking a custom __ne__ on the handle.
    if x is not None and x != 'NULL':
        svmc.delete_int(x)
50
def freeDoubleArray(x):
    """Release an svmc double array, ignoring "no array" markers.

    :Parameters:
      x
        Array handle to pass to ``svmc.delete_double``.  ``None`` and the
        string ``'NULL'`` are treated as "nothing to free" and skipped.
    """
    # Identity test for None: avoids invoking a custom __ne__ on the handle.
    if x is not None and x != 'NULL':
        svmc.delete_double(x)
55
def intArray2List(x, n):
    """Read the first `n` items of an svmc int array into a Python list.

    :Parameters:
      x
        Array handle readable through ``svmc.int_getitem``.
      n
        Number of leading items to read.

    :Returns:
      A real ``list`` of `n` values (empty when `n` is not positive).  The
      values are fetched eagerly, so the result stays valid after the
      underlying array is freed.
    """
    return [svmc.int_getitem(x, i) for i in range(n)]
59
def doubleArray2List(x, n):
    """Read the first `n` items of an svmc double array into a Python list.

    :Parameters:
      x
        Array handle readable through ``svmc.double_getitem``.
      n
        Number of leading items to read.

    :Returns:
      A real ``list`` of `n` values (empty when `n` is not positive).  The
      values are fetched eagerly, so the result stays valid after the
      underlying array is freed.
    """
    return [svmc.double_getitem(x, i) for i in range(n)]
63
class SVMParameter(object):
    """Python-side container of libsvm parameters that is safe to deepcopy.

    Parameter values live in a plain dictionary (``_params``).  The
    svmc-level parameter structure is only built on demand by the `param`
    property and is rebuilt after a value was changed through one of the
    generated per-parameter properties (see `_register_properties`).

    Keyword arguments given to the constructor override
    `default_parameters`; `untrain` restores exactly that initial state.
    """
    # default values
    default_parameters = {
        'svm_type': C_SVC,
        'kernel_type': RBF,
        'degree': 3,
        'gamma': 0,          # 1/k
        'coef0': 0,
        'nu': 0.5,
        'cache_size': 100,
        'C': 1,
        'eps': 1e-3,
        'p': 0.1,
        'shrinking': 1,
        'nr_weight': 0,
        'weight_label': [],
        'weight': [],
        'probability': 0,
    }

    class _SVMCParameter(object):
        """Internal class to avoid memory leaks returning away svmc's params.

        Owns one svmc parameter structure (``self.param``) together with the
        weight arrays attached to it, and releases all of them in `__del__`.
        """

        def __init__(self, params):
            """Create the svmc structure and fill it from the dict `params`.

            Every key is forwarded to ``svmc.svm_parameter_<key>_set``;
            the two list-valued entries are converted to svmc arrays first.
            """
            self.param = svmc.new_svm_parameter()
            for attr, val in params.items():
                # adjust val if necessary
                if attr == 'weight_label':
                    val = intArray(val)
                elif attr == 'weight':
                    val = doubleArray(val)
                # set the parameter through corresponding call
                set_func = getattr(svmc, 'svm_parameter_%s_set' % (attr))
                set_func(self.param, val)

        def __del__(self):
            """Free the weight arrays and the svmc parameter structure."""
            if __debug__:
                debug('CLF_',
                      'Destroying libsvm._SVMCParameter %s' % str(self))
            # `param` is missing if svmc.new_svm_parameter() itself failed;
            # then there is nothing to release.
            param = getattr(self, 'param', None)
            if param is None:
                return
            freeIntArray(svmc.svm_parameter_weight_label_get(param))
            freeDoubleArray(svmc.svm_parameter_weight_get(param))
            svmc.delete_svm_parameter(param)

    def __init__(self, **kw):
        """Store the overrides `kw` and initialise the parameter set."""
        self._orig_params = kw
        self.untrain()

    def untrain(self):
        """Reset to the defaults plus the constructor overrides.

        Also forgets any svmc structure built so far.
        """
        # Deep copy so that the list-valued defaults are never shared between
        # instances (or with the class-level dictionary).
        self._params = copy.deepcopy(self.default_parameters)
        self._params.update(self._orig_params)  # update with new values
        self.__svmc_params = None               # none is computed
        self.__svmc_recompute = False           # thus none to recompute

    def __repr__(self):
        # __repr__ must return a string; returning the dict itself raises
        # TypeError as soon as repr() is called.
        return repr(self._params)

    def __str__(self):
        return "SVMParameter: %s" % repr(self._params)

    def __copy__(self):
        """Shallow copy: current values plus the original overrides."""
        # Carry the constructor overrides along so that untrain() on the
        # copy resets to the same state as untrain() on the original.
        out = SVMParameter(**self._orig_params)
        out._params = copy.copy(self._params)
        return out

    def __deepcopy__(self, memo):
        """Deep copy: current values plus the original overrides.

        The svmc structure is deliberately not copied; the copy builds its
        own on first access to `param`.
        """
        out = SVMParameter(**copy.deepcopy(self._orig_params, memo))
        out._params = copy.deepcopy(self._params, memo)
        return out

    def _clear_svmc_params(self):
        """Drop the cached svmc structure (its __del__ frees the C memory)."""
        if self.__svmc_params is not None:
            self.__svmc_params = None

    @property
    def param(self):
        """svmc parameter structure reflecting the current values.

        Built lazily and rebuilt whenever a value was changed via
        `_setParameter` since the last build.
        """
        if self.__svmc_recompute:
            self._clear_svmc_params()
        if self.__svmc_params is None:
            self.__svmc_params = SVMParameter._SVMCParameter(self._params)
            self.__svmc_recompute = False
        return self.__svmc_params.param

    def __del__(self):
        if __debug__:
            debug('CLF_', 'Destroying libsvm.SVMParameter %s' % str(self))
        self._clear_svmc_params()

    def _setParameter(self, key, value):
        """Set a single parameter and mark the svmc structure as stale.

        Only assignment is tracked: modifying a list value in place does not
        trigger a rebuild of the svmc structure.
        """
        self.__svmc_recompute = True
        self._params[key] = value

    @classmethod
    def _register_properties(cls):
        """Attach one read/write property per key of `default_parameters`.

        Reading returns the value from ``_params``; writing goes through
        `_setParameter`.
        """
        def make_property(key):
            # Factory function: gives every property its own binding of
            # `key` (closures created directly in the loop would all see
            # the last key).
            def getter(self):
                return self._params[key]

            def setter(self, val):
                self._setParameter(key, val)

            return property(fget=getter, fset=setter)

        for key in cls.default_parameters:
            setattr(cls, key, make_property(key))


SVMParameter._register_properties()
def convert2SVMNode(x):
    """Convert a sequence or mapping to an SVMNode array.

    :Parameters:
      x
        Either a mapping ``{index: value}`` (any ``dict`` subclass) or an
        indexable, sized sequence whose positions serve as indices.

    :Returns:
      Handle of a new ``svmc.svm_node_array`` with one node per index in
      ascending order, followed by a terminating node with index -1.  The
      array is not released here; elsewhere in this module such arrays are
      released with ``svmc.svm_node_array_destroy``.

    :Raises:
      TypeError
        If `x` is neither a mapping nor a sequence.
    """
    # All entries are kept, zeros included: dropping them would break the
    # precomputed kernel (no good solution yet).
    if isinstance(x, dict):
        indices = sorted(x)
    elif hasattr(x, '__len__') and hasattr(x, '__getitem__'):
        # Duck-typed sequence test; operator.isSequenceType is gone in
        # Python 3.
        indices = range(len(x))
    else:
        raise TypeError("data must be a mapping or a sequence")

    count = len(indices)
    data = svmc.svm_node_array(count + 1)
    try:
        for position, index in enumerate(indices):
            svmc.svm_node_array_set(data, position, index, x[index])
        # terminating node
        svmc.svm_node_array_set(data, count, -1, 0)
    except Exception:
        # Do not leak the C array if a value cannot be stored.
        svmc.svm_node_array_destroy(data)
        raise
    return data
206
class SVMProblem:
    """Training data (labels and samples) packed into svmc structures.

    Attributes set by the constructor:

    - ``prob``: svmc problem structure
    - ``size``: number of samples
    - ``y_array``: svmc double array holding the labels
    - ``x_matrix``: svmc node matrix, one row per sample
    - ``data``: the per-sample node arrays referenced by ``x_matrix``
    - ``maxlen``: for sequence samples the largest length, for dict
      samples the largest key
    """

    def __init__(self, y, x):
        """Build the svmc problem from labels `y` and samples `x`.

        :Parameters:
          y
            Sized sequence of labels, one per sample.
          x
            Indexable collection of samples; each sample is a mapping or
            sequence accepted by `convert2SVMNode`.

        :Raises:
          ValueError
            If `y` and `x` differ in length.
        """
        # Give every attribute read by __del__/__repr__ a value first, so an
        # object whose construction fails half-way can still be finalized.
        self.prob = None
        self.y_array = None
        self.x_matrix = None
        self.data = []
        self.maxlen = 0
        self.size = size = len(y)

        # Explicit check instead of assert: asserts vanish under `python -O`.
        if size != len(x):
            raise ValueError("y and x must have the same length "
                             "(got %d and %d)" % (size, len(x)))

        self.prob = prob = svmc.new_svm_problem()

        self.y_array = y_array = svmc.new_double(size)
        for i in range(size):
            svmc.double_setitem(y_array, i, y[i])

        self.x_matrix = x_matrix = svmc.svm_node_matrix(size)
        for i in range(size):
            sample = x[i]
            data = convert2SVMNode(sample)
            self.data.append(data)
            svmc.svm_node_matrix_set(x_matrix, i, data)
            if isinstance(sample, dict):
                if len(sample) > 0:
                    self.maxlen = max(self.maxlen, max(sample.keys()))
            else:
                self.maxlen = max(self.maxlen, len(sample))

        svmc.svm_problem_l_set(prob, size)
        svmc.svm_problem_y_set(prob, y_array)
        svmc.svm_problem_x_set(prob, x_matrix)

    def __repr__(self):
        return "<SVMProblem: size = %s>" % (self.size)

    def __del__(self):
        """Release every svmc structure this problem allocated."""
        # The debug hook must not keep the memory from being released, e.g.
        # when module globals are already gone at interpreter shutdown.
        try:
            if __debug__:
                debug('CLF_', 'Destroying libsvm.SVMProblem %s' % repr(self))
        except (NameError, TypeError, AttributeError):
            pass

        prob = getattr(self, 'prob', None)
        if prob is not None:
            svmc.delete_svm_problem(prob)
        y_array = getattr(self, 'y_array', None)
        if y_array is not None:
            svmc.delete_double(y_array)
        # Walk the node arrays actually created; after a failed constructor
        # there may be fewer than `size` of them.
        for node_array in getattr(self, 'data', ()):
            svmc.svm_node_array_destroy(node_array)
        x_matrix = getattr(self, 'x_matrix', None)
        if x_matrix is not None:
            svmc.svm_node_matrix_destroy(x_matrix)
250
class SVMModel:
    """Wrapper around a libsvm model handle obtained through svmc.

    A model is either loaded from a file (``SVMModel(filename)``) or trained
    from a problem and a parameter set (``SVMModel(problem, parameter)``).

    Attributes set by the constructor: ``model`` (svmc handle), ``nr_class``,
    ``svm_type``, ``labels`` and ``probability``; ``prob`` is set only for
    trained models.
    """

    def __init__(self, arg1, arg2=None):
        """Load or train a model.

        :Parameters:
          arg1
            File name of a saved model if `arg2` is None, otherwise the
            `SVMProblem` to train on.
          arg2
            `SVMParameter` to train with, or None to load from file.

        :Raises:
          ValueError
            If ``svmc.svm_check_parameter`` reports a problem.
        """
        if arg2 is None:
            # create model from file
            filename = arg1
            self.model = svmc.svm_load_model(filename)
        else:
            # create model from problem and parameter
            prob, param = arg1, arg2
            self.prob = prob
            if param.gamma == 0:
                # Default gamma is 1/maxlen.  NOTE: this modifies the
                # caller's parameter object and raises ZeroDivisionError
                # when prob.maxlen is 0.
                param.gamma = 1.0/prob.maxlen
            msg = svmc.svm_check_parameter(prob.prob, param.param)
            if msg:
                raise ValueError(msg)
            self.model = svmc.svm_train(prob.prob, param.param)

        # setup some classwide variables
        self.nr_class = svmc.svm_get_nr_class(self.model)
        self.svm_type = svmc.svm_get_svm_type(self.model)
        # create labels(classes)
        intarr = svmc.new_int(self.nr_class)
        try:
            svmc.svm_get_labels(self.model, intarr)
            # list() forces the values out before the buffer is freed
            self.labels = list(intArray2List(intarr, self.nr_class))
        finally:
            svmc.delete_int(intarr)
        # check if valid probability model
        self.probability = svmc.svm_check_probability_model(self.model)

    def __repr__(self):
        """
        Print string representation of the model for easier comprehension
        and some statistics
        """
        ret = '<SVMModel:'
        try:
            ret += ' type = %s, ' % repr(self.svm_type)
            ret += ' number of classes = %d (%s), ' \
                   % (self.nr_class, repr(self.labels))
        except Exception:
            # Best effort: the attributes are missing on a model whose
            # construction did not complete.
            pass
        return ret + '>'

    def predict(self, x):
        """Return the value of ``svmc.svm_predict`` for sample `x`."""
        data = convert2SVMNode(x)
        try:
            return svmc.svm_predict(self.model, data)
        finally:
            svmc.svm_node_array_destroy(data)

    def getNRClass(self):
        """Return the number of classes reported by the model."""
        return self.nr_class

    def getLabels(self):
        """Return the list of class labels.

        :Raises:
          TypeError
            For NU_SVR, EPSILON_SVR and ONE_CLASS models.
        """
        if self.svm_type in (NU_SVR, EPSILON_SVR, ONE_CLASS):
            raise TypeError("Unable to get label from a SVR/ONE_CLASS model")
        return self.labels

    #def getParam(self):
    #    return SVMParameter(
    #        svmc_parameter=svmc.svm_model_param_get(self.model))

    def predictValuesRaw(self, x):
        """Return the list of raw decision values for sample `x`.

        The list has ``nr_class*(nr_class-1)//2`` entries, as filled in by
        ``svmc.svm_predict_values``.
        """
        # convert x into SVMNode, allocate a double array for return
        n = self.nr_class*(self.nr_class-1)//2
        data = convert2SVMNode(x)
        try:
            dblarr = svmc.new_double(n)
            try:
                svmc.svm_predict_values(self.model, data, dblarr)
                # list() forces the values out before the buffer is freed
                return list(doubleArray2List(dblarr, n))
            finally:
                svmc.delete_double(dblarr)
        finally:
            svmc.svm_node_array_destroy(data)

    def predictValues(self, x):
        """Return decision values for sample `x` in a convenient form.

        For NU_SVR, EPSILON_SVR and ONE_CLASS models the single raw value is
        returned.  Otherwise the result is a dict keyed by label pairs with
        ``d[a, b] == -d[b, a]``.
        """
        v = self.predictValuesRaw(x)
        if self.svm_type in (NU_SVR, EPSILON_SVR, ONE_CLASS):
            return v[0]
        # self.svm_type == C_SVC or self.svm_type == NU_SVC
        labels = self.labels
        count = 0
        d = {}
        for i in range(len(labels)):
            for j in range(i+1, len(labels)):
                d[labels[i], labels[j]] = v[count]
                d[labels[j], labels[i]] = -v[count]
                count += 1
        return d

    def predictProbability(self, x):
        """Return ``(prediction, {label: probability})`` for sample `x`.

        :Raises:
          TypeError
            For regression and one-class models, and for models without
            probability information.
        """
        # c code will do nothing on wrong type, so we have to check ourself
        if self.svm_type == NU_SVR or self.svm_type == EPSILON_SVR:
            raise TypeError("call get_svr_probability or get_svr_pdf "
                            "for probability output of regression")
        elif self.svm_type == ONE_CLASS:
            raise TypeError("probability not supported yet for one-class "
                            "problem")
        # only C_SVC, NU_SVC goes in
        if not self.probability:
            raise TypeError("model does not support probabiliy estimates")

        # convert x into SVMNode, alloc a double array to receive
        # probabilities
        data = convert2SVMNode(x)
        try:
            dblarr = svmc.new_double(self.nr_class)
            try:
                pred = svmc.svm_predict_probability(self.model, data, dblarr)
                pv = list(doubleArray2List(dblarr, self.nr_class))
            finally:
                svmc.delete_double(dblarr)
        finally:
            svmc.svm_node_array_destroy(data)
        p = {}
        for i in range(len(self.labels)):
            p[self.labels[i]] = pv[i]
        return pred, p

    def getSVRProbability(self):
        """Return the value of ``svmc.svm_get_svr_probability``.

        :Raises:
          TypeError
            If svmc returns 0.
        """
        # leave the Error checking to svm.cpp code
        ret = svmc.svm_get_svr_probability(self.model)
        if ret == 0:
            raise TypeError("not a regression model or probability "
                            "information not available")
        return ret

    def getSVRPdf(self):
        """Return a function ``z -> exp(-|z|/sigma)/(2*sigma)``.

        ``sigma`` is the value of `getSVRProbability`.
        """
        # get_svr_probability will handle error checking
        sigma = self.getSVRProbability()
        return lambda z: exp(-fabs(z)/sigma)/(2*sigma)

    def save(self, filename):
        """Write the model to `filename` via ``svmc.svm_save_model``."""
        svmc.svm_save_model(filename, self.model)

    def __del__(self):
        """Destroy the svmc model, never raising from the finalizer."""
        # The debug hook must not keep the model from being destroyed, e.g.
        # when module globals are already gone at interpreter shutdown.
        try:
            if __debug__:
                debug('CLF_', 'Destroying libsvm.SVMModel %s' % repr(self))
        except (NameError, TypeError, AttributeError):
            pass

        try:
            svmc.svm_destroy_model(self.model)
        except Exception:
            # blind way to overcome problem of already deleted model and
            # "SVMModel instance has no attribute 'model'" in ignored
            pass

    def getTotalNSV(self):
        """Return the total number of support vectors."""
        return svmc.svm_model_l_get(self.model)

    def getNSV(self):
        """Returns a list with the number of support vectors per class.
        """
        # fetch the pointer once instead of once per class
        nsv = svmc.svm_model_nSV_get(self.model)
        return [svmc.int_getitem(nsv, i) for i in range(self.nr_class)]

    def getSV(self):
        """Returns an array with the all support vectors.

        array( nSV x <nFeatures>)

        :Raises:
          AttributeError
            If the model has no training problem attached (it was loaded
            from a file), because the number of features is taken from
            ``self.prob.maxlen``.
        """
        prob = getattr(self, 'prob', None)
        if prob is None:
            raise AttributeError(
                "SVMModel has no training problem attached (model loaded "
                "from file?): number of features for getSV() is unknown")
        return svmc.svm_node_matrix2numpy_array(
                    svmc.svm_model_SV_get(self.model),
                    self.getTotalNSV(),
                    prob.maxlen)

    def getSVCoef(self):
        """Return coefficients for SVs... Needs to be used directly with caution!

        Summary on what is happening in libsvm internals with sv_coef

        svm_model's sv_coef (especially) are "cleverly" packed into a matrix
        nr_class - 1 x #SVs_total which stores
        coefficients for
        nr_class x (nr_class-1) / 2
        binary classifiers' SV coefficients.

        For classifier i-vs-j
        General packing rule can be described as:

          i-th row contains sv_coefficients for SVs of class i it took
          in all i-vs-j or j-vs-i classifiers.

        Another useful excerpt from svm.cpp is

              // classifier (i,j): coefficients with
              // i are in sv_coef[j-1][nz_start[i]...],
              // j are in sv_coef[i][nz_start[j]...]

        It can also be described as j-th column lists coefficients for SV # j
        which belongs to some class C, which it took (if it was an SV,
        ie != 0) in classifiers i vs C (iff i<C), or C vs i+1 (iff i>C)

        This way no byte of storage is wasted but imho such setup is quite
        convoluted
        """
        return svmc.doubleppcarray2numpy_array(
                    svmc.svm_model_sv_coef_get(self.model),
                    self.nr_class - 1,
                    self.getTotalNSV())

    def getRho(self):
        """Return constant(s) in decision function(s) (if multi-class)"""
        # Floor division: the element count must stay an integer even when
        # true division is in effect (same formula as predictValuesRaw).
        n = self.nr_class * (self.nr_class-1)//2
        return list(doubleArray2List(svmc.svm_model_rho_get(self.model), n))
467