Source Code for Module mvpa.clfs.model_selector

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Model selection."""

__docformat__ = 'restructuredtext'


import numpy as N
from mvpa.base import externals
from mvpa.misc.exceptions import InvalidHyperparameterError

externals.exists("scipy", raiseException=True)
import scipy.linalg as SL

# no sense to import this module if openopt is not available
if externals.exists("openopt", raiseException=True):
    from scikits.openopt import NLP

if __debug__:
    from mvpa.base import debug

def _openopt_debug():
    # silence OpenOpt or make it verbose
    # (-1 = no logs, 0 = small log, 1 = verbose)
    if __debug__:
        da = debug.active
        if 'OPENOPT' in da:
            return 1
        elif 'MOD_SEL' in da:
            return 0
    return -1


class ModelSelector(object):
    """Model selection facility.

    Select a model among multiple models (i.e., a parametric model,
    parametrized by a set of hyperparameters).
    """

    def __init__(self, parametric_model, dataset):
        self.parametric_model = parametric_model
        self.dataset = dataset
        self.hyperparameters_best = None
        self.log_marginal_likelihood_best = None
        self.problem = None
        pass

    def max_log_marginal_likelihood(self, hyp_initial_guess, maxiter=1,
                optimization_algorithm="scipy_cg", ftol=1.0e-3, fixedHypers=None,
                use_gradient=False, logscale=False):
        """
        Set up the optimization problem in order to maximize
        the log_marginal_likelihood.

        :Parameters:

          parametric_model : Classifier
            the actual parametric model to be optimized.

          hyp_initial_guess : numpy.ndarray
            set of hyperparameters' initial values where to start
            optimization.

          maxiter : int
            maximum number of iterations allowed to the optimizer.
            (Defaults to 1)

          optimization_algorithm : string
            actual name of the optimization algorithm. See
            http://scipy.org/scipy/scikits/wiki/NLP
            for a comprehensive/updated list of available NLP solvers.
            (Defaults to 'scipy_cg')

          ftol : float
            threshold for the stopping criterion of the solver,
            which is mapped to OpenOpt's NLP.ftol.
            (Defaults to 1.0e-3)

          fixedHypers : numpy.ndarray (boolean array)
            boolean vector of the same size as hyp_initial_guess;
            'True' means that the corresponding hyperparameter must
            be kept fixed (so not optimized).
            (Defaults to None, which means that all hyperparameters
            are optimized)

          use_gradient : bool
            whether to provide the analytical gradient of the
            log_marginal_likelihood to the solver.
            (Defaults to False)

          logscale : bool
            whether to optimize the hyperparameters in log-scale, to
            enhance numerical stability.
            (Defaults to False)

        NOTE: the maximization of log_marginal_likelihood is a non-linear
        optimization problem (NLP). This fact is confirmed by Dmitrey,
        author of OpenOpt.
        """
        self.problem = None
        self.use_gradient = use_gradient
        self.logscale = logscale # use log-scale on hyperparameters to enhance numerical stability
        self.optimization_algorithm = optimization_algorithm
        self.hyp_initial_guess = N.array(hyp_initial_guess)
        self.hyp_initial_guess_log = N.log(self.hyp_initial_guess)
        if fixedHypers is None:
            fixedHypers = N.zeros(self.hyp_initial_guess.shape[0],dtype=bool)
            pass
        self.freeHypers = -fixedHypers
        if self.logscale:
            self.hyp_running_guess = self.hyp_initial_guess_log.copy()
        else:
            self.hyp_running_guess = self.hyp_initial_guess.copy()
            pass
        self.f_last_x = None

        def f(x):
            """
            Wrapper to the log_marginal_likelihood to be
            maximized.
            """
            # XXX EO: since some OpenOpt NLP solvers do not
            # implement lower bounds, the hyperparameter bounds are
            # implemented inside PyMVPA (see dmitrey's post on
            # [SciPy-user] 20080628).
            #
            # XXX EO: OpenOpt does not implement optimization of a
            # subset of the hyperparameters, so it is implemented here.
            #
            # XXX EO: OpenOpt does not implement logarithmic scaling of
            # the hyperparameters (to enhance numerical stability), so
            # it is implemented here.
            self.f_last_x = x.copy()
            self.hyp_running_guess[self.freeHypers] = x
            # REMOVE print "guess:",self.hyp_running_guess,x
            try:
                if self.logscale:
                    self.parametric_model.set_hyperparameters(N.exp(self.hyp_running_guess))
                else:
                    self.parametric_model.set_hyperparameters(self.hyp_running_guess)
                    pass
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL","WARNING: invalid hyperparameters!")
                return -N.inf
            try:
                self.parametric_model.train(self.dataset)
            except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError):
                # Note that ValueError could be raised when Cholesky gets Inf or NaN.
                if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
                return -N.inf
            log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood()
            # REMOVE print log_marginal_likelihood
            return log_marginal_likelihood

        def df(x):
            """
            Proxy to the log_marginal_likelihood first
            derivative. Necessary for OpenOpt when using derivatives.
            """
            self.hyp_running_guess[self.freeHypers] = x
            # REMOVE print "df guess:",self.hyp_running_guess,x
            # XXX EO: Most of the following lines can be skipped if
            # df() is computed just after f() with the same
            # hyperparameters. The partial results obtained during f()
            # are what is needed for df(). For now, in order to avoid
            # bugs that are difficult to trace, we keep this redundancy. A
            # deep check of how OpenOpt works, or using memoization,
            # should solve this issue.
            try:
                if self.logscale:
                    self.parametric_model.set_hyperparameters(N.exp(self.hyp_running_guess))
                else:
                    self.parametric_model.set_hyperparameters(self.hyp_running_guess)
                    pass
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                return -N.inf
            # Check if it is possible to avoid useless computations
            # already done in f(). According to tests and information
            # collected from the OpenOpt people, it is sufficiently
            # unexpected that the following test succeeds:
            if N.any(x!=self.f_last_x):
                if __debug__: debug("MOD_SEL","UNEXPECTED: recomputing train+log_marginal_likelihood.")
                try:
                    self.parametric_model.train(self.dataset)
                except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError):
                    if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
                    # XXX EO: which value of the gradient should be returned to
                    # OpenOpt when hyperparameters are wrong?
                    return N.zeros(x.size)
                log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood() # recompute what's needed (to be safe) REMOVE IN FUTURE!
                pass
            if self.logscale:
                gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood_logscale()
            else:
                gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood()
                pass
            # REMOVE print "grad:",gradient_log_marginal_likelihood
            return gradient_log_marginal_likelihood[self.freeHypers]


        if self.logscale:
            # vector of hyperparameters' values where to start the search
            x0 = self.hyp_initial_guess_log[self.freeHypers]
        else:
            x0 = self.hyp_initial_guess[self.freeHypers]
            pass
        self.contol = 1.0e-20 # constraint tolerance level
        # XXX EO: is it necessary to use contol when self.logscale is
        # True and there is no lb? Ask dmitrey.
        if self.use_gradient:
            # actual instance of the OpenOpt non-linear problem
            self.problem = NLP(f, x0, df=df, contol=self.contol, goal='maximum')
        else:
            self.problem = NLP(f, x0, contol=self.contol, goal='maximum')
            pass
        self.problem.name = "Max LogMargLikelihood"
        if not self.logscale:
            # set lower bound for hyperparameters: avoid negative
            # hyperparameters. Note: problem.n is the size of
            # hyperparameters' vector
            self.problem.lb = N.zeros(self.problem.n)+self.contol
            pass
        # max number of iterations for the optimizer.
        self.problem.maxiter = maxiter
        # check whether the derivative of log_marginal_likelihood converged to
        # zero before ending optimization
        self.problem.checkdf = True
        # set increment of log_marginal_likelihood under which the optimizer stops
        self.problem.ftol = ftol
        self.problem.iprint = _openopt_debug()
        return self.problem


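    # Illustrative sketch (not part of the original module), mirroring the
    # __main__ demo below: given an already constructed GPR classifier `g`
    # and a `dataset`, the optimization problem would typically be set up
    # and solved as:
    #
    #   ms = ModelSelector(g, dataset)
    #   problem = ms.max_log_marginal_likelihood(
    #       hyp_initial_guess=[1.0, 1.0, 1.0],  # sigma_noise, sigma_f, length_scale
    #       optimization_algorithm="ralg", ftol=1.0e-5,
    #       fixedHypers=N.array([0, 0, 0], dtype=bool),
    #       use_gradient=True, logscale=True)
    #   lml = ms.solve()
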
    def solve(self, problem=None):
        """Solve the maximization problem, check the outcome and collect the results.
        """
        # XXX: this method could be made more abstract in the future, in the
        # sense that it could work not only for
        # log_marginal_likelihood but for other measures as well
        # (e.g., cross-validated error).

        if N.all(self.freeHypers==False): # no optimization needed
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            try:
                self.parametric_model.set_hyperparameters(self.hyperparameters_best)
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                self.log_marginal_likelihood_best = -N.inf
                return self.log_marginal_likelihood_best
            self.parametric_model.train(self.dataset)
            self.log_marginal_likelihood_best = self.parametric_model.compute_log_marginal_likelihood()
            return self.log_marginal_likelihood_best

        result = self.problem.solve(self.optimization_algorithm) # perform optimization!
        if result.stopcase == -1:
            # XXX: should we use debug() for the following messages?
            # If so, how can we track the missing convergence to a
            # solution?
            print "Unable to find a maximum to log_marginal_likelihood"
        elif result.stopcase == 0:
            print "Limits exceeded"
        elif result.stopcase == 1:
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            if self.logscale:
                self.hyperparameters_best[self.freeHypers] = N.exp(result.xf) # best hyperparameters found # NOTE is it better to return a copy?
            else:
                self.hyperparameters_best[self.freeHypers] = result.xf
                pass
            self.log_marginal_likelihood_best = result.ff # actual best value of log_marginal_likelihood
            pass
        self.stopcase = result.stopcase
        return self.log_marginal_likelihood_best
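
    # Illustrative sketch (not part of the original module): after solve(),
    # the outcome can be inspected through the attributes set above, e.g.:
    #
    #   lml = ms.solve()
    #   if ms.stopcase == 1:  # the solver reports that a solution was found
    #       best_hypers = ms.hyperparameters_best
    #       best_lml = ms.log_marginal_likelihood_best  # same value as lml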

    pass



if __name__ == "__main__":

    import gpr
    import kernel
    from mvpa.misc import data_generators
    from mvpa.base import externals
    N.random.seed(1)

    if externals.exists("pylab", force=True):
        import pylab
        pylab.close("all")
        pylab.ion()

    from mvpa.datasets import Dataset
    from mvpa.misc import data_generators

    print "GPR:",

    train_size = 40
    test_size = 100
    F = 1

    dataset = data_generators.sinModulated(train_size, F)
    # print label_train

    dataset_test = data_generators.sinModulated(test_size, F, flat=True)
    data_test = dataset_test.samples
    label_test = dataset_test.labels
    # print label_test

    regression = True
    logml = True

    k = kernel.KernelSquaredExponential()
    g = gpr.GPR(k,regression=regression)
    g.states.enable("log_marginal_likelihood")
    # g.train_fv = dataset.samples
    # g.train_labels = dataset.labels

    print "GPR hyperparameters' search through maximization of marginal likelihood on train data."
    print
    ms = ModelSelector(g,dataset)

    sigma_noise_initial = 1.0e0 # 0.154142346606
    sigma_f_initial = 1.0e0 # 0.687554871058
    length_scale_initial = 1.0e0 # 0.263620251025

    problem = ms.max_log_marginal_likelihood(hyp_initial_guess=[sigma_noise_initial,sigma_f_initial,length_scale_initial], optimization_algorithm="ralg", ftol=1.0e-8,fixedHypers=N.array([0,0,0],dtype=bool))
    # problem = ms.max_log_marginal_likelihood(hyp_initial_guess=[1.0,1.0], optimization_algorithm="ralg", ftol=1.0e-3)

    lml = ms.solve()
    # print ms.hyperparameters_best
    sigma_noise_best, sigma_f_best, length_scale_best = ms.hyperparameters_best
    print
    print "Best sigma_noise:",sigma_noise_best
    print "Best sigma_f:",sigma_f_best
    print "Best length_scale:",length_scale_best
    print "Best log_marginal_likelihood:",lml

    # Best sigma_noise: 0.154142346606
    # Best sigma_f: 0.687554871058
    # Best length_scale: 0.263620251025
    # Best log_marginal_likelihood: -3.54790161194

    gpr.compute_prediction(sigma_noise_best,sigma_f_best,length_scale_best,regression,dataset,data_test,label_test,F)


    print
    print "GPR ARD on dataset from Williams and Rasmussen 1996:"
    dataset = data_generators.wr1996()
    # dataset.samples = N.hstack([dataset.samples]*10) # enlarge dataset's dimensionality, for testing high dimensions

    # Uncomment the kernel you like:

    # Squared Exponential kernel:
    k = kernel.KernelSquaredExponential()
    sigma_noise_initial = 1.0e-0
    sigma_f_initial = 1.0e-0
    length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e-0
    hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)

    # k = kernel.KernelLinear()
    # sigma_noise_initial = 1.0e-0
    # sigma_0_initial = 1.0e-0
    # Sigma_p_initial = N.ones(dataset.samples.shape[1])*1.0e-3
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_0_initial,Sigma_p_initial])
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*hyp_initial_guess.size,dtype=bool)

    # k = kernel.KernelConstant()
    # sigma_noise_initial = 1.0e-0
    # sigma_0_initial = 1.0e-0
    # hyp_initial_guess = N.array([sigma_noise_initial, sigma_0_initial])
    # # fixedHypers = N.array([0,0],dtype=bool)
    # fixedHypers = N.array([0]*hyp_initial_guess.size,dtype=bool)

    # # Exponential kernel:
    # k = kernel.KernelExponential()
    # sigma_noise_initial = 1.0e0
    # sigma_f_initial = 1.0e0
    # length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e0
    # # length_scale_initial = 1.0
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # print "hyp_initial_guess:",hyp_initial_guess
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)
    # # expected results (computed with use_gradient=False):
    # # objFunValue: 161.97952 (feasible, max constraint = 0)
    # # [ 6.72069299e-04  3.16515151e-01  3.11122154e+00  1.54833211e+00
    # #   1.94703461e+00  3.11122835e+00  4.37916189e+01  2.65398676e+01]


    # # Matern_3_2 kernel:
    # k = kernel.KernelMatern_3_2()
    # sigma_noise_initial = 1.0e0
    # sigma_f_initial = 1.0e0
    # length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e0
    # # length_scale_initial = 1.0
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # print "hyp_initial_guess:",hyp_initial_guess
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)


    # # Rational Quadratic kernel:
    # k = kernel.KernelRationalQuadratic(alpha=0.5)
    # sigma_noise_initial = 1.0e0
    # sigma_f_initial = 1.0e0
    # length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e0
    # # length_scale_initial = 1.0
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # print "hyp_initial_guess:",hyp_initial_guess
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)


    g = gpr.GPR(k,regression=regression)
    g.states.enable("log_marginal_likelihood")
    ms = ModelSelector(g,dataset)

    # Note that some kernels do not have a gradient yet!
    problem = ms.max_log_marginal_likelihood(hyp_initial_guess=hyp_initial_guess, optimization_algorithm="ralg", ftol=1.0e-5,fixedHypers=fixedHypers,use_gradient=True, logscale=True)
    lml = ms.solve()
    print ms.hyperparameters_best