10 """Gaussian Process Regression (GPR)."""
11
12 __docformat__ = 'restructuredtext'
13
14
15 import numpy as N
16
17 from mvpa.base import externals
18
19 from mvpa.misc.state import StateVariable
20 from mvpa.clfs.base import Classifier
21 from mvpa.misc.param import Parameter
22 from mvpa.clfs.kernel import KernelSquaredExponential, KernelLinear
23 from mvpa.measures.base import Sensitivity
24 from mvpa.misc.exceptions import InvalidHyperparameterError
25 from mvpa.datasets import Dataset
26
27 externals.exists("scipy", raiseException=True)
28 from scipy.linalg import cho_factor as SLcho_factor
29 from scipy.linalg import cho_solve as SLcho_solve
30 from scipy.linalg import cholesky as SLcholesky
31 import scipy.linalg as SL
32
33 if __debug__:
34 from mvpa.base import debug
35
36
37 Nlog = N.log
38 Ndot = N.dot
39 Ndiag = N.diag
40 NLAcholesky = N.linalg.cholesky
41 NLAsolve = N.linalg.solve
42 NLAError = N.linalg.linalg.LinAlgError
43 SLAError = SL.basic.LinAlgError
44
45
46 _halflog2pi = 0.5 * Nlog(2 * N.pi)
47
48
class GPR(Classifier):
    """Gaussian Process Regression (GPR).

    """

    predicted_variances = StateVariable(enabled=False,
        doc="Variance per each predicted value")

    log_marginal_likelihood = StateVariable(enabled=False,
        doc="Log Marginal Likelihood")

    log_marginal_likelihood_gradient = StateVariable(enabled=False,
        doc="Log Marginal Likelihood Gradient")

    _clf_internals = [ 'gpr', 'regression', 'retrainable' ]


    sigma_noise = Parameter(0.001, allowedtype='float', min=1e-10,
             doc="The standard deviation of the Gaussian noise.")

    def __init__(self, kernel=None, **kwargs):


    def _init_internals(self):
        """Reset some internal variables to None.

        To be used in constructor and untrain()
        """
        self._train_fv = None
        self._labels = None
        self._km_train_train = None
        self._train_labels = None
        self._alpha = None
        self._L = None
        self._LL = None
        self.__kernel.reset()
        pass


    def __repr__(self):
        """String summary of the object
        """
        return super(GPR, self).__repr__(
            prefixes=['kernel=%s' % self.__kernel])


    def compute_log_marginal_likelihood(self):
        """
        Compute log marginal likelihood using self.train_fv and self.labels.
        """
        if __debug__: debug("GPR", "Computing log_marginal_likelihood")
        self.log_marginal_likelihood = \
                -0.5*Ndot(self._train_labels, self._alpha) - \
                Nlog(self._L.diagonal()).sum() - \
                self._km_train_train.shape[0] * _halflog2pi
        return self.log_marginal_likelihood


    def compute_gradient_log_marginal_likelihood(self):
        """Compute the gradient of the log marginal likelihood.

        This version uses the compact formula provided in the Rasmussen
        and Williams book.
        """

        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))

        alphalphaT = N.dot(self._alpha[:, None], self._alpha[None, :])
        tmp = alphalphaT - Kinv

        grad_LML_hypers = self.__kernel.compute_lml_gradient(tmp, self._train_fv)
        grad_K_sigma_n = 2.0*self.sigma_noise*N.eye(tmp.shape[0])

        grad_LML_sigma_n = 0.5 * (tmp * (grad_K_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_sigma_n, grad_LML_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient


    def compute_gradient_log_marginal_likelihood_logscale(self):
        """Compute the gradient of the log marginal likelihood when the
        hyperparameters are in logscale.

        This version uses the compact formula provided in the Rasmussen
        and Williams book.
        """

        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))
        alphalphaT = N.dot(self._alpha[:, None], self._alpha[None, :])
        tmp = alphalphaT - Kinv
        grad_LML_log_hypers = \
            self.__kernel.compute_lml_gradient_logscale(tmp, self._train_fv)
        grad_K_log_sigma_n = 2.0 * self.sigma_noise ** 2 * N.eye(Kinv.shape[0])

        grad_LML_log_sigma_n = 0.5 * (tmp * (grad_K_log_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_log_sigma_n, grad_LML_log_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient


    def getSensitivityAnalyzer(self, flavor='auto', **kwargs):
        """Returns a sensitivity analyzer for GPR.

        :Parameters:
          flavor : basestring
            What sensitivity to provide. Valid values are
            'linear', 'model_select', 'auto'.
            In case of 'auto' selects 'linear' for linear kernel
            and 'model_select' for the rest. 'linear' corresponds to
            GPRLinearWeights and 'model_select' to GPRWeights
        """
        if flavor == 'auto':
            flavor = ('model_select', 'linear')\
                     [int(isinstance(self.__kernel, KernelLinear))]
        if __debug__:
            debug("GPR", "Returning '%s' sensitivity analyzer" % flavor)

        if flavor == 'linear':
            return GPRLinearWeights(self, **kwargs)
        elif flavor == 'model_select':
            if not ('has_sensitivity' in self._clf_internals):
                raise ValueError, \
                      "model_select flavor is not available, probably " \
                      "because the 'openopt' module is not available"
            return GPRWeights(self, **kwargs)
        else:
            raise ValueError, "Flavor %s is not recognized" % flavor


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        retrainable = self.params.retrainable
        if retrainable:
            newkernel = False
            newL = False
            _changedData = self._changedData

        self._train_fv = train_fv = data.samples
        self._train_labels = train_labels = data.labels

        if not retrainable or _changedData['traindata'] \
               or _changedData.get('kernel_params', False):
            if __debug__:
                debug("GPR", "Computing train train kernel matrix")
            self._km_train_train = km_train_train = self.__kernel.compute(train_fv)
            newkernel = True
            if retrainable:
                self._km_train_test = None
        else:
            if __debug__:
                debug("GPR", "Not recomputing kernel since retrainable and "
                      "nothing has changed")
            km_train_train = self._km_train_train

        if not retrainable or newkernel or _changedData['params']:
            if __debug__:
                debug("GPR", "Computing L. sigma_noise=%g" % self.sigma_noise)

            self._C = km_train_train + \
                  self.sigma_noise**2 * N.identity(km_train_train.shape[0], 'd')
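            # Cholesky factorization of C; if C turns out not to be
            # numerically positive definite (e.g. for a very small
            # sigma_noise), retry with a tiny jitter added to the diagonal.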
            try:
                self._L = SLcholesky(self._C, lower=True)
                self._LL = (self._L, True)
            except SLAError:
                epsilon = 1.0e-20 * N.eye(self._C.shape[0])
                self._L = SLcholesky(self._C + epsilon, lower=True)
                self._LL = (self._L, True)
                pass
            newL = True
        else:
            if __debug__:
                debug("GPR", "Not computing L since kernel, data and params "
                      "stayed the same")
            L = self._L

        if __debug__:
            debug("GPR", "Computing alpha")

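        # alpha = (K + sigma_n^2 I)^-1 y, obtained from the Cholesky factor
        # with two triangular solves (cho_solve) rather than an explicit
        # matrix inverse.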
        self._alpha = SLcho_solve(self._LL, train_labels)

        if self.states.isEnabled('log_marginal_likelihood'):
            self.compute_log_marginal_likelihood()
            pass

        if retrainable:
            self.states.retrained = not newkernel or not newL

        if __debug__:
            debug("GPR", "Done training")

        pass


    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        retrainable = self.params.retrainable

        if not retrainable or self._changedData['testdata'] \
               or self._km_train_test is None:
            if __debug__:
                debug('GPR', "Computing train test kernel matrix")
            km_train_test = self.__kernel.compute(self._train_fv, data)
            if retrainable:
                self._km_train_test = km_train_test
                self.states.repredicted = False
        else:
            if __debug__:
                debug('GPR', "Not recomputing train test kernel matrix")
            km_train_test = self._km_train_test
            self.states.repredicted = True

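        # Predictive (posterior) mean: f* = K(X*, X) alpha
        # (Rasmussen & Williams, Eq. 2.25).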
        predictions = Ndot(km_train_test.transpose(), self._alpha)

        if self.states.isEnabled('predicted_variances'):

            if not retrainable or self._km_test_test is None \
                   or self._changedData['testdata']:
                if __debug__:
                    debug('GPR', "Computing test test kernel matrix")
                km_test_test = self.__kernel.compute(data)
                if retrainable:
                    self._km_test_test = km_test_test
            else:
                if __debug__:
                    debug('GPR', "Not recomputing test test kernel matrix")
                km_test_test = self._km_test_test

            if __debug__:
                debug("GPR", "Computing predicted variances")
            L = self._L

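            # The lu_solve call below, with a trivial permutation and trans=1,
            # amounts to the triangular solve L v = k*.  The predictive
            # variance then is
            #   var(f*) = diag(K**) - sum(v**2, axis=0) + sigma_n^2
            # (Rasmussen & Williams, Eq. 2.26, plus the noise variance).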
            piv = N.arange(L.shape[0])
            v = SL.lu_solve((L.T, piv), km_train_test, trans=1)

            self.predicted_variances = Ndiag(km_test_test) - (v ** 2).sum(0) \
                                       + self.sigma_noise ** 2
            pass

        if __debug__:
            debug("GPR", "Done predicting")
        return predictions


417 """
418 Set hyperparameters' values.
419
420 Note that 'hyperparameter' is a sequence so the order of its
421 values is important. First value must be sigma_noise, then
422 other kernel's hyperparameters values follow in the exact
423 order the kernel expect them to be.
424 """
        if hyperparameter[0] < GPR.sigma_noise.min:
            raise InvalidHyperparameterError()
        self.sigma_noise = hyperparameter[0]
        if hyperparameter.size > 1:
            self.__kernel.set_hyperparameters(hyperparameter[1:])
            pass
        return

    kernel = property(fget=lambda self: self.__kernel)
    pass


class GPRLinearWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights GPR trained
    on a given `Dataset`.

    In case of KernelLinear the coefficients of the linear regression
    are computed explicitly, together with their variances (if
    requested).

    Note that the intercept is not computed.
    """

    variances = StateVariable(enabled=False,
        doc="Variances of the weights (for KernelLinear)")

    _LEGAL_CLFS = [ GPR ]


    def _call(self, dataset):


if externals.exists('openopt'):

    from mvpa.clfs.model_selector import ModelSelector

    class GPRWeights(Sensitivity):
        """`SensitivityAnalyzer` that reports the weights GPR trained
        on a given `Dataset`.
        """

        _LEGAL_CLFS = [ GPR ]

        def _call(self, dataset):
            """Extract weights from GPR
            """

            clf = self.clf

            clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                                / clf._train_labels.std()

            dataset = Dataset(samples=clf._train_fv, labels=clf._train_labels)
            clf.states.enable("log_marginal_likelihood")
            ms = ModelSelector(clf, dataset)

            sigma_noise_initial = 1.0e-5
            sigma_f_initial = 1.0
            length_scale_initial = N.ones(dataset.nfeatures)*1.0e4

            hyp_initial_guess = N.hstack([sigma_noise_initial,
                                          sigma_f_initial,
                                          length_scale_initial])
            fixedHypers = N.array([0]*hyp_initial_guess.size, dtype=bool)
            fixedHypers = None
            problem = ms.max_log_marginal_likelihood(
                hyp_initial_guess=hyp_initial_guess,
                optimization_algorithm="scipy_lbfgsb",
                ftol=1.0e-3, fixedHypers=fixedHypers,
                use_gradient=True, logscale=True)
            if __debug__ and 'GPR_WEIGHTS' in debug.active:
                problem.iprint = 1
            lml = ms.solve()
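            # Feature-wise sensitivities are taken as the inverse of the
            # best-fitting length scales (ARD-style relevance: a shorter
            # length scale means the feature matters more).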
            weights = 1.0/ms.hyperparameters_best[2:]
            if __debug__:
                debug("GPR",
                      "%s, train: shape %s, labels %s, min:max %g:%g, "
                      "sigma_noise %g, sigma_f %g" %
                      (clf, clf._train_fv.shape, N.unique(clf._train_labels),
                       clf._train_fv.min(), clf._train_fv.max(),
                       ms.hyperparameters_best[0], ms.hyperparameters_best[1]))

            return weights


if __name__ == "__main__":
    import pylab
    pylab.close("all")
    pylab.ion()

    from mvpa.misc.data_generators import sinModulated

    def compute_prediction(sigma_noise_best, sigma_f, length_scale_best,
                           regression, dataset, data_test, label_test, F,
                           logml=True):
        """XXX Function which seems to be valid only for __main__...

        TODO: remove reimporting of pylab etc. See pylint output for more
        information
        """

        data_train = dataset.samples
        label_train = dataset.labels
        import pylab
        kse = KernelSquaredExponential(length_scale=length_scale_best,
                                       sigma_f=sigma_f)
        g = GPR(kse, sigma_noise=sigma_noise_best, regression=regression)
        print g
        if regression:
            g.states.enable("predicted_variances")
            pass

        if logml:
            g.states.enable("log_marginal_likelihood")
            pass

        g.train(dataset)
        prediction = g.predict(data_test)

        accuracy = None
        if regression:
            accuracy = N.sqrt(((prediction-label_test)**2).sum()/prediction.size)
            print "RMSE:", accuracy
        else:
            accuracy = (prediction.astype('l')==label_test.astype('l')).sum() \
                       / float(prediction.size)
            print "accuracy:", accuracy
            pass

        if F == 1:
            pylab.figure()
            pylab.plot(data_train, label_train, "ro", label="train")
            pylab.plot(data_test, prediction, "b-", label="prediction")
            pylab.plot(data_test, label_test, "g+", label="test")
            if regression:
                pylab.plot(data_test, prediction-N.sqrt(g.predicted_variances),
                           "b--", label=None)
                pylab.plot(data_test, prediction+N.sqrt(g.predicted_variances),
                           "b--", label=None)
                pylab.text(0.5, -0.8, "RMSE="+"%f" %(accuracy))
            else:
                pylab.text(0.5, -0.8, "accuracy="+str(accuracy))
                pass
            pylab.legend()
            pass

        print "LML:", g.log_marginal_likelihood

    train_size = 40
    test_size = 100
    F = 1

    dataset = sinModulated(train_size, F)

    dataset_test = sinModulated(test_size, F, flat=True)

    regression = True
    logml = True

    if logml:
        print "Looking for better hyperparameters: grid search"

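        # Exhaustive grid search: train a GPR for every (sigma_noise,
        # length_scale) pair and keep the combination with the highest
        # log marginal likelihood.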
        sigma_noise_steps = N.linspace(0.1, 0.5, num=20)
        length_scale_steps = N.linspace(0.05, 0.6, num=20)
        lml = N.zeros((len(sigma_noise_steps), len(length_scale_steps)))
        lml_best = -N.inf
        length_scale_best = 0.0
        sigma_noise_best = 0.0
        i = 0
        for x in sigma_noise_steps:
            j = 0
            for y in length_scale_steps:
                kse = KernelSquaredExponential(length_scale=y)
                g = GPR(kse, sigma_noise=x, regression=regression)
                g.states.enable("log_marginal_likelihood")
                g.train(dataset)
                lml[i, j] = g.log_marginal_likelihood

                if lml[i, j] > lml_best:
                    lml_best = lml[i, j]
                    length_scale_best = y
                    sigma_noise_best = x
                    pass
                j += 1
                pass
            i += 1
            pass
        pylab.figure()
        X = N.repeat(sigma_noise_steps[:, N.newaxis], length_scale_steps.size,
                     axis=1)
        Y = N.repeat(length_scale_steps[N.newaxis, :], sigma_noise_steps.size,
                     axis=0)
        step = (lml.max()-lml.min())/30
        pylab.contour(X, Y, lml, N.arange(lml.min(), lml.max()+step, step),
                      colors='k')
        pylab.plot([sigma_noise_best], [length_scale_best], "k+",
                   markeredgewidth=2, markersize=8)
        pylab.xlabel("noise standard deviation")
        pylab.ylabel("characteristic length_scale")
        pylab.title("log marginal likelihood")
        pylab.axis("tight")
        print "lml_best", lml_best
        print "sigma_noise_best", sigma_noise_best
        print "length_scale_best", length_scale_best
        print "expected number of upcrossings on the unit interval:", \
              1.0/(2*N.pi*length_scale_best)
        pass


    compute_prediction(sigma_noise_best, 1.0, length_scale_best, regression,
                       dataset, dataset_test.samples, dataset_test.labels, F,
                       logml)
    pylab.show()