Package mvpa :: Package clfs :: Module kernel
[hide private]
[frames] | no frames]

Source Code for Module mvpa.clfs.kernel

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com> 
  6  #   See COPYING file distributed along with the PyMVPA package for the 
  7  #   copyright and license terms. 
  8  # 
  9  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
 10  """Kernels for Gaussian Process Regression and Classification.""" 
 11   
 12  __docformat__ = 'restructuredtext' 
 13   
 14   
 15  import numpy as N 
 16   
 17  if __debug__: 
 18      from mvpa.misc import debug 
 19   
20 -class Kernel(object):
21 """Kernel function base class. 22 23 """ 24
25 - def __init__(self):
26 self.euclidean_distance_matrix = None
27
28 - def __repr__(self):
29 return "Kernel()"
30
31 - def euclidean_distance(self, data1, data2=None, weight=None, 32 symmetric=False):
33 """Compute weighted euclidean distance matrix between two datasets. 34 35 36 :Parameters: 37 data1 : numpy.ndarray 38 first dataset 39 data2 : numpy.ndarray 40 second dataset. If None set symmetric to True. 41 (Defaults to None) 42 weight : numpy.ndarray 43 vector of weights, each one associated to each dimension of the 44 dataset (Defaults to None) 45 symmetric : bool 46 compute the euclidean distance between the first dataset versus 47 itself (True) or the second one (False). Note that 48 (Defaults to False) 49 """ 50 51 if data2 is None: 52 data2 = data1 53 symmetric = True 54 pass 55 56 size1 = data1.shape[0] 57 size2 = data2.shape[0] 58 F = data1.shape[1] 59 if weight is None: 60 weight = N.ones(F,'d') # unitary weight 61 pass 62 63 euclidean_distance_matrix = N.zeros((data1.shape[0], data2.shape[0]), 64 'd') 65 # In the following you can find faster implementations of the 66 # following code: 67 # 68 # for i in range(size1): 69 # for j in range(size2): 70 # euclidean_distance_matrix[i,j] = ((data1[i,:]-data2[j,:])**2*weight).sum() 71 # pass 72 # pass 73 74 # Fast computation of distance matrix in Python+NumPy, 75 # adapted from Bill Baxter's post on [numpy-discussion]. 76 # Basically: (x-y)**2*w = x*w*x - 2*x*w*y + y*y*w 77 data1w = data1*weight 78 euclidean_distance_matrix = (data1w*data1).sum(1)[:,None] \ 79 -2*N.dot(data1w,data2.T)+ \ 80 (data2*data2*weight).sum(1) 81 # correction to some possible numerical instabilities: 82 euclidean_distance_matrix[euclidean_distance_matrix<0] = 0 83 self.euclidean_distance_matrix = euclidean_distance_matrix 84 return self.euclidean_distance_matrix
85 pass
86 87
88 -class KernelSquaredExponential(Kernel):
89 """The Squared Exponential kernel class. 90 91 Note that it can handle a length scale for each dimension for 92 Automtic Relevance Determination. 93 94 """
95 - def __init__(self, length_scale=0.01, **kwargs):
96 """Initialize a Squared Exponential kernel instance. 97 98 :Parameters: 99 length_scale : float OR numpy.ndarray 100 the characteristic length-scale (or length-scales) of the 101 phenomenon under investigation. 102 (Defaults to 0.01) 103 """ 104 # init base class first 105 Kernel.__init__(self, **kwargs) 106 107 self.length_scale = length_scale 108 self.kernel_matrix = None
109 110
111 - def __repr__(self):
112 return "%s(length_scale=%s)" % (self.__class__.__name__, str(self.length_scale))
113
114 - def compute(self, data1, data2=None):
115 """Compute kernel matrix. 116 117 :Parameters: 118 data1 : numpy.ndarray 119 data 120 data2 : numpy.ndarray 121 data 122 (Defaults to None) 123 """ 124 self.kernel_matrix = N.exp(-self.euclidean_distance(data1, data2, weight=(0.5/self.length_scale**2))) 125 return self.kernel_matrix
126
127 - def gradient(self,data1,data2):
128 """Compute gradient of the kernel matrix. A must for fast 129 model selection with high-dimensional data. 130 """ 131 # TODO SOON 132 grad = None 133 return grad
134
135 - def set_hyperparameters(self,*length_scale):
136 """Facility to set lengthscales. Used model selection. 137 """ 138 self.length_scale = N.array(length_scale) 139 return
140 pass
141 142
143 -class KernelConstant(Kernel):
144 """The constant kernel class. 145 """
146 - def __init__(self, coefficient=None, **kwargs):
147 """Initialize the constant kernel instance. 148 149 :Parameters: 150 coefficient : numpy.ndarray 151 the coefficients of the linear kernel 152 (Defaults to None) 153 """ 154 # init base class first 155 Kernel.__init__(self, **kwargs) 156 157 self.coefficient = coefficient 158 self.kernel_matrix = None
159 160
161 - def __repr__(self):
162 return "%s(coefficient=%s)" % (self.__class__.__name__, str(self.coefficient))
163
164 - def compute(self, data1, data2=None):
165 """Compute kernel matrix. 166 167 :Parameters: 168 data1 : numpy.ndarray 169 data 170 data2 : numpy.ndarray 171 data 172 (Defaults to None) 173 """ 174 if data2 == None: 175 data2 = data1 176 pass 177 self.kernel_matrix = (self.coefficient**2)*N.ones((data1.shape[0],data2.shape[0])) 178 return self.kernel_matrix
179 180
181 - def set_hyperparameters(self,*coefficient):
182 self.coefficient = N.array(coefficient) 183 return
184 pass
185 186
187 -class KernelLinear(KernelConstant):
188 """The linear kernel class. 189 """
190 - def compute(self, data1, data2=None):
191 """Compute kernel matrix. 192 193 :Parameters: 194 data1 : numpy.ndarray 195 data 196 data2 : numpy.ndarray 197 data 198 (Defaults to None) 199 """ 200 if data2 == None: 201 data2 = data1 202 pass 203 self.kernel_matrix = N.dot(data1*(self.coefficient**2),data2.T) 204 return self.kernel_matrix
205 206 pass
207 208
209 -class KernelMatern(Kernel):
210 """The Matern kernel class. 211 """ 212 # TODO
213 - def __init__(self):
214 raise NotImplementedError
215 216 217 if __name__ == "__main__": 218 219 from mvpa.misc import data_generators 220 221 N.random.seed(1) 222 data = N.random.rand(4, 2) 223 224 k = Kernel() 225 print k 226 edm = k.euclidean_distance(data) 227 228 kse = KernelSquaredExponential() 229 print kse 230 ksem = kse.compute(data) 231 232 dataset = data_generators.wr1996() 233 print dataset 234 data = dataset.samples 235 labels = dataset.labels 236 237 kl = KernelLinear(coefficient=N.ones(data.shape[1])) 238 print kl 239 240 kc = KernelConstant(coefficient=1.0) 241 print kc 242