
Source Code for Module mvpa.clfs.plr

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Penalized logistic regression classifier."""

__docformat__ = 'restructuredtext'


import numpy as N

from mvpa.misc.exceptions import ConvergenceError
from mvpa.clfs.base import Classifier

if __debug__:
    from mvpa.misc import debug


class PLR(Classifier):
    """Penalized logistic regression `Classifier`.
    """

    def __init__(self, lm=1, criterion=1, reduced=False, maxiter=20, **kwargs):
        """Initialize a penalized logistic regression analysis.

        :Parameters:
          lm : int
            the penalty term lambda.
          criterion : int
            the convergence criterion: training stops once the squared
            norm of the weight update drops to this value or below.
          reduced : bool or float
            if not False, the rank of the data is reduced before
            performing the calculations. In that case, `reduced` is
            taken as the fraction of the first singular value below
            which a dimension is no longer considered significant. A
            reasonable value is reduced=0.01 (see the comment sketch
            after this method).
          maxiter : int
            maximum number of iterations. If no convergence occurs
            after this number of iterations, a `ConvergenceError` is
            raised.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        self.__lm = lm
        self.__criterion = criterion
        self.__reduced = reduced
        self.__maxiter = maxiter
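
    # A worked sketch (illustrative values only) of the `reduced` threshold
    # as applied in _train() below: singular values are normalized by the
    # largest one, and only dimensions above the threshold are retained.
    #
    #   S = N.array([4., 2., .02])
    #   S /= S[0]                        # -> [1., 0.5, 0.005]
    #   N.max(N.where(S > 0.01)) + 1     # -> 2 dimensions are kept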


    def __repr__(self):
        """String summary of the object.
        """
        return "PLR(lm=%f, criterion=%d, reduced=%s, maxiter=%d, " \
               "enable_states=%s)" % \
               (self.__lm, self.__criterion, self.__reduced, self.__maxiter,
                str(self.states.enabled))


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        # Set up the environment for fitting the data
        X = data.samples.T
        d = data.labels
        if not set(d) == set([0, 1]):
            raise ValueError("Labels for logistic regression must be [0, 1]")

        if self.__reduced:
            # Data have reduced rank
            from scipy.linalg import svd

            # Compensate for reduced rank:
            # select only the n largest singular vectors
            U, S, V = svd(X.T)
            S /= S[0]
            V = N.matrix(V[:, :N.max(N.where(S > self.__reduced)) + 1])
            # Map data to the subspace spanned by those vectors
            X = (X.T * V).T

        nfeatures, npatterns = X.shape

        # Weighting vector
        w = N.matrix(N.zeros((nfeatures + 1, 1), 'd'))
        # Error for convergence criterion
        dw = N.matrix(N.ones((nfeatures + 1, 1), 'd'))
        # Patterns of interest in the columns, plus a constant row
        # whose weight acts as the offset
        X = N.matrix(N.concatenate((X, N.ones((1, npatterns), 'd')), 0))
        p = N.matrix(N.zeros((1, npatterns), 'd'))
        # Matrix implementation of penalty term; the offset row is
        # left unpenalized
        Lambda = self.__lm * N.identity(nfeatures + 1, 'd')
        Lambda[nfeatures, nfeatures] = 0
        # Gradient
        g = N.matrix(N.zeros((nfeatures + 1, 1), 'd'))
        # Fisher information matrix
        H = N.matrix(N.identity(nfeatures + 1, 'd'))

        # Optimize with Newton steps until the squared norm of the update
        # falls below the criterion (a plain-array version of this loop is
        # sketched in _irls_sketch() below)
        k = 0
        while N.sum(N.ravel(dw.A ** 2)) > self.__criterion:
            p[:, :] = self.__f(w.T * X)
            g[:, :] = X * (d - p).T - Lambda * w
            H[:, :] = X * N.diag(p.A1 * (1 - p.A1)) * X.T + Lambda
            dw[:, :] = H.I * g
            w += dw
            k += 1
            if k > self.__maxiter:
                raise ConvergenceError(
                    "More than %d iterations without convergence"
                    % (self.__maxiter))

        if __debug__:
            debug("PLR",
                  "PLR converged after %d steps. Error: %g"
                  % (k, N.sum(N.ravel(dw.A ** 2))))

        if self.__reduced:
            # We have computed in rank-reduced space ->
            # project back to the original space
            self.w = V * w[:-1]
            self.offset = w[-1]
        else:
            self.w = w[:-1]
            self.offset = w[-1]


    def __f(self, y):
        """The logistic function used to fit the weight vector w and to
        compute prediction values."""
        return 1. / (1 + N.exp(-y))


    def _predict(self, data):
        """Predict the class labels for the provided data.

        Returns an array of predicted labels.
        """
        # make sure the data are in matrix form
        data = N.matrix(N.asarray(data))

        # get the values and then predictions; since the logistic
        # function is monotone, thresholding the values at 0.5 is
        # equivalent to testing the sign of offset + data * w
        values = N.ravel(self.__f(self.offset + data * self.w))
        predictions = values > 0.5

        # save the state if desired, relying on State._setitem_ to
        # decide if we will actually save the values
        self.predictions = predictions
        self.values = values

        return predictions
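

# A hedged usage sketch (illustrative, not part of the original module).
# The `Dataset` import below assumes the PyMVPA 0.x layout; any container
# exposing `.samples` (patterns x features) and `.labels` (one 0/1 label
# per pattern) should satisfy _train().
if __name__ == '__main__':
    from mvpa.datasets import Dataset

    # two Gaussian blobs, 20 patterns per class, 5 features
    samples = N.vstack((N.random.randn(20, 5) - 1,
                        N.random.randn(20, 5) + 1))
    labels = N.repeat([0, 1], 20)

    clf = PLR(lm=1, criterion=1, maxiter=20)
    clf.train(Dataset(samples=samples, labels=labels))
    print(clf.predict(samples))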