1
2
3
4
5
6
7
8
9 """Penalized logistic regression classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15
16 from mvpa.misc.exceptions import ConvergenceError
17 from mvpa.clfs.base import Classifier
18
19 if __debug__:
20 from mvpa.misc import debug
21
22
class PLR(Classifier):
    """Penalized logistic regression `Classifier`.
    """

    def __init__(self, lm=1, criterion=1, reduced=False, maxiter=20, **kwargs):
        """
        Initialize a penalized logistic regression analysis

        :Parameters:
          lm : int
            the penalty term lambda.
          criterion : int
            the criterion applied to judge convergence.
          reduced : Bool
            if not False, the rank of the data is reduced before
            performing the calculations. In that case, reduce is taken
            as the fraction of the first singular value, at which a
            dimension is not considered significant anymore. A
            reasonable criterion is reduced=0.01
          maxiter : int
            maximum number of iterations. If no convergence occurs
            after this number of iterations, an exception is raised.

        """
        # init the base class first so state/parameter handling is set up
        Classifier.__init__(self, **kwargs)

        # stash hyper-parameters (name-mangled privates) for _train()
        self.__lm = lm
        self.__criterion = criterion
        self.__reduced = reduced
        self.__maxiter = maxiter
55
57 """String summary over the object
58 """
59 return """PLR(lm=%f, criterion=%d, reduced=%s, maxiter=%d, enable_states=%s)""" % \
60 (self.__lm, self.__criterion, self.__reduced, self.__maxiter,
61 str(self.states.enabled))
62
63
65 """Train the classifier using `data` (`Dataset`).
66 """
67
68 X = data.samples.T
69 d = data.labels
70 if not list(set(d)) == [0, 1]:
71 raise ValueError, \
72 "Regressors for logistic regression should be [0,1]"
73
74 if self.__reduced:
75
76 from scipy.linalg import svd
77
78
79
80 U, S, V = svd(X.T)
81 S /= S[0]
82 V = N.matrix(V[:, :N.max(N.where(S > self.__reduced)) + 1])
83
84 X = (X.T * V).T
85
86 nfeatures, npatterns = X.shape
87
88
89 w = N.matrix(N.zeros( (nfeatures + 1, 1), 'd'))
90
91 dw = N.matrix(N.ones( (nfeatures + 1, 1), 'd'))
92
93 X = N.matrix( \
94 N.concatenate((X, N.ones((1, npatterns), 'd')), 0) \
95 )
96 p = N.matrix(N.zeros((1, npatterns), 'd'))
97
98 Lambda = self.__lm * N.identity(nfeatures + 1, 'd')
99 Lambda[nfeatures, nfeatures] = 0
100
101 g = N.matrix(N.zeros((nfeatures + 1, 1), 'd'))
102
103 H = N.matrix(N.identity(nfeatures + 1, 'd'))
104
105
106 k = 0
107 while N.sum(N.ravel(dw.A ** 2)) > self.__criterion:
108 p[:, :] = self.__f(w.T * X)
109 g[:, :] = X * (d - p).T - Lambda * w
110 H[:, :] = X * N.diag(p.A1 * (1 - p.A1)) * X.T + Lambda
111 dw[:, :] = H.I * g
112 w += dw
113 k += 1
114 if k > self.__maxiter:
115 raise ConvergenceError, \
116 "More than %d Iterations without convergence" % \
117 (self.__maxiter)
118
119 if __debug__:
120 debug("PLR", \
121 "PLR converged after %d steps. Error: %g" % \
122 (k, N.sum(N.ravel(dw.A ** 2))))
123
124 if self.__reduced:
125
126
127 self.w = V * w[:-1]
128 self.offset = w[-1]
129 else:
130 self.w = w[:-1]
131 self.offset = w[-1]
132
133
135 """This is the logistic function f, that is used for determination of
136 the vector w"""
137 return 1. / (1 + N.exp(-y))
138
139
159