1
2
3
4
5
6
7
8
9 """Least angle regression (LARS) classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15
16 import mvpa.base.externals as externals
17 externals.exists('rpy', raiseException=True)
18 externals.exists('lars', raiseException=True)
19
20 import rpy
21 rpy.r.library('lars')
22
23
24
25 from mvpa.clfs.base import Classifier
26 from mvpa.measures.base import Sensitivity
27
28 if __debug__:
29 from mvpa.base import debug
30
# Model types accepted by the R 'lars' routine; validated in LARS.__init__.
known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise')
32
class LARS(Classifier):
    """Least angle regression (LARS) `Classifier`.

    LARS is the model selection algorithm from:

    Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
    Tibshirani, Least Angle Regression Annals of Statistics (with
    discussion) (2004) 32(2), 407-499. A new method for variable
    subset selection, with the lasso and 'epsilon' forward stagewise
    methods as special cases.

    Similar to SMLR, it performs a feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    This classifier behaves more like a ridge regression in that it
    returns prediction values and it treats the training labels as
    continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install the R and RPy with
    the following command on Debian-based machines:

    sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling:

    install.packages()

    """

    # Capability tags used by the classifier warehouse.
    # NOTE(review): the source appears truncated after 'has_sensitivity' --
    # the original list may also have contained 'does_feature_selection';
    # confirm against upstream before relying on these tags.
    _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
                       ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """
        Initialize LARS.

        See the help in R for further details on the following parameters:

        :Parameters:
          model_type : string
            Type of LARS to run. Can be one of ('lasso', 'lar',
            'forward.stagewise', 'stepwise').
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specify the total number of iterations to run. Each
            iteration adds a feature, but leaving it none will add until
            convergence.
          use_Gram : boolean
            Whether to compute the Gram matrix (this should be false if you
            have more features than samples.)

        :Raises:
          ValueError
            If `model_type` is not one of `known_models`.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if not model_type in known_models:
            # FIX: the original message concatenated 'Known' + 'are'
            # without a separating space ("Knownare"); also replaced the
            # Python-2-only backtick repr with repr().
            raise ValueError("Unknown model '%s' for LARS is specified. "
                             "Known models are %s"
                             % (model_type, repr(known_models)))

        # set up the params
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint-friendly initializations
        # beta weights for each feature (set by _train)
        self.__weights = None
        # R model object after training, used by _predict
        self.__trained_model = None

        # It does not make sense to calculate a confusion matrix for a
        # regression-like classifier, so disable it by default.
        self.states.enable('training_confusion', False)

    def __repr__(self):
        """String summary of the object
        """
        return """LARS(type=%s, normalize=%s, intercept=%s, trace=%s, max_steps=%s, use_Gram=%s, enable_states=%s)""" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                str(self.states.enabled))

    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        # Assemble the keyword arguments for R once; max_steps is only
        # passed when the user specified it, so that R's own default
        # (run until convergence) applies otherwise.  RPy maps the
        # underscores to dots, i.e. use_Gram -> use.Gram.
        r_kwargs = dict(type=self.__type,
                        normalize=self.__normalize,
                        intercept=self.__intercept,
                        trace=self.__trace,
                        use_Gram=self.__use_Gram)
        if self.__max_steps is not None:
            r_kwargs['max_steps'] = self.__max_steps

        # labels are passed as a column vector
        self.__trained_model = rpy.r.lars(data.samples,
                                          data.labels[:, N.newaxis],
                                          **r_kwargs)

        # keep the beta weights of the final step as the feature weights
        self.__weights = self.__trained_model['beta'][-1, :]

    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        # predict at the final step of the trained model
        # (rpy maps predict_lars to R's predict.lars)
        res = rpy.r.predict_lars(self.__trained_model,
                                 data,
                                 mode='step',
                                 s=self.__trained_model['beta'].shape[0])

        fit = N.asarray(res['fit'])
        if len(fit.shape) == 0:
            # R returns a scalar for a single sample -- wrap it into a
            # 1-element array so callers always get an array back
            fit = fit.reshape((1,))
        return fit

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    # read-only access to the trained beta weights
    weights = property(lambda self: self.__weights)
191
192
class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.

    By default LARS provides multiple weights per feature (one per label in
    training dataset). By default, all weights are combined into a single
    sensitivity value. Please, see the `FeaturewiseDatasetMeasure` constructor
    arguments how to customize this behavior.
    """

    # classifiers this analyzer may legally be attached to
    _LEGAL_CLFS = [ LARS ]

    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if __debug__:
            debug('LARS',
                  "Extracting weights for LARS - "+
                  "Result: min=%f max=%f" %\
                  (N.min(weights), N.max(weights)))

        return weights
220