Package mvpa :: Package clfs :: Module lars
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs.lars

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Least angle regression (LARS) classifier.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  # system imports 
 14  import numpy as N 
 15   
 16  import mvpa.base.externals as externals 
 17  externals.exists('rpy', raiseException=True) 
 18  externals.exists('lars', raiseException=True) 
 19   
 20  import rpy 
 21  rpy.r.library('lars') 
 22   
 23   
 24  # local imports 
 25  from mvpa.clfs.base import Classifier 
 26  from mvpa.measures.base import Sensitivity 
 27   
 28  if __debug__: 
 29      from mvpa.base import debug 
 30   
 31  known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise') 
 32   
class LARS(Classifier):
    """Least angle regression (LARS) `Classifier`.

    LARS is the model selection algorithm from:

    Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
    Tibshirani, Least Angle Regression Annals of Statistics (with
    discussion) (2004) 32(2), 407-499. A new method for variable
    subset selection, with the lasso and 'epsilon' forward stagewise
    methods as special cases.

    Similar to SMLR, it performs a feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    This classifier behaves more like a ridge regression in that it
    returns prediction values and it treats the training labels as
    continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install the R and RPy with
    the following command on Debian-based machines:

    sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling:

    install.packages()

    """

    # XXX from yoh: it is linear, isn't it?
    _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
                       # 'does_feature_selection',
                     ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """
        Initialize LARS.

        See the help in R for further details on the following parameters:

        :Parameters:
          model_type : string
            Type of LARS to run. Can be one of ('lasso', 'lar',
            'forward.stagewise', 'stepwise').
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specify the total number of iterations to run. Each
            iteration adds a feature, but leaving it none will add until
            convergence.
          use_Gram : boolean
            Whether to compute the Gram matrix (this should be false if you
            have more features than samples.)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if model_type not in known_models:
            # BUG FIX: original message concatenated to "...Knownare..."
            # because of a missing separator between the two pieces.
            raise ValueError('Unknown model %s for LARS is specified. '
                             'Known are %s' % (model_type, repr(known_models)))

        # set up the params
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint friendly initializations
        # beta weights for each feature (set after training)
        self.__weights = None
        # R model object returned by lars(); used later for predictions
        self.__trained_model = None

        # It does not make sense to calculate a confusion matrix for a
        # regression
        self.states.enable('training_confusion', False)


    def __repr__(self):
        """String summary of the object
        """
        return """LARS(type=%s, normalize=%s, intercept=%s, trace=%s, max_steps=%s, use_Gram=%s, enable_states=%s)""" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                str(self.states.enabled))


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).

        Calls R's ``lars()`` through RPy and stores the fitted model;
        the beta weights of the final step are kept in ``self.__weights``.
        """
        # Collect the keyword arguments shared by both call variants so the
        # rpy.r.lars invocation is written only once.  max_steps is added
        # only when the user specified it, leaving R's own default (run to
        # convergence) in effect otherwise.
        lars_kwargs = dict(type=self.__type,
                           normalize=self.__normalize,
                           intercept=self.__intercept,
                           trace=self.__trace,
                           use_Gram=self.__use_Gram)
        if self.__max_steps is not None:
            lars_kwargs['max_steps'] = self.__max_steps

        # labels must be a column vector for R's lars()
        self.__trained_model = rpy.r.lars(data.samples,
                                          data.labels[:, N.newaxis],
                                          **lars_kwargs)

        # set the weights to the final state (last row of the beta matrix)
        self.__weights = self.__trained_model['beta'][-1, :]


    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        # predict with the final state (i.e., the last step)
        res = rpy.r.predict_lars(self.__trained_model,
                                 data,
                                 mode='step',
                                 s=self.__trained_model['beta'].shape[0])

        fit = N.asarray(res['fit'])
        if len(fit.shape) == 0:
            # if we just got 1 sample with a scalar
            fit = fit.reshape( (1,) )
        return fit


#    def _getFeatureIds(self):
#        """Per Per's description it does feature selection internally,
#        so we need to implement this function and add
#        'does_feature_selection' into _clf_internals"""
#        raise NotImplementedError


    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    weights = property(lambda self: self.__weights)
190 191 192
class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.

    By default LARS provides multiple weights per feature (one per label in
    training dataset). By default, all weights are combined into a single
    sensitivity value. Please, see the `FeaturewiseDatasetMeasure` constructor
    arguments how to customize this behavior.
    """

    # only LARS classifiers may be attached to this analyzer
    _LEGAL_CLFS = [ LARS ]

    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if __debug__:
            debug('LARS',
                  "Extracting weights for LARS - "+
                  "Result: min=%f max=%f" %\
                  (N.min(weights), N.max(weights)))

        return weights
220