Package mvpa :: Package featsel :: Module helpers
[hide private]
[frames] | no frames]

Source Code for Module mvpa.featsel.helpers

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  from math import floor 
 14  import numpy as N 
 15   
 16  from mvpa.misc.state import Stateful, StateVariable 
 17   
 18  # 
 19  # Functors to be used for FeatureSelection 
 20  # 
 21   
22 -class BestDetector(object):
23 """Determine whether the last value in a sequence is the best one given 24 some criterion. 25 """
26 - def __init__(self, func=min, lastminimum=False):
27 """Initialize with number of steps 28 29 :Parameters: 30 fun : functor 31 Functor to select the best results. Defaults to min 32 lastminimum : bool 33 Toggle whether the latest or the earliest minimum is used as 34 optimal value to determine the stopping criterion. 35 """ 36 self.__func = func 37 self.__lastminimum = lastminimum 38 self.__bestindex = None 39 """Stores the index of the last detected best value."""
40 41
42 - def __call__(self, errors):
43 """Returns True if the last value in `errors` is the best or False 44 otherwise. 45 """ 46 isbest = False 47 48 # just to prevent ValueError 49 if len(errors)==0: 50 return isbest 51 52 minerror = self.__func(errors) 53 54 if self.__lastminimum: 55 # make sure it is an array 56 errors = N.array(errors) 57 # to find out the location of the minimum but starting from the 58 # end! 59 minindex = N.array((errors == minerror).nonzero()).max() 60 else: 61 minindex = errors.index(minerror) 62 63 self.__bestindex = minindex 64 65 # if minimal is the last one reported -- it is the best 66 if minindex == len(errors)-1: 67 isbest = True 68 69 return isbest
70 71 bestindex = property(fget=lambda self:self.__bestindex)
72 73 74
75 -class StoppingCriterion(object):
76 """Base class for all functors to decide when to stop RFE (or may 77 be general optimization... so it probably will be moved out into 78 some other module 79 """ 80
81 - def __call__(self, errors):
82 """Instruct when to stop. 83 84 Every implementation should return `False` when an empty list is 85 passed as argument. 86 87 Returns tuple `stop`. 88 """ 89 raise NotImplementedError
90 91 92
93 -class MultiStopCrit(StoppingCriterion):
94 """Stop computation if the latest error drops below a certain threshold. 95 """
96 - def __init__(self, crits, mode='or'):
97 """ 98 :Parameters: 99 crits : list of StoppingCriterion instances 100 For each call to MultiStopCrit all of these criterions will 101 be evaluated. 102 mode : any of ('and', 'or') 103 Logical function to determine the multi criterion from the set 104 of base criteria. 105 """ 106 if not mode in ('and', 'or'): 107 raise ValueError, \ 108 "A mode '%s' is not supported." % `mode` 109 110 self.__mode = mode 111 self.__crits = crits
112 113
114 - def __call__(self, errors):
115 """Evaluate all criteria to determine the value of the multi criterion. 116 """ 117 # evaluate all crits 118 crits = [ c(errors) for c in self.__crits ] 119 120 if self.__mode == 'and': 121 return N.all(crits) 122 else: 123 return N.any(crits)
124 125 126
127 -class FixedErrorThresholdStopCrit(StoppingCriterion):
128 """Stop computation if the latest error drops below a certain threshold. 129 """
130 - def __init__(self, threshold):
131 """Initialize with threshold. 132 133 :Parameters: 134 threshold : float [0,1] 135 Error threshold. 136 """ 137 StoppingCriterion.__init__(self) 138 if threshold > 1.0 or threshold < 0.0: 139 raise ValueError, \ 140 "Threshold %f is out of a reasonable range [0,1]." \ 141 % `threshold` 142 self.__threshold = threshold
143 144
145 - def __call__(self, errors):
146 """Nothing special.""" 147 if len(errors)==0: 148 return False 149 if errors[-1] < self.__threshold: 150 return True 151 else: 152 return False
153 154 155 threshold = property(fget=lambda x:x.__threshold)
156 157 158
159 -class NStepsStopCrit(StoppingCriterion):
160 """Stop computation after a certain number of steps. 161 """
162 - def __init__(self, steps):
163 """Initialize with number of steps. 164 165 :Parameters: 166 steps : int 167 Number of steps after which to stop. 168 """ 169 StoppingCriterion.__init__(self) 170 if steps < 0: 171 raise ValueError, \ 172 "Number of steps %i is out of a reasonable range." \ 173 % `steps` 174 self.__steps = steps
175 176
177 - def __call__(self, errors):
178 """Nothing special.""" 179 if len(errors) >= self.__steps: 180 return True 181 else: 182 return False
183 184 185 steps = property(fget=lambda x:x.__steps)
186 187 188
189 -class NBackHistoryStopCrit(StoppingCriterion):
190 """Stop computation if for a number of steps error was increasing 191 """ 192
193 - def __init__(self, bestdetector=BestDetector(), steps=10):
194 """Initialize with number of steps 195 196 :Parameters: 197 bestdetector : BestDetector instance 198 used to determine where the best error is located. 199 steps : int 200 How many steps to check after optimal value. 201 """ 202 StoppingCriterion.__init__(self) 203 if steps < 0: 204 raise ValueError, \ 205 "Number of steps (got %d) should be non-negative" % steps 206 self.__bestdetector = bestdetector 207 self.__steps = steps
208 209
210 - def __call__(self, errors):
211 stop = False 212 213 # just to prevent ValueError 214 if len(errors)==0: 215 return stop 216 217 # charge best detector 218 self.__bestdetector(errors) 219 220 # if number of elements after the min >= len -- stop 221 if len(errors) - self.__bestdetector.bestindex > self.__steps: 222 stop = True 223 224 return stop
225 226 steps = property(fget=lambda x:x.__steps)
227 228 229
230 -class ElementSelector(Stateful):
231 """Base class to implement functors to select some elements based on a 232 sequence of values. 233 """ 234 235 ndiscarded = StateVariable(True, 236 doc="Store number of discarded elements.") 237
238 - def __init__(self, mode='discard', **kwargs):
239 """Cheap initialization. 240 241 :Parameters: 242 mode : ['discard', 'select'] 243 Decides whether to `select` or to `discard` features. 244 """ 245 Stateful.__init__(self, **kwargs) 246 247 self._setMode(mode) 248 """Flag whether to select or to discard elements."""
249 250
251 - def _setMode(self, mode):
252 """Choose `select` or `discard` mode.""" 253 254 if not mode in ['discard', 'select']: 255 raise ValueError, "Unkown selection mode [%s]. Can only be one " \ 256 "of 'select' or 'discard'." % mode 257 258 self.__mode = mode
259 260
261 - def __call__(self, seq):
262 """Implementations in derived classed have to return a list of selected 263 element IDs based on the given sequence. 264 """ 265 raise NotImplementedError
266 267 mode = property(fget=lambda self:self.__mode, fset=_setMode)
268 269
270 -class RangeElementSelector(ElementSelector):
271 """Select elements based on specified range of values""" 272
273 - def __init__(self, lower=None, upper=None, inclusive=False, 274 mode='select', **kwargs):
275 """Initialization `RangeElementSelector` 276 277 :Parameters: 278 lower 279 If not None -- select elements which are above of 280 specified value 281 upper 282 If not None -- select elements which are lower of 283 specified value 284 inclusive 285 Either to include end points 286 mode 287 overrides parent's default to be 'select' since it is more 288 native for RangeElementSelector 289 XXX TODO -- unify?? 290 291 `upper` could be lower than `lower` -- then selection is done 292 on values <= lower or >=upper (ie tails). This would produce 293 the same result if called with flipped values for mode and 294 inclusive. 295 296 If no upper no lower is set, assuming upper,lower=0, thus 297 outputing non-0 elements 298 """ 299 300 if lower is None and upper is None: 301 lower, upper = 0, 0 302 """Lets better return non-0 values if none of bounds is set""" 303 304 # init State before registering anything 305 ElementSelector.__init__(self, mode=mode, **kwargs) 306 307 self.__range = (lower, upper) 308 """Values on which to base selection""" 309 310 self.__inclusive = inclusive
311
312 - def __call__(self, seq):
313 """Returns selected IDs. 314 """ 315 lower, upper = self.__range 316 len_seq = len(seq) 317 if not lower is None: 318 if self.__inclusive: 319 selected = seq >= lower 320 else: 321 selected = seq > lower 322 else: 323 selected = N.ones( (len_seq), dtype=N.bool ) 324 325 if not upper is None: 326 if self.__inclusive: 327 selected_upper = seq <= upper 328 else: 329 selected_upper = seq < upper 330 if not lower is None: 331 if lower < upper: 332 # regular range 333 selected = N.logical_and(selected, selected_upper) 334 else: 335 # outside, though that would be similar to exclude 336 selected = N.logical_or(selected, selected_upper) 337 else: 338 selected = selected_upper 339 340 if self.mode == 'discard': 341 selected = N.logical_not(selected) 342 343 return N.where(selected)[0]
344 345
346 -class TailSelector(ElementSelector):
347 """Select elements from a tail of a distribution. 348 349 The default behaviour is to discard the lower tail of a given distribution. 350 """ 351 352 # TODO: 'both' to select from both tails
353 - def __init__(self, tail='lower', sort=True, **kwargs):
354 """Initialize TailSelector 355 356 :Parameters: 357 tail : ['lower', 'upper'] 358 Choose the tail to be processed. 359 sort : bool 360 Flag whether selected IDs will be sorted. Disable if not 361 necessary to save some CPU cycles. 362 363 """ 364 # init State before registering anything 365 ElementSelector.__init__(self, **kwargs) 366 367 self._setTail(tail) 368 """Know which tail to select.""" 369 370 self.__sort = sort
371 372
373 - def _setTail(self, tail):
374 """Set the tail to be processed.""" 375 if not tail in ['lower', 'upper']: 376 raise ValueError, "Unkown tail argument [%s]. Can only be one " \ 377 "of 'lower' or 'upper'." % tail 378 379 self.__tail = tail
380 381
382 - def _getNElements(self, seq):
383 """In derived classes has to return the number of elements to be 384 processed given a sequence values forming the distribution. 385 """ 386 raise NotImplementedError
387 388
389 - def __call__(self, seq):
390 """Returns selected IDs. 391 """ 392 # TODO: Think about selecting features which have equal values but 393 # some are selected and some are not 394 len_seq = len(seq) 395 # how many to select (cannot select more than available) 396 nelements = min(self._getNElements(seq), len_seq) 397 398 # make sure that data is ndarray and compute a sequence rank matrix 399 # lowest value is first 400 seqrank = N.array(seq).argsort() 401 402 if self.mode == 'discard' and self.__tail == 'upper': 403 good_ids = seqrank[:-1*nelements] 404 self.ndiscarded = nelements 405 elif self.mode == 'discard' and self.__tail == 'lower': 406 good_ids = seqrank[nelements:] 407 self.ndiscarded = nelements 408 elif self.mode == 'select' and self.__tail == 'upper': 409 good_ids = seqrank[-1*nelements:] 410 self.ndiscarded = len_seq - nelements 411 else: # select lower tail 412 good_ids = seqrank[:nelements] 413 self.ndiscarded = len_seq - nelements 414 415 # sort ids to keep order 416 # XXX should we do here are leave to other place 417 if self.__sort: 418 good_ids.sort() 419 420 return good_ids
421 422 423
424 -class FixedNElementTailSelector(TailSelector):
425 """Given a sequence, provide set of IDs for a fixed number of to be selected 426 elements. 427 """ 428
429 - def __init__(self, nelements, **kwargs):
430 """Cheap initialization. 431 432 :Parameters: 433 nselect : int 434 Number of elements to select/discard. 435 """ 436 TailSelector.__init__(self, **kwargs) 437 self._setNElements(nelements)
438 439
440 - def __repr__(self):
441 return "%s number=%f" % ( 442 TailSelector.__repr__(self), self.__nselect)
443 444
445 - def _getNElements(self, seq):
446 return self.__nelements
447 448
449 - def _setNElements(self, nelements):
450 if __debug__: 451 if nelements <= 0: 452 raise ValueError, "Number of elements less or equal to zero " \ 453 "does not make sense." 454 455 self.__nelements = nelements
456 457 458 nelements = property(fget=lambda x:x.__nelements, 459 fset=_setNElements)
460 461 462
463 -class FractionTailSelector(TailSelector):
464 """Given a sequence, provide Ids for a fraction of elements 465 """ 466
467 - def __init__(self, felements, **kwargs):
468 """Cheap initialization. 469 470 :Parameters: 471 felements : float (0,1.0] 472 Fraction of elements to select/discard. Note: Even when 0.0 is 473 specified at least one element will be selected. 474 """ 475 TailSelector.__init__(self, **kwargs) 476 self._setFElements(felements)
477 478
479 - def __repr__(self):
480 return "%s fraction=%f" % ( 481 TailSelector.__repr__(self), self.__felements)
482 483
484 - def _getNElements(self, seq):
485 num = int(floor(self.__felements * len(seq))) 486 num = max(1, num) # remove at least 1 487 # no need for checks as base class will do anyway 488 #return min(num, nselect) 489 return num
490 491
492 - def _setFElements(self, felements):
493 """What fraction to discard""" 494 if felements > 1.0 or felements < 0.0: 495 raise ValueError, \ 496 "Fraction (%f) cannot be outside of [0.0,1.0]" \ 497 % felements 498 499 self.__felements = felements
500 501 502 felements = property(fget=lambda x:x.__felements, 503 fset=_setFElements)
504