Package mvpa :: Package mappers :: Module mask
[hide private]
[frames] | no frames]

Source Code for Module mvpa.mappers.mask

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Data mapper""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14  from operator import isSequenceType 
 15   
 16  from mvpa.mappers.metric import MetricMapper 
 17  from mvpa.datasets.metric import DescreteMetric, cartesianDistance 
 18  from mvpa.base.dochelpers import enhancedDocString 
 19   
 20  if __debug__: 
 21      from mvpa.misc import warning 
 22      from mvpa.misc.support import isSorted 
 23   
 24   
 25   
class MaskMapper(MetricMapper):
    """Mapper which uses a binary mask to select "Features".

    The nonzero elements of the mask define which elements of the original
    dataspace ("in" space) become features ("out" space).  Features are
    numbered in the order returned by ``mask.nonzero()``.
    """

    def __init__(self, mask, metric=None,
                 distance_function=cartesianDistance, elementsize=None):
        """Initialize MaskMapper

        :Parameters:
          mask : array
            an array in the original dataspace and its nonzero elements are
            used to define the features included in the dataset
          metric : Metric
            Corresponding metric for the space. No attempt is made to
            determine whether a certain metric is reasonable for this
            mapper. If `metric` is None -- `DescreteMetric`
            is constructed using `elementsize` and `distance_function`
            listed below.
          distance_function : functor
            Distance function to use as the parameter to
            `DescreteMetric` if `metric` is not specified,
          elementsize : list or scalar
            Determines spacing within `DescreteMetric`. If it is given as a
            scalar, corresponding value is assigned to all dimensions, which
            are found within `mask`. If None, a spacing of 1 is used for
            every dimension.

        :Raises:
          ValueError
            if `elementsize` is a sequence whose length differs from the
            number of dimensions of `mask`.

        :Note: parameters `elementsize` and `distance_function` are relevant
               only if `metric` is None
        """
        if metric is None:
            ndim = len(mask.shape)
            if elementsize is None:
                elementsize = [1] * ndim
            elif isSequenceType(elementsize):
                if len(elementsize) != ndim:
                    raise ValueError(
                        "Number of elements in elementsize [%d]"
                        % len(elementsize) + " doesn't match shape "
                        + "of the mask [%s]" % repr(mask.shape))
            else:
                # scalar: use the same spacing along every dimension
                elementsize = [elementsize] * ndim
            # Pass the validated spacing on; previously it was computed but
            # a unit spacing was always handed to the metric.
            metric = DescreteMetric(elementsize=elementsize,
                                    distance_function=distance_function)

        MetricMapper.__init__(self, metric)

        # Declare all private members up front (keeps pylint happy); the
        # actual values are filled in by _initMask().
        self.__mask = self.__maskdim = self.__masksize = \
                      self.__masknonzerosize = self.__forwardmap = \
                      self.__masknonzero = None
        self._initMask(mask)


    __doc__ = enhancedDocString('MaskMapper', locals(), MetricMapper)


    def __str__(self):
        """Return a short summary: total mask size -> number of features."""
        return "MaskMapper: %d -> %d" \
            % (self.__masksize, self.__masknonzerosize)


    def __deepcopy__(self, memo=None):
        """Return a copy of the mapper without re-running `_initMask()`.

        The mask-derived structures are copied; the metric instance is
        shared with the original (it is passed as-is to the base class
        constructor).
        """
        # XXX memo does not seem to be used
        if memo is None:
            memo = {}
        from copy import deepcopy
        # XXX might be necessary to deepcopy 'self.metric' as well
        # to some degree reimplement the constructor to prevent calling the
        # expensive _initMask() again
        out = MaskMapper.__new__(MaskMapper)
        MetricMapper.__init__(out, self.metric)
        out.__mask = self.__mask.copy()
        out.__maskdim = self.__maskdim
        out.__masksize = self.__masksize
        out.__masknonzero = deepcopy(self.__masknonzero)
        out.__masknonzerosize = self.__masknonzerosize
        out.__forwardmap = self.__forwardmap.copy()

        return out


    def _initMask(self, mask):
        """Initialize internal state with mask-derived information

        Builds the structures used for fast forward and reverse lookup so
        that later operations do not have to pay for them.
        """
        # NOTE: If any new class member are added here __deepcopy__() has to
        #       be adjusted accordingly!

        self.__mask = (mask != 0)
        self.__maskdim = len(mask.shape)
        self.__masksize = N.prod(mask.shape)

        # Following introduces space penalty but are needed
        # for efficient processing.
        # Store all coordinates for backward mapping
        self.__masknonzero = mask.nonzero()
        self.__masknonzerosize = len(self.__masknonzero[0])

        # Store forward mapping (ie from coord into outId)
        # TODO to save space might take appropriate int type
        #     depending on masknonzerosize
        # it could be done with a dictionary, but since mask
        # might be relatively big, it is better to simply use
        # a chunk of RAM ;-)
        #
        # Entries outside of the mask stay 0, which is also the id of the
        # first feature -- so the mask has to be consulted before trusting
        # a value read from the forward map (getOutId() does that).
        self.__forwardmap = N.zeros(mask.shape, dtype=N.int64)
        self.__forwardmap[self.__masknonzero] = \
            N.arange(self.__masknonzerosize)


    def forward(self, data):
        """Map data from the original dataspace into featurespace.

        `data` must either have exactly the shape of the mask, or have one
        additional leading dimension (e.g. samples).

        :Raises:
          ValueError
            if the trailing dimensions of `data` do not match the mask
            shape, or if `data` has more than one extra dimension.
        """
        datadim = len(data.shape)
        datashape = data.shape[-self.__maskdim:]
        if datashape != self.__mask.shape:
            raise ValueError(
                "The shape of data to be mapped %s " % repr(datashape)
                + " does not match the mapper's mask shape %s"
                % repr(self.__mask.shape))

        if self.__maskdim == datadim:
            # we had to select by __masknonzero if we didn't sort
            # Ids and wanted to preserve the order
            #return data[ self.__masknonzero ]
            return data[self.__mask]
        elif self.__maskdim + 1 == datadim:
            # XXX XXX XXX below line should be accomodated also
            # to make use of self.__masknonzero instead of
            # plain mask if we want to preserve the (re)order
            return data[:, self.__mask]
        else:
            raise ValueError(
                "Shape of the to be mapped data, does not match the "
                "mapper mask. Only one (optional) additional dimension "
                "exceeding the mask shape is supported.")


    def reverse(self, data):
        """Reverse map data from featurespace into the original dataspace.

        1d data is mapped into an array of the mask shape; 2d data is
        mapped row by row into an array of shape ``(nrows,) + mask shape``.
        Elements outside of the mask are set to zero.

        :Raises:
          ValueError
            if `data` is neither 1d nor 2d.
        """
        datadim = len(data.shape)
        if datadim not in [1, 2]:
            raise ValueError(
                "Only 2d or 1d data can be reverse mapped.")

        if datadim == 1:
            mapped = N.zeros(self.__mask.shape, dtype=data.dtype)
            mapped[self.__mask] = data
        elif datadim == 2:
            mapped = N.zeros(data.shape[:1] + self.__mask.shape,
                             dtype=data.dtype)
            mapped[:, self.__mask] = data

        return mapped


    def getInShape(self):
        """InShape is a shape of original mask"""
        return self.__mask.shape


    def getInSize(self):
        """InSize is the total number of elements in the original mask"""
        return self.__masksize


    def getOutShape(self):
        """OutShape is a shape of target dataset"""
        # should worry about state-full class.
        # TODO: add exception 'InvalidStateError' which is raised
        #       by some class if state is not yet defined:
        #         classifier has not yet been trained
        #         mapped yet see the dataset
        raise NotImplementedError


    def getOutSize(self):
        """OutSize is a number of non-0 elements in the mask"""
        return self.__masknonzerosize


    def getMask(self, copy = True):
        """By default returns a copy of the current mask.

        If 'copy' is set to False a reference to the mask is returned instead.
        This shared mask must not be modified!
        """
        if copy:
            return self.__mask.copy()
        else:
            return self.__mask


    def getInId(self, outId):
        """Returns a features coordinate in the original data space
        for a given feature id.

        If this method is called with a list of feature ids it returns a
        2d-array where the first axis corresponds the dimensions in 'In'
        dataspace and along the second axis are the coordinates of the features
        on this dimension (like the output of NumPy.array.nonzero()).

        XXX it might become __get_item__ access method

        """
        # XXX Might be improved by storing also transpose of
        #     __masknonzero
        return N.array([self.__masknonzero[dim][outId]
                        for dim in range(self.__maskdim)])


    def getInIds(self):
        """Returns a 2d array where each row contains the coordinate of the
        feature with the corresponding id.
        """
        return N.transpose(self.__masknonzero)


    def getOutId(self, coord):
        """Translate a feature mask coordinate into a feature ID.

        :Raises:
          ValueError
            if `coord` does not have one entry per mask dimension, lies
            outside of the mask array (negative entries included), or
            points to an element which is not part of the mask.
        """
        try:
            tcoord = tuple(coord)
            if len(tcoord) != self.__maskdim:
                # reported below as a dimensionality problem
                raise TypeError("wrong number of coordinates")
            # Arrays accept negative indexes counting from the end; reject
            # them explicitly so coordinates do not silently wrap around.
            for c in tcoord:
                if c < 0:
                    raise IndexError("negative coordinate")
            if self.__mask[tcoord] == 0:
                raise ValueError(
                    "The point %s didn't belong to the mask" % repr(coord))
            return self.__forwardmap[tcoord]
        except TypeError:
            raise ValueError(
                "Coordinates %s are of incorrect dimension. " % repr(coord)
                + "The mask has %d dimensions." % self.__maskdim)
        except IndexError:
            raise ValueError(
                "Coordinates %s are out of mask boundary. " % repr(coord)
                + "The mask is of %s shape." % repr(self.__mask.shape))


    def selectOut(self, outIds, sort=False):
        """Only listed outIds would remain.

        The function used to accept a matrix-mask as the input but now
        it really has to be a list of IDs

        Function assumes that outIds are sorted. If not - please set
        sort to True (`outIds` is then sorted in place). While in
        __debug__ mode selectOut would check if obtained IDs are sorted
        and would warn the user if they are not.

        If you feel strongly that you need to remap features
        internally (ie to allow Ids with mixed order) please contact
        developers of mvpa to discuss your use case.

        See `tests.test_maskmapper.testSelectOrder` for basic testing

        Feature/Bug:
         * Negative outIds would not raise exception - just would be
           treated 'from the tail'

        Older comments on 'order' - might be useful in future if
        reordering gets ressurrected
        Order will be taken into account -- ie items will be
        remapped if order was changed... need to check if neighboring
        still works... no -- it doesn't. For the data without samples
        .forward can be easily adjusted by using masknonzero instead of
        plain mask, but for data with samplesI don't see a clean way...
        see forward() above... there is no testcase for order preservation
        for DIM+1 case
        """
        if sort:
            outIds.sort()
        elif __debug__:
            # per short conversation with Michael -- we should not
            # allow reordering since we saw no viable use case for
            # it. Thus -- warn user is outIds are not in sorted order
            # and no sorting was requested may be due to performance
            # considerations
            if not isSorted(outIds):
                warning("IDs for selectOut must be provided " +
                        "in sorted order, otherwise .forward() would fail"+
                        " on the data with multiple samples")

        # adjust mask and forwardmap
        # NOTE(review): self.nfeatures is not defined in this class --
        # presumably provided by a base class; confirm.
        discarded = N.ones(self.nfeatures, dtype=bool)
        discarded[outIds] = False    # create a map of discarded Ids
        discardedin = tuple(self.getInId(discarded))
        self.__mask[discardedin] = False

        self.__masknonzerosize = len(outIds)
        # Keep this a tuple (as produced by nonzero() in _initMask): NumPy
        # only treats a tuple of index arrays as a multidimensional index.
        self.__masknonzero = tuple([x[outIds] for x in self.__masknonzero])

        # adjust/remap not discarded in forwardmap
        # since we merged _tent/maskmapper-init-noloop it is not necessary
        # to zero-out discarded entries since we anyway would check with mask
        # in getOutId(s)
        self.__forwardmap[self.__masknonzero] = \
            N.arange(self.__masknonzerosize)


    def discardOut(self, outIds):
        """Listed outIds would be discarded

        The remaining features are renumbered consecutively, preserving
        their relative order.
        """

        # adjust mask and forwardmap
        discardedin = tuple(self.getInId(outIds))
        self.__mask[discardedin] = False
        # since we merged _tent/maskmapper-init-noloop it is not necessary
        # to zero-out discarded entries since we anyway would check with mask
        # in getOutId(s)
        # self.__forwardmap[discardedin] = 0

        # Keep this a tuple (as produced by nonzero() in _initMask): NumPy
        # only treats a tuple of index arrays as a multidimensional index.
        self.__masknonzero = tuple([N.delete(x, outIds)
                                    for x in self.__masknonzero])
        # Derive the size from what is actually left, so that duplicate
        # (or scalar) outIds cannot desynchronize the counter.
        self.__masknonzerosize = len(self.__masknonzero[0])

        # adjust/remap not discarded in forwardmap
        self.__forwardmap[self.__masknonzero] = \
            N.arange(self.__masknonzerosize)

        # OPT: we can adjust __forwardmap only for ids which are higher than
        # the smallest outId among discarded. Similar strategy could be done
        # for selectOut but such index has to be figured out first there
        #      ....


    def getNeighborIn(self, inId, radius=0):
        """Return the list of coordinates for the neighbors.

        This is a generator: it yields every neighbor reported by the
        metric which lies inside of the mask volume and is part of the
        mask.

        XXX See TODO below: what to return -- list of arrays or list of tuples?
        """
        # fetch the (shared, not copied) mask once instead of on every
        # iteration
        mask = self.mask
        maskshape = mask.shape
        # TODO Check dimensionality of inId
        for neighbor in self.metric.getNeighbor(inId, radius):
            tneighbor = tuple(neighbor)
            if isInVolume(neighbor, maskshape) and mask[tneighbor] != 0:
                yield neighbor


    def getNeighbor(self, outId, radius=0):
        """Return the list of Ids for the neighbors.

        Returns a list of outIds (as a generator)
        """
        # TODO Check dimensionality of outId
        inId = self.getInId(outId)
        for neighborIn in self.getNeighborIn(inId, radius):
            yield self.getOutId(neighborIn)


    # comment out for now... introduce when needed
    #    def getInEmpty(self):
    #        """Returns empty instance of input object"""
    #        raise NotImplementedError
    #
    #
    #    def getOutEmpty(self):
    #        """Returns empty instance of output object"""
    #        raise NotImplementedError


    def convertOutIds2OutMask(self, outIds):
        """Returns a boolean mask with all features in `outIds` selected.

        :Parameters:
            outIds: list or 1d array
                To be selected features ids in out-space.

        :Returns:
            ndarray: dtype='bool'
                All selected features are set to True; False otherwise.
        """
        # NOTE(review): self.nfeatures is not defined in this class --
        # presumably provided by a base class; confirm.
        fmask = N.zeros(self.nfeatures, dtype=bool)
        fmask[outIds] = True

        return fmask


    def convertOutIds2InMask(self, outIds):
        """Returns a boolean mask with all features in `outIds` selected.

        This method works exactly like Mapper.convertOutIds2OutMask(), but the
        feature mask is finally (reverse) mapped into in-space.

        :Parameters:
            outIds: list or 1d array
                To be selected features ids in out-space.

        :Returns:
            ndarray: dtype='bool'
                All selected features are set to True; False otherwise.
        """
        return self.reverse(self.convertOutIds2OutMask(outIds))


    # Read-only props
    # TODO: refactor the property names? make them vproperty?
    dsshape = property(fget=getInShape)
    mask = property(fget=lambda self:self.getMask(False))
# TODO Unify tuple/array conversion of coordinates. Tuples are needed
#      for easy reference, arrays are needed when doing computation on
#      coordinates: for some reason numpy doesn't handle casting into
#      array from tuples while performing arithmetic operations...

# Helper functions which might be absorbed later on by some module or a class.
def isInVolume(coord, shape):
    """For given coord check if it is within a specified volume size.

    Returns True/False. Assumes that volume coordinates start at 0.
    No more generalization (arbitrary minimal coord) is done to save
    on performance

    :Parameters:
      coord : sequence
        Coordinate to check, one entry per dimension.
      shape : sequence
        Extent of the volume along each dimension; must provide at least
        as many entries as `coord` (an IndexError is raised otherwise).

    XXX: should move somewhere else.
    """
    for i, c in enumerate(coord):
        if c < 0 or c >= shape[i]:
            return False
    return True
461