1
2
3
4
5
6
7
8
9 """Data mapper"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14 from operator import isSequenceType
15
16 from mvpa.mappers.metric import MetricMapper
17 from mvpa.datasets.metric import DescreteMetric, cartesianDistance
18 from mvpa.base.dochelpers import enhancedDocString
19
20 if __debug__:
21 from mvpa.misc import warning
22 from mvpa.misc.support import isSorted
23
24
25
class MaskMapper(MetricMapper):
    """Mapper which uses a binary mask to select "Features" """

    # Internal state (all set by `_initMask`, updated by `selectOut` and
    # `discardOut`):
    #   __mask            boolean array, True where a feature is selected
    #   __maskdim         number of dimensions of the mask
    #   __masksize        total number of elements in the mask
    #   __masknonzero     tuple of per-axis coordinate arrays of the features
    #   __masknonzerosize number of selected features
    #   __forwardmap      int64 array of mask shape holding the feature id at
    #                     every selected position

    def __init__(self, mask, metric=None,
                 distance_function=cartesianDistance, elementsize=None):
        """Initialize MaskMapper

        :Parameters:
          mask : array
            an array in the original dataspace and its nonzero elements are
            used to define the features included in the dataset
          metric : Metric
            Corresponding metric for the space. No attempt is made to
            determine whether a certain metric is reasonable for this
            mapper. If `metric` is None -- `DescreteMetric`
            is constructed that assumes an equal (1) spacing of all mask
            elements with a `distance_function` given as a parameter listed
            below.
          distance_function : functor
            Distance function to use as the parameter to
            `DescreteMetric` if `metric` is not specified,
          elementsize : list or scalar
            Determines spacing within `DescreteMetric`. If it is given as a
            scalar, corresponding value is assigned to all dimensions, which
            are found within `mask`

        :Raises:
          ValueError
            if `elementsize` is a sequence whose length differs from the
            number of dimensions of `mask`.

        :Note: parameters `elementsize` and `distance_function` are relevant
               only if `metric` is None
        """
        if metric is None:
            ndim = len(mask.shape)
            if elementsize is None:
                elementsize = [1] * ndim
            elif isSequenceType(elementsize):
                if len(elementsize) != ndim:
                    raise ValueError(
                        "Number of elements in elementsize [%d]"
                        " doesn't match shape of the mask [%s]"
                        % (len(elementsize), repr(mask.shape)))
            else:
                # scalar spacing: same value along every mask dimension
                elementsize = [elementsize] * ndim
            # Pass the normalized spacing on to the metric; previously the
            # value computed above was dropped and unit spacing always used.
            metric = DescreteMetric(elementsize=elementsize,
                                    distance_function=distance_function)

        MetricMapper.__init__(self, metric)

        self.__mask = self.__maskdim = self.__masksize = \
                      self.__masknonzerosize = self.__forwardmap = \
                      self.__masknonzero = None
        self._initMask(mask)


    __doc__ = enhancedDocString('MaskMapper', locals(), MetricMapper)


    def __str__(self):
        """Short description: total mask size -> number of features."""
        return "MaskMapper: %d -> %d" \
               % (self.__masksize, self.__masknonzerosize)


    def __deepcopy__(self, memo=None):
        """Return a copy with independent mask-derived arrays.

        The instance is built via `__new__` so `_initMask` is not rerun;
        the mask, the nonzero coordinates and the forward map are copied.
        The metric object itself is handed to the copy as is (not copied).
        """
        if memo is None:
            memo = {}
        from copy import deepcopy

        out = MaskMapper.__new__(MaskMapper)
        MetricMapper.__init__(out, self.metric)
        out.__mask = self.__mask.copy()
        out.__maskdim = self.__maskdim
        out.__masksize = self.__masksize
        out.__masknonzero = deepcopy(self.__masknonzero)
        out.__masknonzerosize = self.__masknonzerosize
        out.__forwardmap = self.__forwardmap.copy()

        return out


    def _initMask(self, mask):
        """Initialize internal state with mask-derived information

        The structures for fast forward and reverse lookup are built once
        here, so that no performance hit is imposed on later operations.
        """
        # boolean view of the mask: any nonzero element selects a feature
        self.__mask = (mask != 0)
        self.__maskdim = len(mask.shape)
        self.__masksize = N.prod(mask.shape)

        # per-axis coordinates of all selected elements; feature id `i`
        # lives at position i of each coordinate array
        self.__masknonzero = tuple(mask.nonzero())
        self.__masknonzerosize = len(self.__masknonzero[0])

        # coordinate -> feature id lookup. Unselected positions stay 0,
        # which is also a valid id, hence getOutId consults the mask first.
        self.__forwardmap = N.zeros(mask.shape, dtype=N.int64)
        self.__forwardmap[self.__masknonzero] = \
            N.arange(self.__masknonzerosize)


    def forward(self, data):
        """Map data from the original dataspace into featurespace.

        `data` must either have exactly the shape of the mask (a single
        sample) or carry one additional leading dimension (samples).

        :Raises:
          ValueError
            if the trailing dimensions of `data` do not equal the mask
            shape, or if `data` has more than one extra dimension.
        """
        datadim = len(data.shape)
        datashape = data.shape[(-1)*self.__maskdim:]
        if datashape != self.__mask.shape:
            raise ValueError(
                "The shape of data to be mapped %s "
                " does not match the mapper's mask shape %s"
                % (repr(datashape), repr(self.__mask.shape)))

        if self.__maskdim == datadim:
            # single sample: boolean indexing yields a 1d feature vector
            return data[self.__mask]
        elif self.__maskdim + 1 == datadim:
            # samples x mask-shape: yields a samples x features array
            return data[:, self.__mask]
        else:
            raise ValueError(
                "Shape of the to be mapped data, does not match the "
                "mapper mask. Only one (optional) additional dimension "
                "exceeding the mask shape is supported.")


    def reverse(self, data):
        """Reverse map data from featurespace into the original dataspace.

        1d data gives an array of mask shape, 2d data gives an array of
        shape (nsamples,) + mask shape. Elements outside the mask are 0.

        :Raises:
          ValueError
            if `data` is neither 1d nor 2d.
        """
        datadim = len(data.shape)
        if datadim not in (1, 2):
            raise ValueError("Only 2d or 1d data can be reverse mapped.")

        if datadim == 1:
            mapped = N.zeros(self.__mask.shape, dtype=data.dtype)
            mapped[self.__mask] = data
        else:
            mapped = N.zeros(data.shape[:1] + self.__mask.shape,
                             dtype=data.dtype)
            mapped[:, self.__mask] = data

        return mapped


    def getInShape(self):
        """InShape is a shape of original mask"""
        return self.__mask.shape


    def getInSize(self):
        """InSize is the total number of elements in the original mask"""
        return self.__masksize


    def getOutShape(self):
        """OutShape is a shape of target dataset

        Not implemented for this mapper.
        """
        raise NotImplementedError


    def getOutSize(self):
        """OutSize is a number of non-0 elements in the mask"""
        return self.__masknonzerosize


    def getMask(self, copy=True):
        """By default returns a copy of the current mask.

        If 'copy' is set to False a reference to the mask is returned instead.
        This shared mask must not be modified!
        """
        if copy:
            return self.__mask.copy()
        else:
            return self.__mask


    def getInId(self, outId):
        """Returns a features coordinate in the original data space
        for a given feature id.

        If this method is called with a list of feature ids it returns a
        2d-array where the first axis corresponds the dimensions in 'In'
        dataspace and along the second axis are the coordinates of the features
        on this dimension (like the output of NumPy.array.nonzero()).

        XXX it might become __get_item__ access method

        """
        return N.array([axis[outId] for axis in self.__masknonzero])


    def getInIds(self):
        """Returns a 2d array where each row contains the coordinate of the
        feature with the corresponding id.
        """
        return N.transpose(self.__masknonzero)


    def getOutId(self, coord):
        """Translate a feature mask coordinate into a feature ID.

        :Raises:
          ValueError
            if `coord` is not part of the mask, is of incorrect
            dimension, or lies outside of the mask boundary.
        """
        try:
            tcoord = tuple(coord)
            # the forward map holds 0 for unselected elements too, so the
            # mask has to be checked before the lookup
            if self.__mask[tcoord] == 0:
                raise ValueError(
                    "The point %s didn't belong to the mask" % repr(coord))
            return self.__forwardmap[tcoord]
        except TypeError:
            raise ValueError(
                "Coordinates %s are of incorrect dimension. " % repr(coord)
                + "The mask has %d dimensions." % self.__maskdim)
        except IndexError:
            raise ValueError(
                "Coordinates %s are out of mask boundary. " % repr(coord)
                + "The mask is of %s shape." % repr(self.__mask.shape))


    def selectOut(self, outIds, sort=False):
        """Only listed outIds would remain.

        The function used to accept a matrix-mask as the input but now
        it really has to be a list of IDs

        Function assumes that outIds are sorted. If not - please set
        sort to True (note that `outIds` is then sorted in place). While
        in __debug__ mode selectOut would check if obtained IDs are sorted
        and would warn the user if they are not.

        If you feel strongly that you need to remap features
        internally (ie to allow Ids with mixed order) please contact
        developers of mvpa to discuss your use case.

        See `tests.test_maskmapper.testSelectOrder` for basic testing

        Feature/Bug:
         * Negative outIds would not raise exception - just would be
           treated 'from the tail'

        Older comments on 'order' - might be useful in future if
        reordering gets resurrected
        Order will be taken into account -- ie items will be
        remapped if order was changed... need to check if neighboring
        still works... no -- it doesn't. For the data without samples
        .forward can be easily adjusted by using masknonzero instead of
        plain mask, but for data with samples I don't see a clean way...
        see forward() above... there is no testcase for order preservation
        for DIM+1 case
        """
        if sort:
            outIds.sort()
        elif __debug__:
            if not isSorted(outIds):
                warning("IDs for selectOut must be provided " +
                        "in sorted order, otherwise .forward() would fail"+
                        " on the data with multiple samples")

        # switch off every feature which is not listed in outIds
        discarded = N.array([True] * self.nfeatures)
        discarded[outIds] = False
        discardedin = tuple(self.getInId(discarded))
        self.__mask[discardedin] = False

        self.__masknonzerosize = len(outIds)
        # kept as a tuple: a list of arrays is not a valid multidimensional
        # index on current NumPy
        self.__masknonzero = tuple([x[outIds] for x in self.__masknonzero])

        # renumber the remaining features; entries of discarded elements
        # are left as they were, getOutId guards against them via the mask
        self.__forwardmap[self.__masknonzero] = \
            N.arange(self.__masknonzerosize)


    def discardOut(self, outIds):
        """Listed outIds would be discarded

        The remaining features are renumbered consecutively in their
        previous order.
        """
        discardedin = tuple(self.getInId(outIds))
        self.__mask[discardedin] = False

        self.__masknonzerosize -= len(outIds)
        # kept as a tuple: a list of arrays is not a valid multidimensional
        # index on current NumPy
        self.__masknonzero = tuple([N.delete(x, outIds)
                                    for x in self.__masknonzero])

        self.__forwardmap[self.__masknonzero] = \
            N.arange(self.__masknonzerosize)


    def getNeighborIn(self, inId, radius=0):
        """Return the list of coordinates for the neighbors.

        Generator over those neighbors reported by the metric which lie
        inside of the mask volume and belong to the mask.

        XXX See TODO below: what to return -- list of arrays or list of tuples?
        """
        mask = self.mask
        maskshape = mask.shape

        for neighbor in self.metric.getNeighbor(inId, radius):
            tneighbor = tuple(neighbor)
            # volume check comes first: negative coordinates would
            # otherwise silently index the mask from the tail
            if isInVolume(neighbor, maskshape) and mask[tneighbor] != 0:
                yield neighbor


    def getNeighbor(self, outId, radius=0):
        """Return the list of Ids for the neighbors.

        Returns a list of outIds
        """
        inId = self.getInId(outId)
        for neighborIn in self.getNeighborIn(inId, radius):
            yield self.getOutId(neighborIn)


    def convertOutIds2OutMask(self, outIds):
        """Returns a boolean mask with all features in `outIds` selected.

        :Parameters:
            outIds: list or 1d array
                To be selected features ids in out-space.

        :Returns:
            ndarray: dtype='bool'
                All selected features are set to True; False otherwise.
        """
        fmask = N.zeros(self.nfeatures, dtype=bool)
        fmask[outIds] = True

        return fmask


    def convertOutIds2InMask(self, outIds):
        """Returns a boolean mask with all features in `outIds` selected.

        This method works exactly like Mapper.convertOutIds2OutMask(), but the
        feature mask is finally (reverse) mapped into in-space.

        :Parameters:
            outIds: list or 1d array
                To be selected features ids in out-space.

        :Returns:
            ndarray: dtype='bool'
                All selected features are set to True; False otherwise.
        """
        return self.reverse(self.convertOutIds2OutMask(outIds))


    # read-only accessors
    dsshape = property(fget=getInShape)
    mask = property(fget=lambda self:self.getMask(False))
437
438
439
440
441
442
443
444
445
446
447
def isInVolume(coord, shape):
    """For given coord check if it is within a specified volume size.

    Returns True/False. Assumes that volume coordinates start at 0.
    No more generalization (arbitrary minimal coord) is done to save
    on performance

    :Parameters:
      coord : sequence
        Coordinate to check, one value per dimension.
      shape : sequence
        Extent of the volume along each dimension; has to provide an
        entry for every element of `coord`.

    XXX: should move somewhere else.
    """
    for dim, value in enumerate(coord):
        if value < 0 or value >= shape[dim]:
            return False
    return True
461