1
2
3
4
5
6
7
8
9 """Data mapper"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.mappers.metric import Metric
16
17 from mvpa.misc.vproperty import VProperty
18 from mvpa.base.dochelpers import enhancedDocString
19
20 if __debug__:
21 from mvpa.base import warning
22 from mvpa.base import debug
23
24
26 """Interface to provide mapping between two spaces: IN and OUT.
27 Methods are prefixed correspondingly. forward/reverse operate
28 on the entire dataset. get(In|Out)Id[s] operate per element::
29
30 forward
31 --------->
32 IN OUT
33 <--------/
34 reverse
35 """
37 """
38 :Parameters:
39 metric : Metric
40 Optional metric
41 """
42 self.__metric = None
43 """Pylint happiness"""
44 self.setMetric(metric)
45 """Actually assign the metric"""
46
47
48
49
50
51
52
53
55 """Map data from the IN dataspace into OUT space.
56 """
57 raise NotImplementedError
58
59
61 """Reverse map data from OUT space into the IN space.
62 """
63 raise NotImplementedError
64
65
67 """Returns the size of the entity in input space"""
68 raise NotImplementedError
69
70
72 """Returns the size of the entity in output space"""
73 raise NotImplementedError
74
75
77 """Limit the OUT space to a certain set of features.
78
79 :Parameters:
80 outIds: sequence
81 Subset of ids of the current feature in OUT space to keep.
82 """
83 raise NotImplementedError
84
85
87 """Translate a feature id into a coordinate/index in input space.
88
89 Such a translation might not be meaningful or even possible for a
90 particular mapping algorithm and therefore cannot be relied upon.
91 """
92 raise NotImplementedError
93
94
95
96
97
98
100 """Validate feature id in OUT space.
101
102 Override if OUT space is not simply a 1D vector
103 """
104 return(outId >= 0 and outId < self.getOutSize())
105
106
108 """Validate id in IN space.
109
110 Override if IN space is not simply a 1D vector
111 """
112 return(inId >= 0 and inId < self.getInSize())
113
114
def train(self, dataset):
    """Train the mapper on a dataset (no-op hook in this implementation).

    The method exists so that callers can bring a mapper into a state that
    allows it to perform the intended mapping; an implementation that needs
    training is expected to override it.

    :Parameter:
      dataset: Dataset or subclass

    .. note::
      The default behavior of this method is to do nothing; it returns
      `None`.
    """
    # Nothing to learn here: fall through with the implicit result.
    return None
128
129
131 """Get feature neighbors in input space, given an id in output space.
132
133 This method has to be reimplemented whenever a derived class does not
134 provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.
135 """
136 if self.metric is None:
137 raise RuntimeError, "No metric was assigned to %s, thus no " \
138 "neighboring information is present" % self
139
140 if self.isValidOutId(outId):
141 inId = self.getInId(outId)
142 for inId in self.getNeighborIn(inId, *args, **kwargs):
143 yield self.getOutId(inId)
144
145
146
147
148
149
151 """Return the list of coordinates for the neighbors.
152
153 :Parameters:
154 inId
155 id (index) of an element in input dataspace.
156 *args, **kwargs
157 Any additional arguments are passed to the embedded metric of the
158 mapper.
159
160 XXX See TODO below: what to return -- list of arrays or list
161 of tuples?
162 """
163 if self.metric is None:
164 raise RuntimeError, "No metric was assigned to %s, thus no " \
165 "neighboring information is present" % self
166
167 isValidInId = self.isValidInId
168 if isValidInId(inId):
169 for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
170 if isValidInId(neighbor):
171 yield neighbor
172
173
175 """Return the list of coordinates for the neighbors.
176
177 By default it simply constructs the list based on
178 the generator returned by getNeighbor()
179 """
180 return [ x for x in self.getNeighbor(outId, *args, **kwargs) ]
181
182
184 if self.__metric is not None:
185 s = "metric=%s" % repr(self.__metric)
186 else:
187 s = ''
188 return "%s(%s)" % (self.__class__.__name__, s)
189
190
192 """Calls the mappers forward() method.
193 """
194 return self.forward(data)
195
196
198 """To make pylint happy"""
199 return self.__metric
200
201
203 """To make pylint happy"""
204 if metric is not None and not isinstance(metric, Metric):
205 raise ValueError, "metric for Mapper must be an " \
206 "instance of a Metric class . Got %s" \
207 % `type(metric)`
208 self.__metric = metric
209
210
211 metric = property(fget=getMetric, fset=setMetric)
212 nfeatures = VProperty(fget=getOutSize)
213
214
215
217 """Mapper using a projection matrix to transform the data.
218
219 This class cannot be used directly. Sub-classes have to implement
220 the `_train()` method, which has to compute the projection matrix
221 given a dataset (see `_train()` docstring for more information).
222
223 Once the projection matrix is available, this class provides
224 functionality to perform forward and backwards mapping of data, the
225 latter using the hermitian (conjugate) transpose of the projection
226 matrix. Additionally, `ProjectionMapper` supports optional (but done
227 by default) demeaning of the data and selection of arbitrary
228 component (i.e. columns of the projection matrix) of the projection.
229
230 Forward and back-projection matrices (a.k.a. *projection* and
231 *reconstruction*) are available via the `proj` and `recon`
232 properties. the latter only after it has been computed (after first
233 call to `reverse`).
234 """
235
def __init__(self, selector=None, demean=True):
    """Initialize the ProjectionMapper

    :Parameters:
      selector: None | list
        Which components (i.e. columns of the projection matrix)
        should be used for mapping. If `selector` is `None` all
        components are used. If a list is provided, all list
        elements are treated as component ids and the respective
        components are selected (all others are discarded).
      demean: bool
        Whether the data should be demeaned while computing
        projections, with the mean applied back while doing reverse()

    """
    Mapper.__init__(self)

    # --- settings supplied by the caller -------------------------------
    # Component ids to keep after training (None keeps all components).
    self._selector = selector
    # Flag whether to demean the to-be-projected data prior to projection.
    self._demean = demean

    # --- state filled in later (training / first reverse mapping) -------
    # Forward projection matrix.
    self._proj = None
    # Reverse projection (reconstruction) matrix.
    self._recon = None
    # Data mean.
    self._mean = None
    # Forward projected data mean.
    self._mean_out = None
265
266 __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)
267
268
def train(self, dataset):
    """Determine the projection matrix.

    Stores the feature-wise mean of `dataset.samples`, delegates the
    computation of the projection matrix to `self._train()` and finally
    applies the component selection requested at construction time
    (if any).

    :Parameter:
      dataset: Dataset or subclass
        Only `dataset.samples.mean(axis=0)` is used here; the dataset is
        then handed to `self._train()` unchanged.
    """
    # Invalidate everything that was derived from a previous projection.
    # Without this, retraining a mapper that has no selector would leave
    # the reconstruction matrix and projected mean of the *old* projection
    # in place, and reverse() would silently keep using them.  Done before
    # _train() so that an implementation which sets them itself is not
    # clobbered.
    self._recon = None
    self._mean_out = None

    # store the feature-wise mean
    self._mean = dataset.samples.mean(axis=0)

    # compute the projection matrix with the subclass logic
    self._train(dataset)

    # perform component selection
    if self._selector is not None:
        self.selectOut(self._selector)
279
280
282 """Worker method. Needs to be implemented by subclass.
283
284 This method has to train the mapper and store the resulting
285 transformation matrix in `self._proj`.
286 """
287 raise NotImplementedError
288
289
def forward(self, data, demean=None):
    """Perform forward projection.

    :Parameters:
      data: ndarray
        Data array to map
      demean: boolean | None
        Override demean setting for this method call. `None` means
        "use the setting given to the constructor".

    :Returns:
      NumPy array

    :Raises:
      RuntimeError
        If no projection matrix is available yet (`self._proj` is None).
    """
    if demean is None:
        demean = self._demean

    if self._proj is None:
        # Call form of raise: valid on Python 2 and 3 alike.  Message
        # wording aligned with the one used by reverse().
        raise RuntimeError("Mapper needs to be trained before used.")

    mdata = N.asmatrix(data)
    # The stored mean is only subtracted when one is actually available.
    if demean and self._mean is not None:
        mdata = mdata - self._mean

    # `.A` turns the matrix product back into a plain ndarray.
    return (mdata * self._proj).A
312
313
315 """Reproject (reconstruct) data into the original feature space.
316
317 :Returns:
318 NumPy array
319 """
320 if self._proj is None:
321 raise RuntimeError, "Mapper needs to be trained before used."
322
323
324 if self._demean and self._mean_out is None:
325
326 self._mean_out = self.forward(self._mean, demean=False)
327 if __debug__:
328 debug("MAP_",
329 "Mean of data in input space %s became %s in " \
330 "outspace" % (self._mean, self._mean_out))
331
332
333
334 if self._recon is None:
335 self._recon = self._proj.H
336
337 if self._demean:
338 return ((N.asmatrix(data) + self._mean_out) * self._recon).A
339 else:
340 return ((N.asmatrix(data)) * self._recon).A
341
342
344 """Returns the number of original features."""
345 return self._proj.shape[0]
346
347
349 """Returns the number of components to project on."""
350 return self._proj.shape[1]
351
352
354 """Choose a subset of components (and remove all others)."""
355 self._proj = self._proj[:, outIds]
356
357 self._recon = None
358 self._mean_out = None
359
360
361 proj = property(fget=lambda self: self._proj, doc="Projection matrix")
362 recon = property(fget=lambda self: self._recon, doc="Backprojection matrix")
363
364
365
367 """Meta mapper that combines several embedded mappers.
368
369 This mapper can be used to map from several input dataspaces into a common
370 output dataspace. When :meth:`~mvpa.mappers.base.CombinedMapper.forward`
371 is called with a sequence of data, each element in that sequence is passed
372 to the corresponding mapper, which in turn forward-maps the data. The
373 output of all mappers is finally stacked (horizontally or column or
374 feature-wise) into a single large 2D matrix (nsamples x nfeatures).
375
376 .. note::
377 This mapper can only embed mappers that transform data into a 2D
378 (nsamples x nfeatures) representation. For mappers not supporting this
379 transformation, consider wrapping them in a
380 :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
381 post-processing mapper.
382
383 CombinedMapper fully supports forward and backward mapping, training,
384 runtime selection of a feature subset (in output dataspace) and retrieval
385 of neighborhood information.
386 """
388 """
389 :Parameters:
390 mappers: list of Mapper instances
391 The order of the mappers in the list is important, as it will define
392 the order in which data snippets have to be passed to
393 :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
394 **kwargs
395 All additional arguments are passed to the base-class constructor.
396 """
397 Mapper.__init__(self, **kwargs)
398
399 if not len(mappers):
400 raise ValueError, \
401 'CombinedMapper needs at least one embedded mapper.'
402
403 self._mappers = mappers
404
405
407 """Map data from the IN spaces into to common OUT space.
408
409 :Parameter:
410 data: sequence
411 Each element in the `data` sequence is passed to the corresponding
412 embedded mapper and is mapped individually by it. The number of
413 elements in `data` has to match the number of embedded mappers. Each
414 element in `data` has to provide the same number of samples
415 (first dimension).
416
417 :Returns:
418 array: nsamples x nfeatures
419 Horizontally stacked array of all embedded mapper outputs.
420 """
421 if not len(data) == len(self._mappers):
422 raise ValueError, \
423 "CombinedMapper needs a sequence with data for each " \
424 "Mapper"
425
426
427
428 try:
429 return N.hstack(
430 [self._mappers[i].forward(d) for i, d in enumerate(data)])
431 except ValueError:
432 raise ValueError, \
433 "Embedded mappers do not generate same number of samples. " \
434 "Check input data."
435
436
438 """Reverse map data from OUT space into the IN spaces.
439
440 :Parameter:
441 data: array
442 Single data array to be reverse mapped into a sequence of data
443 snippets in their individual IN spaces.
444
445 :Returns:
446 list
447 """
448
449
450
451 data = N.asanyarray(data).T
452
453 if not len(data) == self.getOutSize():
454 raise ValueError, \
455 "Data shape does match mapper reverse mapping properties."
456
457 result = []
458 fsum = 0
459 for m in self._mappers:
460
461 fsum_new = fsum + m.getOutSize()
462
463 result.append(m.reverse(data[fsum:fsum_new].T))
464
465 fsum = fsum_new
466
467 return result
468
469
def train(self, dataset):
    """Trains all embedded mappers.

    The provided training dataset is split feature-wise: consecutive
    feature ranges, each as wide as the `getOutSize()` of the respective
    embedded mapper, are selected from it and passed to the
    :meth:`~mvpa.mappers.base.Mapper.train` method of that mapper.

    :Parameter:
      dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
        A dataset with the number of features matching the `outSize` of the
        `CombinedMapper`.

    :Raises:
      ValueError
        If `dataset.nfeatures` differs from `self.getOutSize()`.
    """
    if dataset.nfeatures != self.getOutSize():
        # Call form of raise: valid on Python 2 and 3 alike.
        raise ValueError("Training dataset does not match the mapper "
                         "properties.")

    start = 0
    for mapper in self._mappers:
        # The slice width is queried before the mapper is (re)trained.
        stop = start + mapper.getOutSize()
        mapper.train(dataset.selectFeatures(range(start, stop)))
        start = stop
492
493
495 """Returns the size of the entity in input space"""
496 return N.sum(m.getInSize() for m in self._mappers)
497
498
500 """Returns the size of the entity in output space"""
501 return N.sum(m.getOutSize() for m in self._mappers)
502
503
505 """Remove some elements and leave only ids in 'out'/feature space.
506
507 .. note::
508 The subset selection is done inplace
509
510 :Parameter:
511 outIds: sequence
512 All output feature ids to be selected/kept.
513 """
514
515
516 ids = N.asanyarray(outIds)
517 fsum = 0
518 for m in self._mappers:
519
520 selector = N.logical_and(ids < fsum + m.getOutSize(), ids >= fsum)
521
522 selected = ids[selector] - fsum
523 fsum += m.getOutSize()
524
525 m.selectOut(selected)
526
527
529 """Get the ids of the neighbors of a single feature in output dataspace.
530
531 :Parameters:
532 outId: int
533 Single id of a feature in output space, whose neighbors should be
534 determined.
535 *args, **kwargs
536 Additional arguments are passed to the metric of the embedded
537 mapper, that is responsible for the corresponding feature.
538
539 Returns a list of outIds
540 """
541 fsum = 0
542 for m in self._mappers:
543 fsum_new = fsum + m.getOutSize()
544 if outId >= fsum and outId < fsum_new:
545 return m.getNeighbor(outId - fsum, *args, **kwargs)
546 fsum = fsum_new
547
548 raise ValueError, "Invalid outId passed to CombinedMapper.getNeighbor()"
549
550
551
553 """Meta mapper that embedded a chain of other mappers.
554
555 Each mapper in the chain is called successively to perform forward or
556 reverse mapping.
557
558 .. note::
559
560 In its current implementation the `ChainMapper` treats all but the last
561 mapper as simple pre-processing (in forward()) or post-processing (in
562 reverse()) steps. All other capabilities, e.g. training and neighbor
563 metrics are provided by or affect *only the last mapper in the chain*.
564
565 With respect to neighbor metrics this means that they are determined
566 based on the input space of the *last mapper* in the chain and *not* on
567 the input dataspace of the `ChainMapper` as a whole
568 """
570 """
571 :Parameters:
572 mappers: list of Mapper instances
573 **kwargs
574 All additional arguments are passed to the base-class constructor.
575 """
576 Mapper.__init__(self, **kwargs)
577
578 if not len(mappers):
579 raise ValueError, 'ChainMapper needs at least one embedded mapper.'
580
581 self._mappers = mappers
582
583
585 """Calls all mappers in the chain successively.
586
587 :Parameter:
588 data
589 data to be chain-mapped.
590 """
591 mp = data
592 for m in self._mappers:
593 mp = m.forward(mp)
594
595 return mp
596
597
599 """Calls all mappers in the chain successively, in reversed order.
600
601 :Parameter:
602 data: array
603 data array to be reverse mapped into the original dataspace.
604 """
605 mp = data
606 for m in reversed(self._mappers):
607 mp = m.reverse(mp)
608
609 return mp
610
611
def train(self, dataset):
    """Trains the *last* mapper in the chain.

    All other mappers in the chain are left untouched.

    :Parameter:
      dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
        A dataset with the number of features matching the `outSize` of the
        last mapper in the chain (which is identical to the one of the
        `ChainMapper` itself).

    :Raises:
      ValueError
        If `dataset.nfeatures` differs from `self.getOutSize()`.
    """
    if dataset.nfeatures != self.getOutSize():
        # Call form of raise: valid on Python 2 and 3 alike.
        raise ValueError("Training dataset does not match the mapper "
                         "properties.")

    self._mappers[-1].train(dataset)
626
627
629 """Returns the size of the entity in input space"""
630 return self._mappers[0].getInSize()
631
632
634 """Returns the size of the entity in output space"""
635 return self._mappers[-1].getOutSize()
636
637
639 """Remove some elements from the *last* mapper in the chain.
640
641 :Parameter:
642 outIds: sequence
643 All output feature ids to be selected/kept.
644 """
645 self._mappers[-1].selectOut(outIds)
646
647
649 """Get the ids of the neighbors of a single feature in output dataspace.
650
651 .. note::
652
653 The neighbors are determined based on the input space of the *last
654 mapper* in the chain and *not* on the input dataspace of the
655 `ChainMapper` as a whole!
656
657 :Parameters:
658 outId: int
659 Single id of a feature in output space, whose neighbors should be
660 determined.
661 *args, **kwargs
662 Additional arguments are passed to the metric of the embedded
663 mapper, that is responsible for the corresponding feature.
664
665 Returns a list of outIds
666 """
667 return self._mappers[-1].getNeighbor(outId, *args, **kwargs)
668