Package mvpa :: Package mappers :: Module base
[hide private]
[frames | no frames]

Source Code for Module mvpa.mappers.base

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Data mapper""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14   
 15  from mvpa.mappers.metric import Metric 
 16   
 17  from mvpa.misc.vproperty import VProperty 
 18  from mvpa.base.dochelpers import enhancedDocString 
 19   
 20  if __debug__: 
 21      from mvpa.base import warning 
 22      from mvpa.base import debug 
 23   
 24   
class Mapper(object):
    """Interface to provide mapping between two spaces: IN and OUT.
    Methods are prefixed correspondingly. forward/reverse operate
    on the entire dataset. get(In|Out)Id[s] operate per element::

              forward
             --------->
         IN              OUT
             <--------/
               reverse
    """
    def __init__(self, metric=None):
        """
        :Parameters:
          metric : Metric
            Optional metric
        """
        self.__metric = None
        """Pylint happiness"""
        self.setMetric(metric)
        """Actually assign the metric"""

    #
    # The following methods are abstract and merely define the intended
    # interface of a mapper and have to be implemented in derived classes. See
    # the docstrings of the respective methods for details about what they
    # should do.
    #

    def forward(self, data):
        """Map data from the IN dataspace into OUT space.
        """
        raise NotImplementedError


    def reverse(self, data):
        """Reverse map data from OUT space into the IN space.
        """
        raise NotImplementedError


    def getInSize(self):
        """Returns the size of the entity in input space"""
        raise NotImplementedError


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        raise NotImplementedError


    def selectOut(self, outIds):
        """Limit the OUT space to a certain set of features.

        :Parameters:
          outIds: sequence
            Subset of ids of the current feature in OUT space to keep.
        """
        raise NotImplementedError


    def getInId(self, outId):
        """Translate a feature id into a coordinate/index in input space.

        Such a translation might not be meaningful or even possible for a
        particular mapping algorithm and therefore cannot be relied upon.
        """
        raise NotImplementedError


    def getOutId(self, inId):
        """Translate a coordinate/index in input space into a feature id.

        The default implementation of `getNeighbor()` calls this method for
        every neighbor it yields, so it is declared here as part of the
        abstract interface. Like `getInId()`, such a translation might not be
        possible for a particular mapping algorithm.
        """
        raise NotImplementedError


    #
    # The following methods are candidates for reimplementation in derived
    # classes, in cases where the provided default behavior is not appropriate.
    #
    def isValidOutId(self, outId):
        """Validate feature id in OUT space.

        Override if OUT space is not simply a 1D vector
        """
        return outId >= 0 and outId < self.getOutSize()


    def isValidInId(self, inId):
        """Validate id in IN space.

        Override if IN space is not simply a 1D vector
        """
        return inId >= 0 and inId < self.getInSize()


    def train(self, dataset):
        """Perform training of the mapper.

        This method is called to put the mapper in a state that allows it to
        perform the intended mapping.

        :Parameter:
          dataset: Dataset or subclass

        .. note::
          The default behavior of this method is to do nothing.
        """
        pass


    def getNeighbor(self, outId, *args, **kwargs):
        """Get feature neighbors in input space, given an id in output space.

        This is a generator: the neighbors found in input space are translated
        back with `getOutId()` and yielded one by one. Nothing is yielded if
        `outId` is not valid according to `isValidOutId()`.

        This method has to be reimplemented whenever a derived class does not
        provide an implementation for :meth:`~mvpa.mappers.base.Mapper.getInId`.

        :Raises:
          RuntimeError
            If no metric is assigned (raised on first iteration, since this
            is a generator).
        """
        if self.metric is None:
            raise RuntimeError("No metric was assigned to %s, thus no "
                               "neighboring information is present" % self)

        if self.isValidOutId(outId):
            inId = self.getInId(outId)
            for neighborInId in self.getNeighborIn(inId, *args, **kwargs):
                yield self.getOutId(neighborInId)


    #
    # The following methods provide common functionality for all mappers
    # and there should be no immediate need to reimplement them
    #
    def getNeighborIn(self, inId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        Implemented as a generator; only neighbors that pass `isValidInId()`
        are yielded, and nothing is yielded if `inId` itself is invalid.

        :Parameters:
          inId
            id (index) of an element in input dataspace.
          *args, **kwargs
            Any additional arguments are passed to the embedded metric of the
            mapper.

        :Raises:
          RuntimeError
            If no metric is assigned (raised on first iteration).

        XXX See TODO below: what to return -- list of arrays or list
        of tuples?
        """
        if self.metric is None:
            raise RuntimeError("No metric was assigned to %s, thus no "
                               "neighboring information is present" % self)

        # bind once, it is called for every candidate neighbor
        isValidInId = self.isValidInId
        if isValidInId(inId):
            for neighbor in self.metric.getNeighbor(inId, *args, **kwargs):
                if isValidInId(neighbor):
                    yield neighbor


    def getNeighbors(self, outId, *args, **kwargs):
        """Return the list of coordinates for the neighbors.

        By default it simply constructs the list based on
        the generator returned by getNeighbor()
        """
        return [x for x in self.getNeighbor(outId, *args, **kwargs)]


    def __repr__(self):
        if self.__metric is not None:
            s = "metric=%s" % repr(self.__metric)
        else:
            s = ''
        return "%s(%s)" % (self.__class__.__name__, s)


    def __call__(self, data):
        """Calls the mappers forward() method.
        """
        return self.forward(data)


    def getMetric(self):
        """To make pylint happy"""
        return self.__metric


    def setMetric(self, metric):
        """Assign the metric after checking its type.

        :Raises:
          ValueError
            If `metric` is neither `None` nor a `Metric` instance.
        """
        if metric is not None and not isinstance(metric, Metric):
            raise ValueError("metric for Mapper must be an "
                             "instance of a Metric class . Got %s"
                             % repr(type(metric)))
        self.__metric = metric


    metric = property(fget=getMetric, fset=setMetric)
    nfeatures = VProperty(fget=getOutSize)



class ProjectionMapper(Mapper):
    """Mapper using a projection matrix to transform the data.

    This class cannot be used directly. Sub-classes have to implement
    the `_train()` method, which has to compute the projection matrix
    given a dataset (see `_train()` docstring for more information).

    Once the projection matrix is available, this class provides
    functionality to perform forward and backwards mapping of data, the
    latter using the hermitian (conjugate) transpose of the projection
    matrix. Additionally, `ProjectionMapper` supports optional (but done
    by default) demeaning of the data and selection of arbitrary
    component (i.e. columns of the projection matrix) of the projection.

    Forward and back-projection matrices (a.k.a. *projection* and
    *reconstruction*) are available via the `proj` and `recon`
    properties. The latter only after it has been computed (after first
    call to `reverse`).
    """

    def __init__(self, selector=None, demean=True):
        """Initialize the ProjectionMapper

        :Parameters:
          selector: None | list
            Which components (i.e. columns of the projection matrix)
            should be used for mapping. If `selector` is `None` all
            components are used. If a list is provided, all list
            elements are treated as component ids and the respective
            components are selected (all others are discarded).
          demean: bool
            Either data should be demeaned while computing
            projections and applied back while doing reverse()

        """
        Mapper.__init__(self)

        self._selector = selector
        self._proj = None
        """Forward projection matrix."""
        self._recon = None
        """Reverse projection (reconstruction) matrix."""
        self._demean = demean
        """Flag whether to demean the to be projected data, prior to projection.
        """
        self._mean = None
        """Data mean"""
        self._mean_out = None
        """Forward projected data mean."""

    __doc__ = enhancedDocString('ProjectionMapper', locals(), Mapper)


    def train(self, dataset):
        """Determine the projection matrix.

        Stores the feature-wise mean of `dataset.samples`, delegates the
        computation of the projection matrix to `_train()` and finally applies
        the component selection given to the constructor (if any).
        """
        # store the feature wise mean
        self._mean = dataset.samples.mean(axis=0)

        # Drop everything reverse() cached for a previous projection, so that
        # retraining cannot leave a stale reconstruction matrix or output
        # mean behind. Done *before* _train(), so a subclass is still free to
        # assign these itself.
        self._recon = None
        self._mean_out = None

        # compute projection matrix with subclass logic
        self._train(dataset)

        # perform component selection
        if self._selector is not None:
            self.selectOut(self._selector)


    def _train(self, dataset):
        """Worker method. Needs to be implemented by subclass.

        This method has to train the mapper and store the resulting
        transformation matrix in `self._proj`.
        """
        raise NotImplementedError


    def forward(self, data, demean=None):
        """Perform forward projection.

        :Parameters:
          data: ndarray
            Data array to map
          demean: boolean | None
            Override demean setting for this method call.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError
            If no projection matrix is available yet.
        """
        # let arg overwrite instance flag
        if demean is None:
            demean = self._demean

        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")
        if demean and self._mean is not None:
            return ((N.asmatrix(data) - self._mean) * self._proj).A
        else:
            return (N.asmatrix(data) * self._proj).A


    def reverse(self, data):
        """Reproject (reconstruct) data into the original feature space.

        The reconstruction matrix (hermitian transpose of the projection
        matrix) and the forward-projected mean are computed on first use and
        cached until `selectOut()` or `train()` invalidates them.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError
            If no projection matrix is available yet.
        """
        if self._proj is None:
            raise RuntimeError("Mapper needs to be trained before used.")

        # Same guard as in forward(): without a stored mean there is nothing
        # to add back, even if demeaning is enabled.
        demean = self._demean and self._mean is not None

        # get feature-wise mean in out-space
        if demean and self._mean_out is None:
            # forward project mean and cache result
            self._mean_out = self.forward(self._mean, demean=False)
            if __debug__:
                debug("MAP_",
                      "Mean of data in input space %s became %s in "
                      "outspace" % (self._mean, self._mean_out))

        # (re)build reconstruction matrix
        if self._recon is None:
            self._recon = self._proj.H

        if demean:
            # NOTE(review): the mean is added in output space *before*
            # back-projection, i.e. the result contains mean * proj * proj.H.
            # That equals the input-space mean only if proj * proj.H is the
            # identity -- confirm this is intended when components have been
            # deselected.
            return ((N.asmatrix(data) + self._mean_out) * self._recon).A
        else:
            return (N.asmatrix(data) * self._recon).A


    def getInSize(self):
        """Returns the number of original features."""
        return self._proj.shape[0]


    def getOutSize(self):
        """Returns the number of components to project on."""
        return self._proj.shape[1]


    def selectOut(self, outIds):
        """Choose a subset of components (and remove all others)."""
        self._proj = self._proj[:, outIds]
        # invalidate reconstruction matrix and cached output-space mean,
        # both were derived from the previous projection matrix
        self._recon = None
        self._mean_out = None


    proj  = property(fget=lambda self: self._proj, doc="Projection matrix")
    recon = property(fget=lambda self: self._recon, doc="Backprojection matrix")



class CombinedMapper(Mapper):
    """Meta mapper that combines several embedded mappers.

    This mapper can be used to map from several input dataspaces into a common
    output dataspace. When :meth:`~mvpa.mappers.base.CombinedMapper.forward`
    is called with a sequence of data, each element in that sequence is passed
    to the corresponding mapper, which in turn forward-maps the data. The
    output of all mappers is finally stacked (horizontally or column or
    feature-wise) into a single large 2D matrix (nsamples x nfeatures).

    .. note::
      This mapper can only embed mappers that transform data into a 2D
      (nsamples x nfeatures) representation. For mappers not supporting this
      transformation, consider wrapping them in a
      :class:`~mvpa.mappers.base.ChainMapper` with an appropriate
      post-processing mapper.

    CombinedMapper fully supports forward and backward mapping, training,
    runtime selection of a feature subset (in output dataspace) and retrieval
    of neighborhood information.
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
            The order of the mappers in the list is important, as it will define
            the order in which data snippets have to be passed to
            :meth:`~mvpa.mappers.base.CombinedMapper.forward`.
          **kwargs
            All additional arguments are passed to the base-class constructor.

        :Raises:
          ValueError
            If `mappers` is empty.
        """
        Mapper.__init__(self, **kwargs)

        if not len(mappers):
            raise ValueError(
                'CombinedMapper needs at least one embedded mapper.')

        self._mappers = mappers


    def forward(self, data):
        """Map data from the IN spaces into the common OUT space.

        :Parameter:
          data: sequence
            Each element in the `data` sequence is passed to the corresponding
            embedded mapper and is mapped individually by it. The number of
            elements in `data` has to match the number of embedded mappers. Each
            element in `data` has to provide the same number of samples
            (first dimension).

        :Returns:
          array: nsamples x nfeatures
            Horizontally stacked array of all embedded mapper outputs.

        :Raises:
          ValueError
            If the number of data elements does not match the number of
            embedded mappers, or if the mapped pieces cannot be stacked.
        """
        if not len(data) == len(self._mappers):
            raise ValueError(
                "CombinedMapper needs a sequence with data for each "
                "Mapper")

        # Map every piece first, outside the try block: a ValueError raised
        # by an embedded mapper must surface unchanged instead of being
        # relabelled as a sample-count mismatch.
        mapped = [m.forward(d) for m, d in zip(self._mappers, data)]

        # return a big array for the result of the forward mapped data
        # of each embedded mapper
        try:
            return N.hstack(mapped)
        except ValueError:
            raise ValueError(
                "Embedded mappers do not generate same number of samples. "
                "Check input data.")


    def reverse(self, data):
        """Reverse map data from OUT space into the IN spaces.

        :Parameter:
          data: array
            Single data array to be reverse mapped into a sequence of data
            snippets in their individual IN spaces.

        :Returns:
          list

        :Raises:
          ValueError
            If the number of features in `data` differs from `getOutSize()`.
        """
        # assure array and transpose
        # i.e. transpose of 1D does nothing, but of 2D puts features
        # along first dimension
        data = N.asanyarray(data).T

        if not len(data) == self.getOutSize():
            raise ValueError(
                "Data shape does not match mapper reverse mapping properties.")

        result = []
        fsum = 0
        for m in self._mappers:
            # calculate upper border
            fsum_new = fsum + m.getOutSize()

            # hand each mapper its own slice of features, transposed back
            result.append(m.reverse(data[fsum:fsum_new].T))

            fsum = fsum_new

        return result


    def train(self, dataset):
        """Trains all embedded mappers.

        The provided training dataset is split appropriately and the
        corresponding pieces are passed to the
        :meth:`~mvpa.mappers.base.Mapper.train` method of each embedded mapper.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            `CombinedMapper`.

        :Raises:
          ValueError
            If `dataset.nfeatures` differs from `getOutSize()`.
        """
        if dataset.nfeatures != self.getOutSize():
            raise ValueError("Training dataset does not match the mapper "
                             "properties.")

        fsum = 0
        for m in self._mappers:
            # need to split the dataset
            fsum_new = fsum + m.getOutSize()
            m.train(dataset.selectFeatures(list(range(fsum, fsum_new))))
            fsum = fsum_new


    def getInSize(self):
        """Returns the size of the entity in input space"""
        # builtin sum: handing a generator to N.sum() is deprecated in NumPy
        return sum(m.getInSize() for m in self._mappers)


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        # builtin sum: handing a generator to N.sum() is deprecated in NumPy
        return sum(m.getOutSize() for m in self._mappers)


    def selectOut(self, outIds):
        """Remove some elements and leave only ids in 'out'/feature space.

        .. note::
          The subset selection is done inplace

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        # determine which features belong to what mapper
        # and call its selectOut() accordingly
        ids = N.asanyarray(outIds)
        fsum = 0
        for m in self._mappers:
            # query the size once, *before* the mapper is shrunk below
            size = m.getOutSize()
            # bool which meta feature ids belongs to this mapper
            selector = N.logical_and(ids < fsum + size, ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            fsum += size
            # finally apply to mapper
            m.selectOut(selected)


    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        :Parameters:
          outId: int
            Single id of a feature in output space, whose neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed to the metric of the embedded
            mapper, that is responsible for the corresponding feature.

        Returns whatever the responsible embedded mapper's `getNeighbor()`
        returns.

        :Raises:
          ValueError
            If `outId` does not belong to any embedded mapper.
        """
        fsum = 0
        for m in self._mappers:
            fsum_new = fsum + m.getOutSize()
            if outId >= fsum and outId < fsum_new:
                # NOTE(review): `outId` is made relative to the embedded
                # mapper, but the ids coming back are passed on as-is, i.e.
                # they look relative to that mapper rather than to the
                # combined output space -- confirm whether they should be
                # shifted by `fsum`.
                return m.getNeighbor(outId - fsum, *args, **kwargs)
            fsum = fsum_new

        raise ValueError("Invalid outId passed to CombinedMapper.getNeighbor()")



class ChainMapper(Mapper):
    """Meta mapper that embeds a chain of other mappers.

    Forward mapping runs the data through every mapper of the chain in order;
    reverse mapping runs it through the chain in the opposite order.

    .. note::

      In its current implementation the `ChainMapper` treats all but the last
      mapper as simple pre-processing (in forward()) or post-processing (in
      reverse()) steps. All other capabilities, e.g. training and neighbor
      metrics are provided by or affect *only the last mapper in the chain*.

      With respect to neighbor metrics this means that they are determined
      based on the input space of the *last mapper* in the chain and *not* on
      the input dataspace of the `ChainMapper` as a whole
    """
    def __init__(self, mappers, **kwargs):
        """
        :Parameters:
          mappers: list of Mapper instances
            The chain, in the order in which forward() applies it.
          **kwargs
            All additional arguments are passed to the base-class constructor.

        :Raises:
          ValueError
            If `mappers` is empty.
        """
        Mapper.__init__(self, **kwargs)

        if len(mappers) == 0:
            raise ValueError('ChainMapper needs at least one embedded mapper.')

        self._mappers = mappers


    def forward(self, data):
        """Pass `data` through the forward() of each mapper, first to last.

        :Parameter:
          data
            data to be chain-mapped.
        """
        mapped = data
        for stage in self._mappers:
            mapped = stage.forward(mapped)
        return mapped


    def reverse(self, data):
        """Pass `data` through the reverse() of each mapper, last to first.

        :Parameter:
          data: array
            data array to be reverse mapped into the original dataspace.
        """
        restored = data
        for stage in reversed(self._mappers):
            restored = stage.reverse(restored)
        return restored


    def train(self, dataset):
        """Trains the *last* mapper in the chain.

        :Parameter:
          dataset: :class:`~mvpa.datasets.base.Dataset` or subclass
            A dataset with the number of features matching the `outSize` of the
            last mapper in the chain (which is identical to the one of the
            `ChainMapper` itself).

        :Raises:
          ValueError
            If `dataset.nfeatures` differs from `getOutSize()`.
        """
        expected = self.getOutSize()
        if dataset.nfeatures != expected:
            raise ValueError("Training dataset does not match the mapper "
                             "properties.")

        last = self._mappers[-1]
        last.train(dataset)


    def getInSize(self):
        """Returns the size of the entity in input space"""
        first = self._mappers[0]
        return first.getInSize()


    def getOutSize(self):
        """Returns the size of the entity in output space"""
        last = self._mappers[-1]
        return last.getOutSize()


    def selectOut(self, outIds):
        """Remove some elements from the *last* mapper in the chain.

        :Parameter:
          outIds: sequence
            All output feature ids to be selected/kept.
        """
        last = self._mappers[-1]
        last.selectOut(outIds)


    def getNeighbor(self, outId, *args, **kwargs):
        """Get the ids of the neighbors of a single feature in output dataspace.

        The call is delegated unchanged to the last mapper of the chain.

        .. note::

          The neighbors are determined based on the input space of the *last
          mapper* in the chain and *not* on the input dataspace of the
          `ChainMapper` as a whole!

        :Parameters:
          outId: int
            Single id of a feature in output space, whose neighbors should be
            determined.
          *args, **kwargs
            Additional arguments are passed on to the `getNeighbor()` of the
            last mapper.

        Returns whatever the last mapper's `getNeighbor()` returns.
        """
        last = self._mappers[-1]
        return last.getNeighbor(outId, *args, **kwargs)