Package mvpa :: Package mappers :: Module pca
[hide private]
[frames | no frames]

Source Code for Module mvpa.mappers.pca

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Data mapper""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  import numpy as N 
 14   
 15  from mvpa.base.dochelpers import enhancedDocString 
 16  from mvpa.mappers.base import Mapper 
 17  from mvpa.featsel.helpers import ElementSelector 
 18   
 19   
class PCAMapper(Mapper):
    """Mapper to project data onto PCA components estimated from some dataset.

    After the mapper has been instantiated, it has to be trained first. When
    `train()` is called with a 2D (samples x features) matrix the PCA
    components are determined by performing singular value decomposition
    on the covariance matrix.

    The PCA mapper only handles 2D data matrices.
    """
    def __init__(self, selector=None):
        """Initialize the PCAMapper

        :Parameters:
          selector: None | list | ElementSelector
            Which PCA components should be used for mapping. If `selector`
            is `None` all components are used. If a list is provided, all
            list elements are treated as component ids and the respective
            components are selected (all others are discarded).
            Alternatively an `ElementSelector` instance can be provided
            which chooses components based on the corresponding eigenvalues
            of each component.
        """
        Mapper.__init__(self)

        self.__selector = selector
        self.mix = None
        """Transformation matrix from original features onto PCA-components."""
        self.unmix = None
        """Un-mixing matrix for projecting from the PCA space back onto the
        original features."""
        self.sv = None
        """Eigenvalues of the covariance matrix."""


    __doc__ = enhancedDocString('PCAMapper', locals(), Mapper)


    def __deepcopy__(self, memo=None):
        """Return an independent copy of this mapper.

        The component selector, the mixing matrix and the eigenvalues are
        copied. The un-mixing matrix is not copied; `reverse()` recomputes
        it on demand from the copied mixing matrix.

        :Parameters:
          memo: None | dict
            Memo dictionary of the `copy.deepcopy` protocol.

        :Returns:
          PCAMapper
        """
        # function-scope import: `copy` is not among the module-level imports
        from copy import deepcopy

        if memo is None:
            memo = {}

        # carry the selector over, otherwise a re-trained copy would silently
        # fall back to using all components
        out = PCAMapper(selector=deepcopy(self.__selector, memo))
        # register the copy so recursive references resolve to it
        memo[id(self)] = out

        if self.mix is not None:
            out.mix = self.mix.copy()
            out.sv = self.sv.copy()

        return out


    def train(self, dataset):
        """Determine the projection matrix onto the PCA components from
        a 2D samples x feature data matrix.

        :Parameters:
          dataset
            Object whose `samples` attribute holds the 2D data matrix.

        :Raises:
          ValueError if the selector given to the constructor is neither
          `None`, a list, nor an `ElementSelector`.
        """
        X = N.asmatrix(dataset.samples)

        # demean the training data
        X = X - X.mean(axis=0)

        # compute covariance matrix
        R = X.T * X / X.shape[0]

        # singular value decomposition
        # note: U and V are equal in this case, as R is a covariance matrix,
        # hence the third return value is not needed
        U, SV, _ = N.linalg.svd(R)

        # store the final matrix with the new basis vectors to project the
        # features onto the PCA components
        self.mix = U.T

        # also store eigenvalues of all components
        self.sv = SV

        # an un-mixing matrix cached by `reverse()` during a previous training
        # does not belong to the new mixing matrix anymore
        self.unmix = None

        if self.__selector is not None:
            if isinstance(self.__selector, list):
                self.selectOut(self.__selector)
            elif isinstance(self.__selector, ElementSelector):
                self.selectOut(self.__selector(SV))
            else:
                raise ValueError('Unknown type of selector.')


    def forward(self, data):
        """Project a 2D samples x features matrix onto the PCA components.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError if the mapper has not been trained yet.
        """
        if self.mix is None:
            raise RuntimeError("PCAMapper needs to be train before used.")

        return N.asarray(self.mix * N.asmatrix(data).T).T


    def reverse(self, data):
        """Projects feature vectors or matrices with feature vectors back
        onto the original features.

        :Returns:
          NumPy array

        :Raises:
          RuntimeError if the mapper has not been trained yet.
        """
        if self.mix is None:
            # same failure mode as `forward()` instead of an AttributeError
            # caused by accessing `None.I`
            raise RuntimeError("PCAMapper needs to be train before used.")

        if self.unmix is None:
            # computed lazily and cached; `train()` and `selectOut()` reset it
            self.unmix = self.mix.I

        return (self.unmix * N.asmatrix(data).T).T.A


    def getInShape(self):
        """Returns a one-tuple with the number of original features."""
        return (self.mix.shape[1], )


    def getOutShape(self):
        """Returns a one-tuple with the number of PCA components."""
        return (self.mix.shape[0], )


    def getInSize(self):
        """Returns the number of original features."""
        return self.mix.shape[1]


    def getOutSize(self):
        """Returns the number of PCA components."""
        return self.mix.shape[0]


    def selectOut(self, outIds):
        """Choose a subset of PCA components (and remove all others).

        :Parameters:
          outIds
            Ids of the components (rows of the mixing matrix) to keep.
        """
        self.mix = self.mix[outIds]
        # invalidate unmixing matrix
        self.unmix = None
151