1
2
3
4
5
6
7
8
9 """Data mapper"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.base.dochelpers import enhancedDocString
16 from mvpa.mappers.base import Mapper
17 from mvpa.featsel.helpers import ElementSelector
18
19
21 """Mapper to project data onto PCA components estimated from some dataset.
22
23 After the mapper has been instantiated, it has to be trained first. When
24 `train()` is called with a 2D (samples x features) matrix the PCA
25 components are determined by performing singular value decomposition
26 on the covariance matrix.
27
28 The PCA mapper only handles 2D data matrices.
29 """
31 """Initialize the PCAMapper
32
33 :Parameters:
34 selector: None, list or ElementSelector
35 Which PCA components should be used for mapping. If `selector`
36 is `None` all components are used. If a list is provided, all
37 list elements are treated as component ids and the respective
38 components are selected (all others are discarded).
39 Alternatively an `ElementSelector` instance can be provided
40 which chooses components based on the corresponding eigenvalues
41 of each component.
42 """
43 Mapper.__init__(self)
44
45 self.__selector = selector
46 self.mix = None
47 """Transformation matrix from original features onto PCA-components."""
48 self.unmix = None
49 """Un-mixing matrix for projecting from the PCA space back onto the
50 original features."""
51 self.sv = None
52 """Eigenvalues of the covariance matrix."""
53
54
55 __doc__ = enhancedDocString('PCAMapper', locals(), Mapper)
56
57
59 """Yes, this is it."""
60 if memo is None:
61 memo = {}
62 out = PCAMapper()
63 if self.mix is not None:
64 out.mix = self.mix.copy()
65 out.sv = self.sv.copy()
66
67 return out
68
69
def train(self, dataset):
    """Determine the projection matrix onto the PCA components from
    a 2D samples x feature data matrix.

    The samples are demeaned per feature, the covariance matrix of the
    features is computed and decomposed by singular value decomposition.
    The transposed left singular vectors become the mixing matrix
    (`self.mix`) and the singular values are stored in `self.sv`.
    If a selector was given at construction time, it is applied
    afterwards to keep only a subset of the components.

    :Parameters:
      dataset
        Object whose `samples` attribute holds the 2D
        (samples x features) data matrix.

    :Raises:
      ValueError
        If the selector is neither `None`, a list, nor an
        `ElementSelector` instance.
    """
    X = N.asmatrix(dataset.samples)

    # demean every feature (column)
    X = X - X.mean(axis=0)

    # covariance matrix of the features, normalized by the number of
    # samples
    R = X.T * X / X.shape[0]

    # only the left singular vectors and the singular values are used
    U, SV, _ = N.linalg.svd(R)

    # each row of the mixing matrix is one component
    self.mix = U.T
    self.sv = SV

    # An un-mixing matrix cached by an earlier reverse() call belongs to
    # the previous training and must not survive a re-training.
    self.unmix = None

    selector = self.__selector
    if selector is None:
        # keep all components
        return

    if isinstance(selector, list):
        # list elements are component ids
        self.selectOut(selector)
    elif isinstance(selector, ElementSelector):
        # let the selector pick components based on the singular values
        self.selectOut(selector(SV))
    else:
        raise ValueError('Unknown type of selector.')
100
101
103 """Project a 2D samples x features matrix onto the PCA components.
104
105 :Returns:
106 NumPy array
107 """
108 if self.mix is None:
109 raise RuntimeError, "PCAMapper needs to be train before used."
110
111 return N.asarray(self.mix * N.asmatrix(data).T).T
112
113
115 """Projects feature vectors or matrices with feature vectors back
116 onto the original features.
117
118 :Returns:
119 NumPy array
120 """
121 if self.unmix is None:
122 self.unmix = self.mix.I
123 return (self.unmix * N.asmatrix(data).T).T.A
124
125
127 """Returns a one-tuple with the number of original features."""
128 return (self.mix.shape[1], )
129
130
132 """Returns a one-tuple with the number of PCA components."""
133 return (self.mix.shape[0], )
134
135
137 """Returns the number of original features."""
138 return self.mix.shape[1]
139
140
142 """Returns the number of PCA components."""
143 return self.mix.shape[0]
144
145
147 """Choose a subset of PCA components (and remove all others)."""
148 self.mix = self.mix[outIds]
149
150 self.unmix = None
151