1
2
3
4
5
6
7
8
9 """Data mapper"""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as N
14
15 from mvpa.base.dochelpers import enhancedDocString
16 from mvpa.mappers.base import Mapper
17 from mvpa.featsel.helpers import ElementSelector
18
19 if __debug__:
20 from mvpa.misc import debug
21
23 """Mapper to project data onto SVD components estimated from some dataset.
24 """
def __init__(self, selector=None, demean=True):
    """Set up an untrained SVDMapper.

    :Parameters:
      selector: None, list or ElementSelector
        Chooses the SVD components that are kept for mapping. With
        `None` every component is used. A list is interpreted as
        component ids: the listed components are kept and all others
        are discarded. An `ElementSelector` instance picks components
        based on the singular values of the components.
      demean: bool
        Whether the data mean should be subtracted while computing
        the projection and added back when doing reverse().
    """
    Mapper.__init__(self)

    # configuration handed in by the caller
    self.__demean = demean
    self.__selector = selector

    # state that only becomes available through training
    self.mean = None
    """Data mean"""
    self.sv = None
    """Singular values of the training matrix."""
    self.mix = None
    """Transformation matrix from original features onto SVD-components."""
    self.unmix = None
    """Un-mixing matrix for projecting from the SVD space back onto the
    original features."""
54
55 __doc__ = enhancedDocString('SVDMapper', locals(), Mapper)
56
57
59 """Yes, this is it.
60 XXX But do we need it really? copy.deepcopy wouldn't have a problem copying stuff
61 """
62 if memo is None:
63 memo = {}
64 out = SVDMapper()
65 if self.mix is not None:
66 out.mix = self.mix.copy()
67 out.sv = self.sv.copy()
68 if self.mean is not None:
69 out.mean = self.mean.copy()
70
71 return out
72
73
def train(self, dataset):
    """Determine the projection matrix onto the SVD components from
    a 2D samples x feature data matrix.

    :Parameters:
      dataset
        Object whose `samples` attribute holds the samples x features
        matrix that is decomposed.

    :Raises:
      ValueError
        If the selector given to the constructor is neither `None`,
        a list, nor an `ElementSelector`.
    """
    X = N.asmatrix(dataset.samples)

    if self.__demean:
        # keep the per-feature mean so it can be reused when mapping
        self.mean = X.mean(axis=0)
        X = X - self.mean

        if __debug__:
            debug("MAP_",
                  "Mean of data in input space %s was subtracted" %
                  (self.mean,))

    # economy-size decomposition; the left singular vectors are not needed
    _, SV, Vh = N.linalg.svd(X, full_matrices=0)

    # the conjugate transpose of Vh maps features onto the components
    self.mix = Vh.H
    # an un-mixing matrix cached from an earlier training run belongs to
    # the previous mixing matrix and must not be reused
    self.unmix = None
    self.sv = SV

    if __debug__:
        # nonzero() returns one index array per dimension, hence the [0]
        n_nonzero = len(SV.nonzero()[0])
        debug("MAP",
              ("SVD was done on %s and obtained %d SVs " %
               (dataset, len(SV)))
              + (" (%d non-0, max=%f)" % (n_nonzero, SV[0])))

        debug("MAP_", "Mixing matrix has %s shape and norm=%f" %
              (self.mix.shape, N.linalg.norm(self.mix)))

    selector = self.__selector
    if selector is not None:
        if isinstance(selector, list):
            # explicit component ids
            self.selectOut(selector)
        elif isinstance(selector, ElementSelector):
            # let the selector pick components from the singular values
            self.selectOut(selector(SV))
        else:
            # wrap in a one-tuple so that tuple selectors are formatted
            # instead of being unpacked as format arguments
            raise ValueError(
                'Unknown type of selector %s' % (selector,))
118
119
def forward(self, data, demean=True):
    """Project a 2D samples x features matrix onto the SVD components.

    :Parameters:
      data: array
        Data array to map
      demean: bool
        Flag whether to subtract the training data mean before mapping.
        It only has an effect if a mean is stored on the mapper.
        XXX: Not sure if this is the right place. Maybe better move to
        constructor as it would be difficult to set this flag.
    :Returns:
      NumPy array
    :Raises:
      RuntimeError
        If no mixing matrix is available yet, i.e. the mapper has not
        been trained.
    """
    if self.mix is None:
        # call form of raise is valid on both Python 2 and Python 3
        raise RuntimeError("SVDMapper needs to be train before used.")

    samples = N.asmatrix(data)
    if demean and self.mean is not None:
        samples = samples - self.mean

    # `.A` converts the matrix product back into a plain ndarray
    return (samples * self.mix).A
139
140
142 """Projects feature vectors or matrices with feature vectors back
143 onto the original features.
144
145 :Returns:
146 NumPy array
147 """
148 if self.mix is None:
149 raise RuntimeError, "SVDMapper needs to be train before used."
150
151 if self.unmix is None:
152 self.unmix = self.mix.H
153
154 if self.__demean:
155
156 self.mean_out = self.forward(self.mean, demean=False)
157 if __debug__:
158 debug("MAP_",
159 "Mean of data in input space %s bacame %s in " \
160 "outspace" % (self.mean, self.mean_out))
161
162 if self.__demean:
163 return ((N.asmatrix(data) + self.mean_out) * self.unmix).A
164 else:
165 return ((N.asmatrix(data)) * self.unmix).A
166
167
169 """Returns a one-tuple with the number of original features."""
170 return (self.mix.shape[0], )
171
172
174 """Returns a one-tuple with the number of SVD components."""
175 return (self.mix.shape[1], )
176
177
179 """Returns the number of original features."""
180 return self.mix.shape[0]
181
182
184 """Returns the number of SVD components."""
185 return self.mix.shape[1]
186
187
189 """Choose a subset of SVD components (and remove all others)."""
190 self.mix = self.mix[:, outIds]
191
192 self.unmix = None
193