1
2
3
4
5
6
7
8
9 """Misc function performing operations on datasets which are based on scipy
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from mvpa.base import externals
15 externals.exists('scipy', raiseException=True)
16
17 import numpy as N
18
19 from operator import isSequenceType
20
21 from mvpa.datasets.base import Dataset, datasetmethod
22 from mvpa.misc.support import getBreakPoints
23
24 from scipy import signal
25 from scipy.linalg import lstsq
26 from scipy.special import legendre
27
28 @datasetmethod
29 -def detrend(dataset, perchunk=False, model='linear',
30 polyord=None, opt_reg=None):
31 """
32 Given a dataset, detrend the data inplace either entirely
33 or per each chunk
34
35 :Parameters:
36 `dataset` : `Dataset`
37 dataset to operate on
38 `perchunk` : bool
39 either to operate on whole dataset at once or on each chunk
40 separately
41 `model`
42 Type of detrending model to run. If 'linear' or 'constant',
43 scipy.signal.detrend is used to perform a linear or demeaning
44 detrend. If 'regress', then you specify the polyord and opt_reg
45 arguments to define regressors to regress out of the dataset.
46 `polyord` : int or list
47 Order of the Legendre polynomial to remove from the data. This
48 will remove every polynomial up to and including the provided
49 value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
50 polynomials from the data. N.B.: The 0th polynomial is the
51 baseline shift, the 1st is the linear trend.
52 If you specify a single int and perchunk is True, then this value
53 is used for each chunk. You can also specify a differnt polyord
54 value for each chunk by providing a list or ndarray of polyord
55 values the length of the number of chunks.
56 `opt_reg` : ndarray
57 Optional ndarray of additional information to regress out from the
58 dataset. One example would be to regress out motion parameters.
59 As with the data, time is on the first axis.
60
61 """
62
63 if model in ['linear', 'constant']:
64
65 bp = 0
66
67 if perchunk:
68 try:
69 bp = getBreakPoints(dataset.chunks)
70 except ValueError, e:
71 raise ValueError, \
72 "Failed to assess break points between chunks. Often " \
73 "that is due to discontinuities within a chunk, which " \
74 "ruins idea of per-chunk detrending. Original " \
75 "exception was: %s" % str(e)
76
77 dataset.samples[:] = signal.detrend(dataset.samples, axis=0,
78 type=model, bp=bp)
79 elif model in ['regress']:
80
81 return __detrend_regress(dataset, perchunk=perchunk,
82 polyord=polyord, opt_reg=opt_reg)
83 else:
84
85 raise ValueError('Specified model type (%s) is unknown.'
86 % (model))
87
91 """
92 Given a dataset, perform a detrend inplace, regressing out polynomial
93 terms as well as optional regressors, such as motion parameters.
94
95 :Parameters:
96 `dataset`: `Dataset`
97 Dataset to operate on
98 `perchunk` : bool
99 Either to operate on whole dataset at once or on each chunk
100 separately. If perchunk is True, all the samples within a
101 chunk should be contiguous and the chunks should be sorted in
102 order from low to high.
103 `polyord` : int
104 Order of the Legendre polynomial to remove from the data. This
105 will remove every polynomial up to and including the provided
106 value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
107 polynomials from the data. N.B.: The 0th polynomial is the
108 baseline shift, the 1st is the linear trend.
109 If you specify a single int and perchunk is True, then this value
110 is used for each chunk. You can also specify a differnt polyord
111 value for each chunk by providing a list or ndarray of polyord
112 values the length of the number of chunks.
113 `opt_reg` : ndarray
114 Optional ndarray of additional information to regress out from the
115 dataset. One example would be to regress out motion parameters.
116 As with the data, time is on the first axis.
117 """
118
119
120 if not polyord is None:
121 if not isSequenceType(polyord):
122
123 polyord = [polyord]*len(dataset.uniquechunks)
124 elif perchunk and len(polyord) != len(dataset.uniquechunks):
125 raise ValueError("If you specify a sequence of polyord values " + \
126 "they sequence length must match the " + \
127 "number of unique chunks in the dataset.")
128
129
130 if perchunk:
131
132 uchunks = dataset.uniquechunks
133
134
135 reg = []
136 for n, chunk in enumerate(uchunks):
137
138 cinds = dataset.chunks == chunk
139
140
141 if not polyord is None:
142
143 x = N.linspace(-1, 1, cinds.sum())
144
145 for n in range(polyord[n] + 1):
146 newreg = N.zeros((dataset.nsamples, 1))
147 newreg[cinds, 0] = legendre(n)(x)
148 reg.append(newreg)
149 else:
150
151 reg = []
152
153 if not polyord is None:
154
155 x = N.linspace(-1, 1, dataset.nsamples)
156 for n in range(polyord[0] + 1):
157 reg.append(legendre(n)(x)[:, N.newaxis])
158
159
160 if not opt_reg is None:
161
162 reg.append(opt_reg)
163
164
165 if len(reg) > 0:
166 if len(reg) > 1:
167 regs = N.hstack(reg)
168 else:
169 regs = reg[0]
170 else:
171
172 raise ValueError("You must specify at least one " + \
173 "regressor to regress out.")
174
175
176 res = lstsq(regs, dataset.samples)
177
178
179 yhat = N.dot(regs, res[0])
180 dataset.samples -= yhat
181
182
183 return res, regs
184