1
2
3
4
5
6
7
8
9 """Misc function performing operations on datasets which are based on scipy
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from mvpa.base import externals
15
16 import numpy as N
17
18 from operator import isSequenceType
19
20 from mvpa.datasets.base import Dataset, datasetmethod
21 from mvpa.misc.support import getBreakPoints
22
23 if externals.exists('scipy', raiseException=True):
24 from scipy import signal
25 from scipy.linalg import lstsq
26 from scipy.special import legendre
27
28
29 @datasetmethod
30 -def detrend(dataset, perchunk=False, model='linear',
31 polyord=None, opt_reg=None):
32 """
33 Given a dataset, detrend the data inplace either entirely
34 or per each chunk
35
36 :Parameters:
37 dataset : Dataset
38 dataset to operate on
39 perchunk : bool
40 either to operate on whole dataset at once or on each chunk
41 separately
42 model
43 Type of detrending model to run. If 'linear' or 'constant',
44 scipy.signal.detrend is used to perform a linear or demeaning
45 detrend. Polynomial detrending is activated when 'regress' is
46 used or when polyord or opt_reg are specified.
47 polyord : int or list
48 Order of the Legendre polynomial to remove from the data. This
49 will remove every polynomial up to and including the provided
50 value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
51 polynomials from the data. N.B.: The 0th polynomial is the
52 baseline shift, the 1st is the linear trend.
53 If you specify a single int and perchunk is True, then this value
54 is used for each chunk. You can also specify a different polyord
55 value for each chunk by providing a list or ndarray of polyord
56 values the length of the number of chunks.
57 opt_reg : ndarray
58 Optional ndarray of additional information to regress out from the
59 dataset. One example would be to regress out motion parameters.
60 As with the data, time is on the first axis.
61
62 """
63 if polyord is not None or opt_reg is not None:
64 model='regress'
65
66 if model in ['linear', 'constant']:
67
68 bp = 0
69
70 if perchunk:
71 try:
72 bp = getBreakPoints(dataset.chunks)
73 except ValueError, e:
74 raise ValueError, \
75 "Failed to assess break points between chunks. Often " \
76 "that is due to discontinuities within a chunk, which " \
77 "ruins idea of per-chunk detrending. Original " \
78 "exception was: %s" % str(e)
79
80 dataset.samples[:] = signal.detrend(dataset.samples, axis=0,
81 type=model, bp=bp)
82 elif model in ['regress']:
83
84 return __detrend_regress(dataset, perchunk=perchunk,
85 polyord=polyord, opt_reg=opt_reg)
86 else:
87
88 raise ValueError('Specified model type (%s) is unknown.'
89 % (model))
90
94 """
95 Given a dataset, perform a detrend inplace, regressing out polynomial
96 terms as well as optional regressors, such as motion parameters.
97
98 :Parameters:
99 dataset : Dataset
100 Dataset to operate on
101 perchunk : bool
102 Either to operate on whole dataset at once or on each chunk
103 separately. If perchunk is True, all the samples within a
104 chunk should be contiguous and the chunks should be sorted in
105 order from low to high.
106 polyord : int
107 Order of the Legendre polynomial to remove from the data. This
108 will remove every polynomial up to and including the provided
109 value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
110 polynomials from the data. N.B.: The 0th polynomial is the
111 baseline shift, the 1st is the linear trend.
112 If you specify a single int and perchunk is True, then this value
113 is used for each chunk. You can also specify a different polyord
114 value for each chunk by providing a list or ndarray of polyord
115 values the length of the number of chunks.
116 opt_reg : ndarray
117 Optional ndarray of additional information to regress out from the
118 dataset. One example would be to regress out motion parameters.
119 As with the data, time is on the first axis.
120 """
121
122
123 if not polyord is None:
124 if not isSequenceType(polyord):
125
126 polyord = [polyord]*len(dataset.uniquechunks)
127 elif perchunk and len(polyord) != len(dataset.uniquechunks):
128 raise ValueError("If you specify a sequence of polyord values " + \
129 "they sequence length must match the " + \
130 "number of unique chunks in the dataset.")
131
132
133 if perchunk:
134
135 uchunks = dataset.uniquechunks
136
137
138 reg = []
139 for n, chunk in enumerate(uchunks):
140
141 cinds = dataset.chunks == chunk
142
143
144 if not polyord is None:
145
146 x = N.linspace(-1, 1, cinds.sum())
147
148 for n in range(polyord[n] + 1):
149 newreg = N.zeros((dataset.nsamples, 1))
150 newreg[cinds, 0] = legendre(n)(x)
151 reg.append(newreg)
152 else:
153
154 reg = []
155
156 if not polyord is None:
157
158 x = N.linspace(-1, 1, dataset.nsamples)
159 for n in range(polyord[0] + 1):
160 reg.append(legendre(n)(x)[:, N.newaxis])
161
162
163 if not opt_reg is None:
164
165 reg.append(opt_reg)
166
167
168 if len(reg) > 0:
169 if len(reg) > 1:
170 regs = N.hstack(reg)
171 else:
172 regs = reg[0]
173 else:
174
175 raise ValueError("You must specify at least one " + \
176 "regressor to regress out.")
177
178
179 res = lstsq(regs, dataset.samples)
180
181
182 yhat = N.dot(regs, res[0])
183 dataset.samples -= yhat
184
185
186 return res, regs
187