1
2
3
4
5
6
7
8
9 """Some little helper for reading (and writing) common formats from and to
10 disk."""
11
12 __docformat__ = 'restructuredtext'
13
14 import copy
15 from sets import Set
16
17 from mvpa.misc import warning
18
19 if __debug__:
20 from mvpa.misc import debug
21
23 """Read data that is stored in columns of text files.
24
25 All read data is available via a dictionary-like interface. If
26 column headers are available, the column names serve as dictionary keys.
27 If no header exists an artificial key is generated: str(number_of_column).
28
29 Splitting of text file lines is performed by the standard split() function
30 (which gets passed the `sep` argument as separator string) and each
31 element is converted into the desired datatype.
32
33 Because data is read into a dictionary no two columns can have the same
34 name in the header! Each column is stored as a list in the dictionary.
35 """
def __init__(self, source, header=True, sep=None, dtype=float):
    """Read data from file or dictionary into this dictionary.

    Parameters
    ----------
    - `source`: Can be a filename or a dictionary. In the case of the
                first all data is read from that file and additional
                keyword arguments can be used to customize the read
                procedure. If a dictionary is passed a deepcopy of
                every column is stored.
    - `header`: Indicates whether the column names should be read from the
                first line (`header=True`). If `header=False` unique
                column names will be generated (see class docs). If
                `header` is a python list, its content is used as column
                header names and its length has to match the number of
                columns in the file.
    - `sep`: Separator string. The actual meaning depends on the output
             format (see class docs).
    - `dtype`: Desired datatype.

    :raises: ValueError, if `source` is neither a string nor a dictionary
    """
    dict.__init__(self)

    # Order of the columns as found in a file header. It is only set by
    # `_fromFile` when a header line was actually read.
    self._header_order = None

    if isinstance(source, str):
        self._fromFile(source, header=header, sep=sep, dtype=dtype)
    elif isinstance(source, dict):
        for k, v in source.items():
            # Copy the column so that later in-place modifications of this
            # instance do not leak into the caller's dictionary.
            self[k] = copy.deepcopy(v)
        # a dictionary comes without any guarantee of equal column lengths
        self._check()
    else:
        raise ValueError('Unkown source for ColumnData [%s]'
                         % repr(type(source)))

    # Make every column accessible as a read-only property of the class.
    # Names that the class defines itself are left untouched.
    cls = self.__class__
    classdict = cls.__dict__
    for k in self.keys():
        if k not in classdict:
            if __debug__:
                debug("IOH", "Registering property %s for ColumnData"
                      % repr(k))
            # setattr() instead of exec'ing generated source: works for
            # keys that are no valid identifiers (e.g. the generated
            # '0', '1', ...) and never executes text taken from a file.
            # `_key=k` binds the current key; a plain closure would see
            # only the last value of `k`.
            setattr(cls, k,
                    property(fget=lambda self, _key=k:
                             self._getAttrib(_key)))
85
86
87
88
89
90
91
92
94 """Return corresponding value if given key is known to current instance
95
96 Is used for automatically added properties to the class.
97
98 :raises: ValueError, if `key` is not known to given instance
99
100 :return: value if `key` is known
101 """
102 if self.has_key(key):
103 return self[key]
104 else:
105 raise ValueError, "Instance %s has no data about %s" \
106 % (`self`, `key`)
107
108
110 s = self.__class__.__name__
111 if len(self.keys())>0:
112 s += " %d rows, %d columns [" % \
113 (self.getNRows(), self.getNColumns())
114 s += reduce(lambda x, y: x+" %s" % y, self.keys())
115 s += "]"
116 return s
117
119 """Performs some checks for data integrity.
120 """
121 length = None
122 for k in self.keys():
123 if length == None:
124 length = len(self[k])
125 else:
126 if not len(self[k]) == length:
127 raise ValueError, "Data integrity lost. Columns do not " \
128 "have equal length."
129
130
def _fromFile(self, filename, header, sep, dtype):
    """Loads column data from file -- clears object first.

    Lines that are empty or start with '#' (after stripping surrounding
    whitespace) are skipped.

    Parameters
    ----------
    - `filename`: Name of the text file to read.
    - `header`: If `True` the column names are taken from the first line
                and their order is remembered. If it is a list, its items
                are used as column names. Otherwise names are generated
                from the column index ('0', '1', ...) using the number of
                entries in the first line.
    - `sep`: Separator passed to `split()`; `None` splits on whitespace.
    - `dtype`: Callable that converts each element, or `None` to keep the
               strings as read. If a conversion raises `ValueError` a
               warning is issued and the string is kept.

    :raises: RuntimeError, if a data line does not have as many entries
             as there are column names
    """
    self.clear()
    self._header_order = None

    file_ = open(filename, 'r')
    try:
        if header == True:
            # Strip like it is done for data lines, otherwise the last
            # column name keeps the newline whenever `sep` is not None.
            hdr = file_.readline().strip().split(sep)
            # remember order of header fields
            self._header_order = hdr
        elif isinstance(header, list):
            hdr = header
        else:
            first = file_.readline().strip().split(sep)
            hdr = [str(i) for i in range(len(first))]
            # the first line is data: rewind to not miss it
            file_.seek(0)

        # one (still empty) list per column
        tbl = [[] for _ in hdr]

        for line in file_:
            line = line.strip()
            # skip empty lines and comments
            if not line or line.startswith('#'):
                continue
            entries = line.split(sep)

            if len(entries) != len(hdr):
                raise RuntimeError(
                    "Number of entries in line [%i] does not match number "
                    "of columns in header [%i]." % (len(entries), len(hdr)))

            for i, v in enumerate(entries):
                if dtype is not None:
                    try:
                        v = dtype(v)
                    except ValueError:
                        warning("Can't convert %s to desired datatype %s." %
                                (repr(v), repr(dtype)) +
                                " Leaving original type")
                tbl[i].append(v)
    finally:
        # release the handle even if parsing fails
        file_.close()

    # store the columns under their names
    for i, v in enumerate(hdr):
        self[v] = tbl[i]
186
187
189 """Merge column data.
190 """
191
192 for k, v in other.iteritems():
193 if not self.has_key(k):
194 raise ValueError, 'Unknown key [%s].' % `k`
195 if not isinstance(v, list):
196 raise ValueError, 'Can only merge list data, but got [%s].' \
197 % `type(v)`
198
199
200 self[k] += v
201
202
203 self._check()
204
205 return self
206
207
209 """Return new ColumnData with selected samples"""
210
211 data = copy.deepcopy(self)
212 for k, v in data.iteritems():
213 data[k] = [v[x] for x in selection]
214
215 data._check()
216 return data
217
218
220 """Returns the number of columns.
221 """
222 return len(self.keys())
223
224
def tofile(self, filename, header=True, header_order=None, sep=' '):
    """Write column data to a text file.

    Parameters
    ----------
    - `filename`: Name of the file to write. It is opened in 'w' mode, but
                  only after the arguments have been validated.
    - `header`: If `True` a column header is written, using the column
                keys. If `False` no header is written.
    - `header_order`: If it is a list of strings, they will be used instead
                      of simply asking for the dictionary keys. However
                      these strings must match the dictionary keys in number
                      and identity. This argument type can be used to
                      determine the order of the columns in the output file.
                      The default value is `None`. In this case the order
                      of a header that was read from a file is reused, with
                      any other columns appended; without such a header
                      the order of the dictionary keys is used.
    - `sep`: String that is written as a separator between two data columns.

    :raises: ValueError, if `header_order` does not have one entry per
             column or contains an unknown key
    """
    # Determine (and validate) the column order before touching the file,
    # so that a bad argument cannot truncate an existing file.
    if header_order is None:
        if self._header_order is None:
            col_hdr = list(self.keys())
        else:
            # columns known from the file header first, followed by
            # whatever columns are not part of that header
            known = self._header_order
            col_hdr = known + [k for k in self.keys() if k not in known]
    else:
        if len(header_order) != self.getNColumns():
            raise ValueError('Header list does not match number of '
                             'columns.')
        for k in header_order:
            if k not in self:
                raise ValueError('Unknown key [%s]' % repr(k))
        col_hdr = header_order

    file_ = open(filename, 'w')
    try:
        if header == True:
            file_.write(sep.join(col_hdr) + '\n')

        # one output line per row, columns in the order determined above
        for r in range(self.getNRows()):
            row = [str(self[k][r]) for k in col_hdr]
            file_.write(sep.join(row) + '\n')
    finally:
        # release the handle even if writing fails
        file_.close()
275
276
278 """Returns the number of rows.
279 """
280
281 if not len(self.keys()):
282 return 0
283
284 else:
285 return len(self[self.keys()[0]])
286
287
288
290 """IO helper to read FSL's EV3 files.
291
292 This is a three-column textfile format that is used to specify stimulation
293 protocols for fMRI data analysis in FSL's FEAT module.
294
295 Data is always read as `float`.
296 """
298 """Read and write FSL EV3 files.
299
300 Parameter
301 ---------
302
303 - `source`: filename of an EV3 file
304 """
305
306 ColumnData.__init__(self, source,
307 header=['onsets', 'durations', 'intensities'],
308 sep=None, dtype=float)
309
310
312 """Returns the number of EVs in the file.
313 """
314 return self.getNRows()
315
316
318 """Returns a tuple of (onset time, stimulus duration, intensity) for a
319 certain EV.
320 """
321 return (self['onsets'][evid],
322 self['durations'][evid],
323 self['intensities'][evid])
324
325
327 """Write data to a FSL EV3 file.
328 """
329 ColumnData.tofile(self, filename,
330 header=False,
331 header_order=['onsets', 'durations', 'intensities'],
332 sep=' ')
333
334
335 onsets = property(fget=lambda self: self['onsets'])
336 durations = property(fget=lambda self: self['durations'])
337 intensities = property(fget=lambda self: self['intensities'])
338 nevs = property(fget=getNEVs)
339
340
341
343 """Read and write PyMVPA sample attribute definitions from and to text
344 files.
345 """
347 """Read PyMVPA sample attributes from disk.
348
349 Parameter
350 ---------
351
352 - `source`: filename of an attribute file
353 """
354 ColumnData.__init__(self, source,
355 header=['labels', 'chunks'],
356 sep=None, dtype=float)
357
358
360 """Write sample attributes to a text file.
361 """
362 ColumnData.tofile(self, filename,
363 header=False,
364 header_order=['labels', 'chunks'],
365 sep=' ')
366
367
369 """Returns the number of samples in the file.
370 """
371 return self.getNRows()
372
373
374 nsamples = property(fget=getNSamples)
375
376
377
379 """Read and write McFlirt's motion estimation parameters from and to text
380 files.
381 """
382 header_def = ['rot1', 'rot2', 'rot3', 'x', 'y', 'z']
383
394
395
403