"""Dataset container"""

__docformat__ = 'restructuredtext'

import operator
import random
import mvpa.support.copy as copy
import numpy as N

from sets import Set

from mvpa.misc.exceptions import DatasetError
from mvpa.misc.support import idhash as idhash_
from mvpa.base.dochelpers import enhancedDocString, table2string

from mvpa.base import warning

if __debug__:
    from mvpa.base import debug


def _validate_indexes_uniq_sorted(seq, fname, item):
    """Helper function to validate that seq contains unique sorted values
    """
    if operator.isSequenceType(seq):
        seq_unique = N.unique(seq)
        if len(seq) != len(seq_unique):
            warning("%s() operates only with indexes for %s without"
                    " repetitions. Repetitions were removed."
                    % (fname, item))
        if N.any(N.sort(seq) != seq_unique):
            warning("%s() does not guarantee the original order"
                    " of selected %ss. Use selectSamples() and "
                    " selectFeatures(sort=False) instead" % (fname, item))


class Dataset(object):
    """*The* Dataset.

    This class provides a container to store all necessary data to
    perform MVPA analyses. These are the data samples, as well as the
    labels associated with the samples. Additionally, samples can be
    grouped into chunks.

    :Groups:
      - `Creators`: `__init__`, `selectFeatures`, `selectSamples`,
        `applyMapper`
      - `Mutators`: `permuteLabels`

    Important: labels are assumed to be immutable, i.e. no one should
    modify them externally by accessing indexed items, i.e. something like
    ``dataset.labels[1] += 100`` should not be used. If a label has to be
    modified, a full copy of the labels should be obtained, operated on,
    and assigned back to the dataset; otherwise dataset.uniquelabels
    would not work. The same applies to any other attribute which has a
    corresponding unique* access property.
    """

    _uniqueattributes = []
    """Unique attributes associated with the data"""

    _registeredattributes = []
    """Registered attributes (stored in _data)"""

    _requiredattributes = ['samples', 'labels']
    """Attributes which have to be provided to __init__, or otherwise
    no default values would be assumed and construction of the
    instance would fail"""

    def __init__(self,
                 # copy constructor arguments
                 data=None,
                 dsattr=None,
                 # automatic conversion of samples dtype
                 dtype=None,
                 # new construction arguments
                 samples=None,
                 labels=None,
                 labels_map=None,
                 chunks=None,
                 origids=None,
                 # flags
                 check_data=True,
                 copy_samples=False,
                 copy_data=True,
                 copy_dsattr=True):
        """Initialize dataset instance

        There are basically two different ways to create a dataset:

        1. Create a new dataset from samples and sample attributes. In
           this mode a two-dimensional `ndarray` has to be passed to the
           `samples` keyword argument and the corresponding samples
           attributes are provided via the `labels` and `chunks`
           arguments.

        2. Copy constructor mode
           The second way is used internally to perform quick copying
           of datasets, e.g. when performing feature selection. In this
           mode the two dictionaries (`data` and `dsattr`) are
           required. For performance reasons this mode bypasses most of
           the sanity checks performed by the previous mode, as for
           internal operations data integrity is assumed.

        :Parameters:
          data : dict
            Dictionary with an arbitrary number of entries. The value for
            each key in the dict has to be an ndarray with the
            same length as the number of rows in the samples array.
            A special entry in this dictionary is 'samples', a 2d array
            (samples x features). A shallow copy is stored in the object.
          dsattr : dict
            Dictionary of dataset attributes. An arbitrary number of
            arbitrarily named and typed objects can be stored here. A
            shallow copy of the dictionary is stored in the object.
          dtype : type | None
            If None -- do not change data type if samples
            is an ndarray. Otherwise convert samples to dtype.

        :Keywords:
          samples : ndarray
            2d array (samples x features)
          labels
            An array or scalar value defining labels for each sample.
            Generally `labels` should be numeric, unless `labels_map`
            is used.
          labels_map : None or bool or dict
            Map original labels into numeric labels. If True, the
            mapping is computed if labels are literal. If False,
            no mapping is computed. If a dict instance is given, the
            provided mapping is verified and applied. If you want
            labels_map to just be present given already numeric labels,
            simply assign a labels_map dictionary to an existing dataset
            instance.
          chunks
            An array or scalar value defining chunks for each sample.

        Each of the keyword arguments overwrites what is/might be
        already in the `data` container.
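
        A minimal example of the first mode::

          ds = Dataset(samples=N.random.randn(4, 2),
                       labels=[1, 1, 2, 2],
                       chunks=[0, 0, 1, 1])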
        """
        # see if data and dsattr dictionaries were provided
        if data is None:
            data = {}
        if dsattr is None:
            dsattr = {}

        if copy_data:
            # deep copy of the data dictionary (samples are copied only
            # if copy_samples is enabled)
            lcl_data = data.copy()
            for k, v in data.iteritems():
                # skip copying samples if not requested
                if k == 'samples' and not copy_samples:
                    continue
                lcl_data[k] = v.copy()
        else:
            # shallow copy of the data dictionary
            lcl_data = data.copy()

        if copy_dsattr and len(dsattr)>0:
            # deep copy of the dataset attributes
            if __debug__:
                debug('DS', "Deep copying dsattr %s" % `dsattr`)
            lcl_dsattr = copy.deepcopy(dsattr)
        else:
            # shallow copy of the dataset attributes
            lcl_dsattr = copy.copy(dsattr)

        self._data = lcl_data
        """What makes a dataset."""

        self._dsattr = lcl_dsattr
        """Dataset attributes."""

        # store samples (and possibly transform/reshape/retype them)
        if not samples is None:
            if __debug__:
                if lcl_data.has_key('samples'):
                    debug('DS',
                          "`Data` dict has `samples` (%s) but there is also" \
                          " __init__ parameter `samples` which overrides " \
                          " stored in `data`" % (`lcl_data['samples'].shape`))
            lcl_data['samples'] = self._shapeSamples(samples, dtype,
                                                     copy_samples)

        # store labels
        if not labels is None:
            if __debug__:
                if lcl_data.has_key('labels'):
                    debug('DS',
                          "`Data` dict has `labels` (%s) but there is also" \
                          " __init__ parameter `labels` which overrides " \
                          " stored in `data`" % (`lcl_data['labels']`))
            if lcl_data.has_key('samples'):
                lcl_data['labels'] = \
                    self._expandSampleAttribute(labels, 'labels')

        # check if all required attributes are provided
        for attr in self._requiredattributes:
            if not lcl_data.has_key(attr):
                raise DatasetError, \
                      "Attribute %s is required to initialize dataset" % \
                      attr

        nsamples = self.nsamples

        # assign chunks
        if chunks is not None:
            lcl_data['chunks'] = \
                self._expandSampleAttribute(chunks, 'chunks')
        elif not lcl_data.has_key('chunks'):
            # if no chunk information is given assume that every sample
            # is its own chunk
            lcl_data['chunks'] = N.arange(nsamples)

        # assign origids
        if not origids is None:
            # simply assign if provided
            lcl_data['origids'] = origids
        elif not lcl_data.has_key('origids'):
            # otherwise construct unique ones
            lcl_data['origids'] = N.arange(len(lcl_data['labels']))
        else:
            # origids were already present in `data` (e.g. copy
            # constructor mode) -- keep them as they are
            pass

        # initialize any remaining registered attributes
        for attr in self._registeredattributes:
            if not lcl_data.has_key(attr):
                if __debug__:
                    debug("DS", "Initializing attribute %s" % attr)
                lcl_data[attr] = N.zeros(nsamples)

        # take care of labels_map
        labels_ = N.asarray(lcl_data['labels'])
        labels_map_known = lcl_dsattr.has_key('labels_map')
        if labels_map is True:
            # need to compose the mapping
            if labels_.dtype.char == 'S':
                # labels are literal -- create mapping to numeric labels
                ulabels = list(Set(labels_))
                ulabels.sort()
                labels_map = dict([ (x[1], x[0]) for x in enumerate(ulabels) ])
                if __debug__:
                    debug('DS', 'Mapping for the labels computed to be %s'
                          % labels_map)
            else:
                if __debug__:
                    debug('DS', 'Mapping of labels was requested but labels '
                          'are not strings. Skipped')
                labels_map = None
        elif labels_map is False:
            labels_map = None

        if isinstance(labels_map, dict):
            if labels_map_known:
                if __debug__:
                    debug('DS',
                          "`dsattr` dict has `labels_map` (%s) but there is also" \
                          " __init__ parameter `labels_map` (%s) which overrides " \
                          " stored in `dsattr`" % (lcl_dsattr['labels_map'], labels_map))

            lcl_dsattr['labels_map'] = labels_map
            # map labels if needed (if they are strings or were
            # explicitly requested to be remapped)
            if labels_.dtype.char == 'S' or not labels_map_known:
                if __debug__:
                    debug('DS_', "Remapping labels using mapping %s" % labels_map)
                try:
                    lcl_data['labels'] = N.array(
                        [labels_map[x] for x in lcl_data['labels']])
                except KeyError, e:
                    raise ValueError, "Provided labels_map %s is insufficient " \
                          "to map all the labels. Mapping for label %s is " \
                          "missing" % (labels_map, e)

        elif not lcl_dsattr.has_key('labels_map'):
            lcl_dsattr['labels_map'] = labels_map
        elif __debug__:
            debug('DS_', 'Not overriding labels_map in dsattr since it has one')

        if check_data:
            self._checkData()

        # there is no sense in wasting time on _resetallunique if just
        # the copy constructor mode was used (no labels/chunks provided)
        if not labels is None or not chunks is None:
            lcl_dsattr['__uniquereseted'] = False
            self._resetallunique(force=True)


    __doc__ = enhancedDocString('Dataset', locals())


    @property
    def idhash(self):
        """To verify if dataset is in the same state as when something
        else was done

        e.g. whether a classifier was trained on this very dataset
        """
        _data = self._data
        res = idhash_(_data)

        # hash the content of all data arrays as well
        keys = _data.keys()
        keys.sort()
        for k in keys:
            res += idhash_(_data[k])
        return res


    def _resetallunique(self, force=False):
        """Set to None all unique* attributes of corresponding dictionary
        """
        _dsattr = self._dsattr

        if not force and _dsattr['__uniquereseted']:
            return

        _uniqueattributes = self._uniqueattributes

        if __debug__ and "DS_" in debug.active:
            debug("DS_", "Resetting all attributes %s for dataset %s"
                  % (_uniqueattributes,
                     self.summary(uniq=False, idhash=False,
                                  stats=False, lstats=False)))

        # reset the values of all unique* attributes
        for k in _uniqueattributes:
            _dsattr[k] = None
        _dsattr['__uniquereseted'] = True


    def _getuniqueattr(self, attrib, dict_):
        """Provide common facility to return unique attributes

        XXX `dict_` can be simply replaced now with self._dsattr
        """
        # local bindings
        _dsattr = self._dsattr

        if not _dsattr.has_key(attrib) or _dsattr[attrib] is None:
            if __debug__ and 'DS_' in debug.active:
                debug("DS_", "Recomputing unique set for attrib %s within %s" %
                      (attrib, self.summary(uniq=False,
                                            stats=False, lstats=False)))
            # recompute; strip the "unique" prefix to get the plain
            # attribute name
            _dsattr[attrib] = N.unique( N.asanyarray(dict_[attrib[6:]]) )
            assert(not _dsattr[attrib] is None)
            _dsattr['__uniquereseted'] = False

        return _dsattr[attrib]


    def _setdataattr(self, attrib, value):
        """Provide common facility to set attributes
        """
        if len(value) != self.nsamples:
            raise ValueError, \
                  "Provided %s have %d entries while there are %d samples" % \
                  (attrib, len(value), self.nsamples)
        self._data[attrib] = N.asarray(value)
        uniqueattr = "unique" + attrib

        # invalidate the cached unique values for this attribute
        _dsattr = self._dsattr
        if _dsattr.has_key(uniqueattr):
            _dsattr[uniqueattr] = None


    def _getNSamplesPerAttr(self, attrib='labels'):
        """Returns the number of samples per unique value of a sample
        attribute (e.g. per unique label).
        """
        # local bindings
        _data = self._data

        # make sure the unique values are computed
        uniqueattr = self._getuniqueattr(attrib="unique" + attrib,
                                         dict_=_data)

        # use a dictionary to cope with arbitrary attribute values
        result = dict(zip(uniqueattr, [ 0 ] * len(uniqueattr)))
        for l in _data[attrib]:
            result[l] += 1

        return result


    def _getSampleIdsByAttr(self, values, attrib="labels",
                            sort=True):
        """Return indices of samples given a list of attribute values
        """
        if not operator.isSequenceType(values) \
               or isinstance(values, basestring):
            values = [ values ]

        # collect the ids of samples matching each value
        sel = N.array([], dtype=N.int16)
        _data = self._data
        for value in values:
            sel = N.concatenate((
                sel, N.where(_data[attrib]==value)[0]))

        if sort:
            # place samples in the right order
            sel.sort()

        return sel


    def idsonboundaries(self, prior=0, post=0,
                        attributes_to_track=['labels', 'chunks'],
                        affected_labels=None,
                        revert=False):
        """Find samples which are on the boundaries of the blocks

        Such samples might need to be removed. By default (with
        prior=0, post=0) ids of the first samples in a 'block' are
        reported

        :Parameters:
          prior : int
            how many samples prior to the transition sample to include
          post : int
            how many samples post the transition sample to include
          attributes_to_track : list of basestring
            which attributes to track to decide on the boundary condition
          affected_labels : list of basestring
            for which labels to perform the selection. If None -- for all
          revert : bool
            whether to revert the meaning and provide ids of samples which
            are found to not be boundary samples
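
        Example::

          # ids of the first sample of each chunk
          onsets = dataset.idsonboundaries(attributes_to_track=['chunks'])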
        """
        # local bindings
        _data = self._data
        labels = self.labels
        nsamples = self.nsamples

        lastseen = none = [None for attr in attributes_to_track]
        transitions = []

        for i in xrange(nsamples+1):
            if i < nsamples:
                current = [_data[attr][i] for attr in attributes_to_track]
            else:
                current = none
            if lastseen != current:
                # transition point
                new_transitions = range(max(0, i-prior),
                                        min(nsamples-1, i+post)+1)
                if affected_labels is not None:
                    new_transitions = [i for i in new_transitions
                                       if labels[i] in affected_labels]
                transitions += new_transitions
            lastseen = current

        transitions = Set(transitions)
        if revert:
            transitions = Set(range(nsamples)).difference(transitions)

        # postprocess
        transitions = N.array(list(transitions))
        transitions.sort()
        return list(transitions)


    def _shapeSamples(self, samples, dtype, copy):
        """Adapt different kinds of samples

        Handle all possible input value for 'samples' and transform
        them into a 2d (samples x feature) representation.
        """
        # put samples array into correct shape
        # 1d arrays or simple sequences are assumed to be a single pattern
        if (not isinstance(samples, N.ndarray)):
            # it is safe to provide dtype which defaults to None,
            # when N would choose appropriate dtype automagically
            samples = N.array(samples, ndmin=2, dtype=dtype, copy=copy)
        else:
            if samples.ndim < 2 \
                   or (not dtype is None and dtype != samples.dtype):
                if dtype is None:
                    dtype = samples.dtype
                samples = N.array(samples, ndmin=2, dtype=dtype, copy=copy)
            elif copy:
                samples = samples.copy()

        # only (samples x features) matrices are supported
        if len(samples.shape) > 2:
            raise DatasetError, "Only (samples x features) -> 2d sample " \
                  + "are supported (got %s shape of samples)." \
                  % (`samples.shape`) \
                  + " Consider MappedDataset if applicable."

        return samples


    def _checkData(self):
        """Checks `_data` members to have the same # of samples.
        """
        # local bindings
        nsamples = self.nsamples
        _data = self._data

        for k, v in _data.iteritems():
            if not len(v) == nsamples:
                raise DatasetError, \
                      "Length of sample attribute '%s' [%i] does not " \
                      "match the number of samples in the dataset [%i]." \
                      % (k, len(v), nsamples)

        # check for unique origids -- work on a copy since sort()
        # would otherwise modify the original array
        uniques = N.unique(_data['origids'])
        uniques.sort()

        sorted_ids = _data['origids'].copy()
        sorted_ids.sort()

        if not (uniques == sorted_ids).all():
            raise DatasetError, "Samples IDs are not unique."

        # check whether labels are literal
        if N.asanyarray(_data['labels']).dtype.char == 'S':
            warning('Labels for dataset %s are literal, should be numeric. '
                    'You might like to use labels_map argument.' % self)


    def _expandSampleAttribute(self, attr, attr_name):
        """If a sample attribute is given as a scalar expand/repeat it to a
        length matching the number of samples in the dataset.
        """
        try:
            # if we are initializing with a single string -- we should
            # treat it as a single value for all samples
            if isinstance(attr, basestring):
                raise TypeError
            if len(attr) != self.nsamples:
                raise DatasetError, \
                      "Length of sample attribute '%s' [%d]" \
                      % (attr_name, len(attr)) \
                      + " has to match the number of samples" \
                      + " [%d]." % self.nsamples
            # store the sequence as an array
            return N.array(attr)

        except TypeError:
            # make a sequence of an identical value matching the number
            # of samples
            return N.repeat(attr, self.nsamples)


    @classmethod
    def _registerAttribute(cls, key, dictname="_data", abbr=None,
                           hasunique=False):
        """Register an attribute for any Dataset class.

        Creates property assigning getters/setters depending on the
        availability of corresponding _get, _set functions.
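
        For example, ``_registerAttribute("labels", "_data", abbr='L',
        hasunique=True)`` creates the properties ``labels`` (abbreviated
        ``L``), ``uniquelabels`` (``UL``), ``samplesperlabel``, and the
        method ``idsbylabels``.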
        """
        classdict = cls.__dict__
        if not classdict.has_key(key):
            if __debug__:
                debug("DS", "Registering new attribute %s" % key)

            getter = '_get%s' % key
            if classdict.has_key(getter):
                getter = '%s.%s' % (cls.__name__, getter)
            else:
                getter = "lambda x: x.%s['%s']" % (dictname, key)

            setter = '_set%s' % key
            if classdict.has_key(setter):
                setter = '%s.%s' % (cls.__name__, setter)
            elif dictname=="_data":
                setter = "lambda self,x: self._setdataattr" + \
                         "(attrib='%s', value=x)" % (key)
            else:
                setter = None

            if __debug__:
                debug("DS", "Registering new property %s.%s" %
                      (cls.__name__, key))
            exec "%s.%s = property(fget=%s, fset=%s)" % \
                 (cls.__name__, key, getter, setter)

            if abbr is not None:
                exec "%s.%s = property(fget=%s, fset=%s)" % \
                     (cls.__name__, abbr, getter, setter)

            if hasunique:
                uniquekey = "unique%s" % key
                getter = '_get%s' % uniquekey
                if classdict.has_key(getter):
                    getter = '%s.%s' % (cls.__name__, getter)
                else:
                    getter = "lambda x: x._getuniqueattr" + \
                             "(attrib='%s', dict_=x.%s)" % (uniquekey, dictname)

                if __debug__:
                    debug("DS", "Registering new property %s.%s" %
                          (cls.__name__, uniquekey))

                exec "%s.%s = property(fget=%s)" % \
                     (cls.__name__, uniquekey, getter)
                if abbr is not None:
                    exec "%s.U%s = property(fget=%s)" % \
                         (cls.__name__, abbr, getter)

                sampleskey = "samplesper%s" % key[:-1]
                if __debug__:
                    debug("DS", "Registering new property %s.%s" %
                          (cls.__name__, sampleskey))

                exec "%s.%s = property(fget=%s)" % \
                     (cls.__name__, sampleskey,
                      "lambda x: x._getNSamplesPerAttr(attrib='%s')" % key)

                cls._uniqueattributes.append(uniquekey)

                sampleskey = "idsby%s" % key
                if __debug__:
                    debug("DS", "Registering new property %s.%s" %
                          (cls.__name__, sampleskey))

                exec "%s.%s = %s" % (cls.__name__, sampleskey,
                                     "lambda self, x: " +
                                     "self._getSampleIdsByAttr(x,attrib='%s')" % key)

                cls._uniqueattributes.append(uniquekey)

            cls._registeredattributes.append(key)
        elif __debug__:
            warning('Trying to reregister attribute `%s`. For now ' % key +
                    'such capability is not present')


    def __str__(self):
        """String summary over the object
        """
        try:
            ssummary = self.summary(uniq=True,
                                    idhash=__debug__ and ('DS_ID' in debug.active),
                                    stats=__debug__ and ('DS_STATS' in debug.active),
                                    lstats=__debug__ and ('DS_STATS' in debug.active),
                                    )
        except (AttributeError, KeyError), e:
            # summary() might fail if the dataset is not yet fully
            # initialized
            ssummary = str(e)
        return ssummary


    def __repr__(self):
        return "<%s>" % str(self)


    def summary(self, uniq=True, stats=True, idhash=False, lstats=True,
                maxc=30, maxl=20):
        """String summary over the object

        :Parameters:
          uniq : bool
            Include summary over data attributes which have unique values
          idhash : bool
            Include idhash value for dataset and samples
          stats : bool
            Include some basic statistics (mean, std, var) over dataset samples
          lstats : bool
            Include statistics on chunks/labels
          maxc : int
            Maximal number of chunks when providing details on labels/chunks
          maxl : int
            Maximal number of labels when providing details on labels/chunks
        """
        samples = self.samples
        _data = self._data
        _dsattr = self._dsattr

        if idhash:
            idhash_ds = "{%s}" % self.idhash
            idhash_samples = "{%s}" % idhash_(samples)
        else:
            idhash_ds = ""
            idhash_samples = ""

        s = """Dataset %s/ %s %d%s x %d""" % \
            (idhash_ds, samples.dtype,
             self.nsamples, idhash_samples, self.nfeatures)

        ssep = (' ', '\n')[lstats]
        if uniq:
            s += "%suniq:" % ssep
            for uattr in _dsattr.keys():
                if not uattr.startswith("unique"):
                    continue
                attr = uattr[6:]
                try:
                    value = self._getuniqueattr(attrib=uattr,
                                                dict_=_data)
                    s += " %d %s" % (len(value), attr)
                except:
                    pass

        if isinstance(self.labels_map, dict):
            s += ' labels_mapped'

        if stats:
            if self.nfeatures:
                s += "%sstats: mean=%g std=%g var=%g min=%g max=%g\n" % \
                     (ssep, N.mean(samples), N.std(samples),
                      N.var(samples), N.min(samples), N.max(samples))
            else:
                s += "%sstats: dataset has no features\n" % ssep

        if lstats:
            s += self.summary_labels(maxc=maxc, maxl=maxl)

        return s


    def summary_labels(self, maxc=30, maxl=20):
        """Provide summary statistics over the labels and chunks

        :Parameters:
          maxc : int
            Maximal number of chunks when providing details
          maxl : int
            Maximal number of labels when providing details
        """
        # deferred import to avoid a circular dependency at module load
        from mvpa.datasets.miscfx import getSamplesPerChunkLabel
        spcl = getSamplesPerChunkLabel(self)

        ul = self.uniquelabels.tolist()
        uc = self.uniquechunks.tolist()
        s = ""
        if len(ul) < maxl and len(uc) < maxc:
            s += "\nCounts of labels in each chunk:"

            table = [[' chunks\\labels'] + ul]
            table += [[''] + ['---'] * len(ul)]
            for c, counts in zip(uc, spcl):
                table.append([ str(c) ] + counts.tolist())
            s += '\n' + table2string(table)
        else:
            s += "No details due to large number of labels or chunks. " \
                 "Increase maxc and maxl if desired"

        labels_map = self.labels_map
        if isinstance(labels_map, dict):
            s += "\nOriginal labels were mapped using following mapping:"
            s += '\n\t'+'\n\t'.join([':\t'.join(map(str, x))
                                     for x in labels_map.items()]) + '\n'

        def cl_stats(axis, u, name1, name2):
            """Compute statistics per label
            """
            stats = {'min': N.min(spcl, axis=axis),
                     'max': N.max(spcl, axis=axis),
                     'mean': N.mean(spcl, axis=axis),
                     'std': N.std(spcl, axis=axis),
                     '#%ss' % name2: N.sum(spcl>0, axis=axis)}
            entries = [' ' + name1, 'mean', 'std', 'min', 'max', '#%ss' % name2]
            table = [ entries ]
            for i, l in enumerate(u):
                d = {' ' + name1 : l}
                d.update(dict([ (k, stats[k][i]) for k in stats.keys()]))
                table.append( [ ('%.3g', '%s')[isinstance(d[e], basestring)]
                                % d[e] for e in entries] )
            return '\nSummary per %s across %ss\n' % (name1, name2) \
                   + table2string(table)

        if len(ul) < maxl:
            s += cl_stats(0, ul, 'label', 'chunk')
        if len(uc) < maxc:
            s += cl_stats(1, uc, 'chunk', 'label')
        return s


    def __iadd__(self, other):
        """Merge the samples of one Dataset object to another (in-place).

        No dataset attributes, besides labels_map, will be merged!
        Additionally, a new set of unique `origids` will be generated.
        """
        # local bindings
        _data = self._data
        other_data = other._data

        if not self.nfeatures == other.nfeatures:
            raise DatasetError, "Cannot add Dataset, because the number of " \
                                "features does not match."

        # take care about labels_map and labels
        slm = self.labels_map
        olm = other.labels_map
        if N.logical_xor(slm is None, olm is None):
            raise ValueError, "Cannot add datasets where only one of them " \
                  "has labels map assigned. If needed -- implement it"

        # concatenate all sample attributes
        for k, v in _data.iteritems():
            if k == 'origids':
                # special case samples origids: just regenerate unique ones
                _data[k] = N.arange(len(v) + len(other_data[k]))

            elif k == 'labels' and slm is not None:
                # combine labels_maps and remap the labels of `other`
                nlm = slm.copy()
                # next numeric id which may be assigned to a new label
                nextid = N.sort(nlm.values())[-1] + 1
                olabels = other.labels
                olabels_remap = {}
                for ol, olnum in olm.iteritems():
                    if not nlm.has_key(ol):
                        # check if other's numeric id is already in use
                        if olnum in nlm.values():
                            nextid = N.sort(nlm.values() + olm.values())[-1] + 1
                        else:
                            nextid = olnum
                        olabels_remap[olnum] = nextid
                        nlm[ol] = nextid
                        nextid += 1
                    else:
                        olabels_remap[olnum] = nlm[ol]
                olabels = [olabels_remap[x] for x in olabels]
                # finally compose new labels
                _data['labels'] = N.concatenate((v, olabels), axis=0)
                # and reassign the combined mapping
                self._dsattr['labels_map'] = nlm

                if __debug__:
                    # check if we are not dealing with degenerate mappings
                    if (len(Set(slm.keys())) != len(Set(slm.values()))) or \
                       (len(Set(olm.keys())) != len(Set(olm.values()))):
                        warning("Adding datasets where multiple labels "
                                "mapped to the same ID is not recommended. "
                                "Please check the outcome. Original mappings "
                                "were %s and %s. Resultant is %s"
                                % (slm, olm, nlm))

            else:
                _data[k] = N.concatenate((v, other_data[k]), axis=0)

        # sample attributes changed -- reset the unique* caches
        self._resetallunique()

        return self


    def __add__(self, other):
        """Merge the samples of two Dataset objects.

        All data of both datasets is copied, concatenated and a new Dataset is
        returned.

        NOTE: This can be a costly operation (both memory and time). If
        performance is important consider the '+=' operator.
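
        Example (given two datasets with matching nfeatures)::

          merged = ds1 + ds2   # new dataset with samples of both
          ds1 += ds2           # in-place variant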
        """
        # create a new object of the same type it is now and NOT only Dataset
        out = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        out.__init__(data=self._data,
                     dsattr=self._dsattr,
                     copy_samples=True,
                     copy_data=True,
                     copy_dsattr=True)

        out += other

        return out


    def copy(self, deep=True):
        """Create a copy (clone) of the dataset, by fully copying current one

        :Keywords:
          deep : bool
            deep flag is provided to __init__ for
            copy_{samples,data,dsattr}. By default full copy is done.
        """
        # create a new object of the same type it is now and NOT only Dataset
        out = super(Dataset, self).__new__(self.__class__)

        # now init it, passing `deep` through to the copy_* flags as
        # documented above
        out.__init__(data=self._data,
                     dsattr=self._dsattr,
                     copy_samples=deep,
                     copy_data=deep,
                     copy_dsattr=deep)

        return out


    def selectFeatures(self, ids=None, sort=True, groups=None):
        """Select a number of features from the current set.

        :Parameters:
          ids
            iterable container to select ids
          sort : bool
            whether to sort the ids. Order matters and `selectFeatures`
            assumes incremental order. If not sorted, in non-optimized
            code selectFeatures would verify the order and sort

        Returns a new Dataset object with a copy of corresponding features
        from the original samples array.

        WARNING: The order of ids determines the order of features in
        the returned dataset. This might be useful sometimes, but can
        also cause major headaches! Order is verified when
        running in non-optimized code (if __debug__)
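
        Example::

          ds2 = dataset.selectFeatures([0, 2, 5])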
        """
        if ids is None and groups is None:
            raise ValueError, "No feature selection specified."

        # start with an empty list if no ids were specified (so just groups)
        if ids is None:
            ids = []

        if not groups is None:
            if not self._dsattr.has_key('featuregroups'):
                raise RuntimeError, \
                      "Dataset has no feature grouping information."

            for g in groups:
                ids += (self._dsattr['featuregroups'] == g).nonzero()[0].tolist()

        # sort, or verify sortedness in non-optimized mode
        if sort:
            ids = copy.deepcopy(ids)
            ids.sort()
        elif __debug__ and 'CHECK_DS_SORTED' in debug.active:
            from mvpa.misc.support import isSorted
            if not isSorted(ids):
                warning("IDs for selectFeatures must be provided " +
                        "in sorted order, otherwise major headache might occur")

        # shallow-copy all stuff from the current data dict
        new_data = self._data.copy()

        # assign the selected features -- data is still shared with
        # the current dataset
        new_data['samples'] = self._data['samples'][:, ids]

        # apply selection to feature groups as well
        if self._dsattr.has_key('featuregroups'):
            new_dsattr = self._dsattr.copy()
            new_dsattr['featuregroups'] = self._dsattr['featuregroups'][ids]
        else:
            new_dsattr = self._dsattr

        # create a new object of the same type it is now and NOT only Dataset
        dataset = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        dataset.__init__(data=new_data,
                         dsattr=new_dsattr,
                         check_data=False,
                         copy_samples=False,
                         copy_data=False,
                         copy_dsattr=False
                         )

        return dataset


    def applyMapper(self, featuresmapper=None, samplesmapper=None,
                    train=True):
        """Obtain new dataset by applying mappers over features and/or samples.

        While featuresmappers leave the sample attributes information
        unchanged, as the number of samples in the dataset is invariant,
        samplesmappers are also applied to the samples attributes themselves!

        Applying a featuresmapper will destroy any feature grouping information.

        :Parameters:
          featuresmapper : Mapper
            `Mapper` to somehow transform each sample's features
          samplesmapper : Mapper
            `Mapper` to transform each feature across samples
          train : bool
            Flag whether to train the mapper with this dataset before applying
            it.

        TODO: selectFeatures is pretty much
              applyMapper(featuresmapper=MaskMapper(...))
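
        Example (illustrative, with `mapper` standing for some constructed
        mapper instance, e.g. a `MaskMapper`)::

          ds2 = dataset.applyMapper(featuresmapper=mapper)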
        """
        # shallow-copy all stuff from the current data dict
        new_data = self._data.copy()

        # apply mappers
        if samplesmapper:
            if __debug__:
                debug("DS", "Training samplesmapper %s" % `samplesmapper`)
            samplesmapper.train(self)

            if __debug__:
                debug("DS", "Applying samplesmapper %s" % `samplesmapper` +
                      " to samples of dataset `%s`" % `self`)

            # remove origids -- they are not valid anymore after
            # remapping the samples
            if new_data.has_key('origids'):
                del(new_data['origids'])

            # apply the mapper to all sample-wise data in the dataset
            for k in new_data.keys():
                new_data[k] = samplesmapper.forward(self._data[k])

        # feature mapping might also affect dataset attributes
        new_dsattr = self._dsattr

        if featuresmapper:
            if __debug__:
                debug("DS", "Training featuresmapper %s" % `featuresmapper`)
            featuresmapper.train(self)

            if __debug__:
                debug("DS", "Applying featuresmapper %s" % `featuresmapper` +
                      " to samples of dataset `%s`" % `self`)
            new_data['samples'] = featuresmapper.forward(self._data['samples'])

            # remove feature grouping, who knows what the mapper did to the
            # features
            if self._dsattr.has_key('featuregroups'):
                new_dsattr = self._dsattr.copy()
                del(new_dsattr['featuregroups'])
            else:
                new_dsattr = self._dsattr

        # create a new object of the same type it is now and NOT only Dataset
        dataset = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        dataset.__init__(data=new_data,
                         dsattr=new_dsattr,
                         check_data=False,
                         copy_samples=False,
                         copy_data=False,
                         copy_dsattr=False
                         )

        # sample attributes might have changed -- reset unique* caches
        if samplesmapper:
            dataset._resetallunique(force=True)

        return dataset


    def selectSamples(self, ids):
        """Choose a subset of samples defined by samples IDs.

        Returns a new dataset object containing the selected sample
        subset.

        TODO: yoh, we might need to sort the mask if the mask is a
              list of ids and is not ordered. Clarify with Michael what is
              our intent here!
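
        Example::

          ds2 = dataset.selectSamples([0, 2, 4])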
        """
        # without having a sequence as index the masked sample array
        # would lose its 2d layout
        if not operator.isSequenceType( ids ):
            ids = [ids]

        # select the corresponding entries of all sample attributes
        data = {}
        for k, v in self._data.iteritems():
            data[k] = v[ids, ]

        # create a new object of the same type it is now and NOT only Dataset
        dataset = super(Dataset, self).__new__(self.__class__)

        # now init it: to make it work all Dataset constructors have to accept
        # Class(data=Dict, dsattr=Dict)
        dataset.__init__(data=data,
                         dsattr=self._dsattr,
                         check_data=False,
                         copy_samples=False,
                         copy_data=False,
                         copy_dsattr=False)

        dataset._resetallunique(force=True)
        return dataset


    def index(self, *args, **kwargs):
        """Universal indexer to obtain indexes of interesting samples/features.
        See .select() for more information

        :Return: tuple of (samples indexes, features indexes). Each
          item could also be None, if no selection on samples or
          features was requested (to discriminate between no selected
          items, and no selections)
        """
        s_indx = []                 # selections for samples
        f_indx = []                 # selections for features
        return_dataset = kwargs.pop('return_dataset', False)
        largs = len(args)

        args = list(args)           # so we could override

        # figure out how many leading positional arguments are
        # sample/feature selections (and not attribute/value pairs)
        largs_nonstring = 0
        for i in xrange(largs):
            l = args[i]
            if isinstance(l, basestring):
                if l.lower() == 'all':
                    # 'all' is equivalent to a full slice
                    args[i] = slice(None)
                else:
                    break
            largs_nonstring += 1

        if largs_nonstring >= 1:
            s_indx.append(args[0])
            if __debug__ and 'CHECK_DS_SELECT' in debug.active:
                _validate_indexes_uniq_sorted(args[0], 'select', 'samples')
            if largs_nonstring == 2:
                f_indx.append(args[1])
                if __debug__ and 'CHECK_DS_SELECT' in debug.active:
                    _validate_indexes_uniq_sorted(args[1], 'select', 'features')
            elif largs_nonstring > 2:
                raise ValueError, "Only two positional arguments are allowed" \
                      ". 1st for samples, 2nd for features"

        # remaining positional arguments must encode selections as
        # pairs, e.g. ('labels', [1,2,3]); stick them into kwargs
        if (largs - largs_nonstring) % 2 != 0:
            raise ValueError, "Positional selections must come in pairs:" \
                  " e.g. ('labels', [1,2,3])"

        for i in xrange(largs_nonstring, largs, 2):
            k, v = args[i:i+2]
            kwargs[k] = v

        # process keyword selections
        data_ = self._data
        for k, v in kwargs.iteritems():
            if k == 'samples':
                s_indx.append(v)
            elif k == 'features':
                f_indx.append(v)
            elif data_.has_key(k):
                # sample attribute -- store ids of matching samples
                if __debug__:
                    if not N.any([isinstance(v, cls) for cls in
                                  [list, tuple, slice, int]]):
                        raise ValueError, "Trying to specify selection for %s " \
                              "based on unsupported '%s'" % (k, v)
                s_indx.append(self._getSampleIdsByAttr(v, attrib=k, sort=False))
            else:
                raise ValueError, 'Keyword "%s" is not known, thus ' \
                      'select() failed' % k

        def combine_indexes(indx, nelements):
            """Helper function: intersect selections given in indx

            :Parameters:
              indx : list of lists or slices
                selections of elements
              nelements : int
                number of elements total for deriving indexes from slices
            """
            indx_sel = None
            for s in indx:
                if isinstance(s, slice) or \
                   isinstance(s, N.ndarray) and s.dtype==bool:
                    # expand slices and boolean masks into explicit
                    # indexes
                    all_indexes = N.arange(nelements)
                    s = all_indexes[s]
                elif not operator.isSequenceType(s):
                    s = [ s ]

                if indx_sel is None:
                    indx_sel = Set(s)
                else:
                    # intersect with the already selected ids
                    indx_sel = indx_sel.intersection(s)

            # if we got a Set -- convert to a list
            if isinstance(indx_sel, Set):
                indx_sel = list(indx_sel)

            # sort for the sake of sanity
            indx_sel.sort()

            return indx_sel

        # select samples
        if len(s_indx) == 1 and isinstance(s_indx[0], slice) \
               and s_indx[0] == slice(None):
            # so no actual selection was requested among samples
            s_indx = s_indx[0]
        else:
            # else we get indexes of samples
            if len(s_indx) == 0:
                s_indx = None
            else:
                s_indx = combine_indexes(s_indx, self.nsamples)

        # select features
        if len(f_indx):
            f_indx = combine_indexes(f_indx, self.nfeatures)
        else:
            f_indx = None

        return s_indx, f_indx


    def select(self, *args, **kwargs):
        """Universal selector

        WARNING: if you need to select duplicate samples
        (e.g. samples=[5,5]), or the order of selected samples or features
        matters and should be left unsorted (e.g. samples=[3,2,1]),
        please use selectFeatures or selectSamples functions directly

        Examples:
          Mimic plain selectSamples::

            dataset.select([1,2,3])
            dataset[[1,2,3]]

          Mimic plain selectFeatures::

            dataset.select(slice(None), [1,2,3])
            dataset.select('all', [1,2,3])
            dataset[:, [1,2,3]]

          Mixed (select features and samples)::

            dataset.select([1,2,3], [1, 2])
            dataset[[1,2,3], [1, 2]]

          Select samples matching some attributes::

            dataset.select(labels=[1,2], chunks=[2,4])
            dataset.select('labels', [1,2], 'chunks', [2,4])
            dataset['labels', [1,2], 'chunks', [2,4]]

          Mixed -- out of first 100 samples, select only those with
          labels 1 or 2 and belonging to chunks 2 or 4, and select
          features 2 and 3::

            dataset.select(slice(0,100), [2,3], labels=[1,2], chunks=[2,4])
            dataset[:100, [2,3], 'labels', [1,2], 'chunks', [2,4]]
        """
        s_indx, f_indx = self.index(*args, **kwargs)

        # select samples
        if s_indx == slice(None):
            # so no actual selection -- full slice was requested
            if __debug__:
                debug('DS', 'in select() not selecting samples')
            ds = self
        else:
            # else we get indexes of samples
            if __debug__:
                debug('DS', 'in select() selecting samples given selections'
                      + str(s_indx))
            ds = self.selectSamples(s_indx)

        # select features
        if f_indx is not None:
            if __debug__:
                debug('DS', 'in select() selecting features given selections'
                      + str(f_indx))
            ds = ds.selectFeatures(f_indx)

        return ds


    def where(self, *args, **kwargs):
        """Obtain indexes of interesting samples/features. See select()
        for more information

        XXX somewhat obsoletes idsby...
        """
        s_indx, f_indx = self.index(*args, **kwargs)
        if s_indx is not None and f_indx is not None:
            return s_indx, f_indx
        elif s_indx is not None:
            return s_indx
        else:
            return f_indx


    def __getitem__(self, *args):
        """Convenience dataset parts selection

        See select for more information
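
        Example::

          dataset['chunks':[2,3]]   # same as dataset.select('chunks', [2,3])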
        """
        # for cases like ds[:, 1] args is already a tuple
        if len(args) == 1 and isinstance(args[0], tuple):
            args = args[0]

        # turn ['chunks':[2,3]]-style slices into pairs of positional
        # arguments for select()
        args_, args = args, ()
        for a in args_:
            if isinstance(a, slice) and \
                   isinstance(a.start, basestring):
                # the stop carries the value; no step is allowed
                if a.stop is None or a.step is not None:
                    raise ValueError, \
                          "Selection must look like ['chunks':[2,3]]"
                args += (a.start, a.stop)
            else:
                args += (a,)
        return self.select(*args)


    def permuteLabels(self, status, perchunk=True, assure_permute=False):
        """Permute the labels.

        TODO: rename status into something closer in semantics.

        :Parameters:
          status : bool
            Calling this method with status set to True, the labels are
            permuted among all samples. If 'status' is False the
            original labels are restored.
          perchunk : bool
            If True permutation is limited to samples sharing the same
            chunk value. Therefore only the association of a certain
            sample with a label is permuted while keeping the absolute
            number of occurrences of each label value within a certain
            chunk constant.
          assure_permute : bool
            If True, assures that labels are permuted, i.e. any one is
            different from the original one
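
        Example::

          dataset.permuteLabels(True)    # permute (originals are stored)
          dataset.permuteLabels(False)   # restore the original labels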
        """
        # local bindings
        _data = self._data

        if len(self.uniquelabels) < 2:
            raise RuntimeError, \
                  "Call to permuteLabels is bogus since there is an " \
                  "insufficient number of labels: %s" % self.uniquelabels

        if not status:
            # restore the originals
            if _data.get('origlabels', None) is None:
                raise RuntimeError, 'Cannot restore labels. ' \
                                    'permuteLabels() has never been ' \
                                    'called with status == True.'
            self.labels = _data['origlabels']
            _data.pop('origlabels')
        else:
            # store the originals, but only if not yet done, otherwise
            # multiple calls with status == True would destroy them
            if not _data.has_key('origlabels') \
                   or _data['origlabels'] is None:
                # bind the old labels to origlabels
                _data['origlabels'] = _data['labels']
                # copy labels so origlabels are not modified in-place
                _data['labels'] = copy.copy(_data['labels'])

            labels = _data['labels']
            # now scramble
            if perchunk:
                for o in self.uniquechunks:
                    labels[self.chunks == o] = \
                        N.random.permutation(labels[self.chunks == o])
            else:
                labels = N.random.permutation(labels)

            self.labels = labels

            if assure_permute:
                if not (_data['labels'] != _data['origlabels']).any():
                    if not (assure_permute is True):
                        if assure_permute == 1:
                            raise RuntimeError, \
                                  "Cannot assure permutation of labels %s for " \
                                  "some reason with chunks %s and while " \
                                  "perchunk=%s . Should not happen" % \
                                  (self.labels, self.chunks, perchunk)
                    else:
                        assure_permute = 11 # make 10 attempts
                    if __debug__:
                        debug("DS", "Recalling permute to assure different labels")
                    self.permuteLabels(status, perchunk=perchunk,
                                       assure_permute=assure_permute-1)


    def getRandomSamples(self, nperlabel):
        """Select a random set of samples.

        If 'nperlabel' is an integer value, the specified number of samples is
        randomly chosen from the group of samples sharing a unique label
        value (total number of selected samples: nperlabel x len(uniquelabels)).

        If 'nperlabel' is a list, its length has to match the number of
        unique label values. In this case 'nperlabel' specifies the number of
        samples that shall be selected from the samples with the corresponding
        label.

        The method returns a Dataset object containing the selected
        samples.
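
        Example -- select 10 random samples per each unique label::

          ds2 = dataset.getRandomSamples(10)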
        """
        # if an integer is given take this value for all classes
        if isinstance(nperlabel, int):
            nperlabel = [ nperlabel for i in self.uniquelabels ]

        sample = []
        # for each available class
        labels = self.labels
        for i, r in enumerate(self.uniquelabels):
            # get the list of pattern ids for this class
            sample += random.sample( (labels == r).nonzero()[0],
                                     nperlabel[i] )

        return self.selectSamples( sample )


    def getNSamples(self):
        """Currently available number of patterns.
        """
        return self._data['samples'].shape[0]


    def getNFeatures(self):
        """Number of features per pattern.
        """
        return self._data['samples'].shape[1]


    def getLabelsMap(self):
        """Stored labels map (if any)
        """
        return self._dsattr.get('labels_map', None)


    def setLabelsMap(self, lm):
        """Set labels map.

        Checks for the validity of the mapping -- values should cover
        all existing labels in the dataset
        """
        values = Set(lm.values())
        labels = Set(self.uniquelabels)
        if not values.issuperset(labels):
            raise ValueError, \
                  "Provided mapping %s has some existing labels (out of %s) " \
                  "missing from mapping" % (list(values), list(labels))
        self._dsattr['labels_map'] = lm


    def setSamplesDType(self, dtype):
        """Set the data type of the samples array.
        """
        # local bindings
        _data = self._data

        if _data['samples'].dtype != dtype:
            _data['samples'] = _data['samples'].astype(dtype)


    def defineFeatureGroups(self, definition):
        """Assign `definition` to featuregroups

        XXX Feature-groups was not finished to be useful
        """
        if not len(definition) == self.nfeatures:
            raise ValueError, \
                  "Length of feature group definition %i " \
                  "does not match the number of features %i " \
                  % (len(definition), self.nfeatures)

        self._dsattr['featuregroups'] = N.array(definition)


    def convertFeatureIds2FeatureMask(self, ids):
        """Returns a boolean mask with all features in `ids` selected.

        :Parameters:
          ids : list or 1d array
            To be selected features ids.

        :Returns:
          ndarray : dtype='bool'
            All selected features are set to True; False otherwise.
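
        Example (for a dataset with 4 features)::

          dataset.convertFeatureIds2FeatureMask([1, 3])
          # -> array([False,  True, False,  True])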
        """
        fmask = N.repeat(False, self.nfeatures)
        fmask[ids] = True

        return fmask


    def convertFeatureMask2FeatureIds(self, mask):
        """Returns feature ids corresponding to non-zero elements in the mask.

        :Parameters:
          mask : 1d ndarray
            Feature mask.

        :Returns:
          ndarray : integer
            Ids of non-zero (non-False) mask elements.
        """
        return mask.nonzero()[0]


    @staticmethod
    def _checkCopyConstructorArgs(**kwargs):
        """Common sanity check for Dataset copy constructor calls."""
        samples = None
        if kwargs.has_key('samples'):
            samples = kwargs['samples']
        if samples is None and kwargs.has_key('data') \
           and kwargs['data'].has_key('samples'):
            samples = kwargs['data']['samples']
        if samples is None:
            raise DatasetError, \
                  "`samples` must be provided to copy constructor call."

        if not len(samples.shape) == 2:
            raise DatasetError, \
                  "samples must be in 2D shape in copy constructor call."


    nsamples    = property( fget=getNSamples )
    nfeatures   = property( fget=getNFeatures )
    labels_map  = property( fget=getLabelsMap, fset=setLabelsMap )


def datasetmethod(func):
    """Decorator to easily bind functions to a Dataset class
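
    Example (illustrative)::

      @datasetmethod
      def nbytes(dataset):
          return dataset.samples.nbytes

      # afterwards available as a regular method: dataset.nbytes()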
    """
    if __debug__:
        debug("DS_", "Binding function %s to Dataset class" % func.func_name)

    # bind the function
    setattr(Dataset, func.func_name, func)

    # return the original one
    return func


Dataset._registerAttribute("samples", "_data", abbr='S', hasunique=False)
Dataset._registerAttribute("labels",  "_data", abbr='L', hasunique=True)
Dataset._registerAttribute("chunks",  "_data", abbr='C', hasunique=True)

Dataset._registerAttribute("origids", "_data", abbr='I', hasunique=False)


from mvpa.misc.state import ClassWithCollections, Collection
from mvpa.misc.attributes import SampleAttribute, FeatureAttribute, \
     DatasetAttribute


class _Dataset(ClassWithCollections):
    """The successor of Dataset.
    """
    # placeholders for the attribute collections, assigned in __init__
    sa = None
    fa = None
    dsa = None

    # the actual samples array
    samples = None

    def __init__(self, samples, sa=None, fa=None, dsa=None):
        """
        This is the generic internal constructor. Its main task is to allow
        for a maximum level of customization during dataset construction,
        including fast copy construction.

        Parameters
        ----------
        samples : ndarray
          Data samples.
        sa : Collection
          Samples attributes collection.
        fa : Collection
          Features attributes collection.
        dsa : Collection
          Dataset attributes collection.
        """
        ClassWithCollections.__init__(self)

        # internal constructor -- assign the samples without any checks
        self.samples = samples

        # copy the attributes from the provided source collections (if
        # any) into this dataset's own collections
        for scol, tcol in ((sa, self.sa),
                           (fa, self.fa),
                           (dsa, self.dsa)):
            # make sure we have a target collection to put attributes into
            if tcol is None:
                tcol = Collection(owner=self)

            if not scol is None:
                for name, attr in scol.items.iteritems():
                    # copy the attribute into the target collection
                    tcol.add(copy.copy(attr))


    @classmethod
    def from_basic(klass, samples, labels=None, chunks=None):
        """
        One line summary.

        Long description.

        Parameters
        ----------
        samples : ndarray
          The two-dimensional samples matrix.
        labels : ndarray
        chunks : ndarray

        Returns
        -------
        blah blah

        Notes
        -----
        blah blah

        See Also
        --------
        blah blah

        Examples
        --------
        blah blah
        """
        # wrap the provided attributes into sample attribute instances
        labels_ = SampleAttribute(name='labels')
        labels_.value = labels
        chunks_ = SampleAttribute(name='chunks')
        chunks_.value = chunks

        # put everything into a samples attribute collection
        sa = Collection(items={'labels': labels_, 'chunks': chunks_})

        # and finally construct the dataset
        return klass(samples, sa=sa)


    @property
    def nsamples(self):
        """Currently available number of patterns.
        """
        return self.samples.shape[0]


    @property
    def nfeatures(self):
        """Number of features per pattern.
        """
        return self.samples.shape[1]