Source Code for Module mvpa.tests.test_dataset

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Unit tests for PyMVPA dataset handling"""

import unittest
import random
import numpy as N
from mvpa.datasets import Dataset
from mvpa.datasets.miscfx import zscore, aggregateFeatures
from mvpa.mappers.mask import MaskMapper
from mvpa.misc.exceptions import DatasetError
from mvpa.support import copy

from tests_warehouse import datasets

class DatasetTests(unittest.TestCase):

    def testAddPatterns(self):
        """Test composition of new datasets by addition of existing ones
        """
        data = Dataset(samples=range(5), labels=1, chunks=1)

        self.failUnlessEqual(
            data.uniquelabels, [1],
            msg="uniquelabels must be correctly recomputed")

        # simple sequence has to be a single pattern
        self.failUnlessEqual(data.nsamples, 1)
        # check correct pattern layout (1x5)
        self.failUnless(
            (data.samples == N.array([[0, 1, 2, 3, 4]])).all())

        # check for single labels and origin
        self.failUnless((data.labels == N.array([1])).all())
        self.failUnless((data.chunks == N.array([1])).all())

        # now try adding a pattern with the wrong shape
        self.failUnlessRaises(DatasetError,
                              data.__iadd__, Dataset(samples=N.ones((2, 3)),
                                                     labels=1,
                                                     chunks=1))

        # now add two real patterns
        dss = datasets['uni2large'].samples
        data += Dataset(samples=dss[:2, :5], labels=2, chunks=2)
        self.failUnlessEqual(data.nfeatures, 5)
        self.failUnless((data.labels == N.array([1, 2, 2])).all())
        self.failUnless((data.chunks == N.array([1, 2, 2])).all())

        # test automatic origins
        data += Dataset(samples=dss[3:5, :5], labels=3)
        self.failUnless((data.chunks == N.array([1, 2, 2, 0, 1])).all())

        # test unique class labels
        self.failUnless((data.uniquelabels == N.array([1, 2, 3])).all())

        # test wrong label length
        self.failUnlessRaises(DatasetError,
                              Dataset,
                              samples=dss[:4, :5],
                              labels=[1, 2, 3],
                              chunks=2)

        # test wrong origin length
        self.failUnlessRaises(DatasetError,
                              Dataset,
                              samples=dss[:4, :5],
                              labels=[1, 2, 3, 4],
                              chunks=[2, 2, 2])

    def testFeatureSelection(self):
        """Testing feature selection: sorted/not sorted, feature groups
        """
        origdata = datasets['uni2large'].samples[:10, :20]
        data = Dataset(samples=origdata, labels=2, chunks=2)

        # define some feature groups
        data.defineFeatureGroups(N.repeat(range(4), 5))

        unmasked = data.samples.copy()

        # default must be no mask
        self.failUnless(data.nfeatures == 20)

        features_to_select = [3, 0, 17]
        features_to_select_copy = copy.deepcopy(features_to_select)
        features_to_select_sorted = copy.deepcopy(features_to_select)
        features_to_select_sorted.sort()

        bsel = N.array([False]*20)
        bsel[features_to_select] = True
        # check selection with feature list
        for sel, issorted in \
            [(data.selectFeatures(features_to_select, sort=False), False),
             (data.selectFeatures(features_to_select, sort=True), True),
             (data.select(slice(None), features_to_select), True),
             (data.select(slice(None), N.array(features_to_select)), True),
             (data.select(slice(None), bsel), True)
            ]:
            self.failUnless(sel.nfeatures == 3)

            # check size of the masked patterns
            self.failUnless(sel.samples.shape == (10, 3))

            # check that the right features are selected
            fts = (features_to_select, features_to_select_sorted)[int(issorted)]
            self.failUnless((unmasked[:, fts] == sel.samples).all())

            # check grouping information
            self.failUnless((sel._dsattr['featuregroups'] == [0, 0, 3]).all())

            # check side effect on features_to_select parameter:
            self.failUnless(features_to_select == features_to_select_copy)

        # check selection by feature group id
        gsel = data.selectFeatures(groups=[2, 3])
        self.failUnless(gsel.nfeatures == 10)
        self.failUnless(set(gsel._dsattr['featuregroups']) == set([2, 3]))

    def testSampleSelection(self):
        origdata = datasets['uni2large'].samples[:100, :10].T
        data = Dataset(samples=origdata, labels=2, chunks=2)

        self.failUnless(data.nsamples == 10)

        # set single pattern to enabled
        for sel in [data.selectSamples(5),
                    data.select(5),
                    data.select(slice(5, 6)),
                    ]:
            self.failUnless(sel.nsamples == 1)
            self.failUnless(data.nfeatures == 100)
            self.failUnless(sel.origids == [5])

        # check duplicate selections
        for sel in [data.selectSamples([5, 5]),
                    # Following ones would fail since select removes
                    # repetitions (XXX)
                    #data.select([5,5]),
                    #data.select([5,5], 'all'),
                    #data.select([5,5], slice(None)),
                    ]:
            self.failUnless(sel.nsamples == 2)
            self.failUnless((sel.samples[0] == data.samples[5]).all())
            self.failUnless((sel.samples[0] == sel.samples[1]).all())
            self.failUnless(len(sel.labels) == 2)
            self.failUnless(len(sel.chunks) == 2)
            self.failUnless((sel.origids == [5, 5]).all())

            self.failUnless(sel.samples.shape == (2, 100))

        # check selection by labels
        for sel in [data.selectSamples(data.idsbylabels(2)),
                    data.select(labels=2),
                    data.select('labels', 2),
                    data.select('labels', [2]),
                    data['labels', [2]],
                    data['labels': [2], 'labels': 2],
                    data['labels': [2]],
                    ]:
            self.failUnless(sel.nsamples == data.nsamples)
            self.failUnless(N.all(sel.samples == data.samples))
        # not present label
        for sel in [data.selectSamples(data.idsbylabels(3)),
                    data.select(labels=3),
                    data.select('labels', 3),
                    data.select('labels', [3]),
                    ]:
            self.failUnless(sel.nsamples == 0)

        data = Dataset(samples=origdata,
                       labels=[8, 9, 4, 3, 3, 3, 4, 2, 8, 9],
                       chunks=2)
        for sel in [data.selectSamples(data.idsbylabels([2, 3])),
                    data.select('labels', [2, 3]),
                    data.select('labels', [2, 3], labels=[1, 2, 3, 4]),
                    data.select('labels', [2, 3], chunks=[1, 2, 3, 4]),
                    data['labels':[2, 3], 'chunks':[1, 2, 3, 4]],
                    data['chunks':[1, 2, 3, 4], 'labels':[2, 3]],
                    ]:
            self.failUnless(N.all(sel.origids == [3., 4., 5., 7.]))

        # lets cause it to compute unique labels
        self.failUnless((data.uniquelabels == [2, 3, 4, 8, 9]).all())

        # select some samples removing some labels completely
        sel = data.selectSamples(data.idsbylabels([3, 4, 8, 9]))
        self.failUnlessEqual(set(sel.uniquelabels), set([3, 4, 8, 9]))
        self.failUnless((sel.origids == [0, 1, 2, 3, 4, 5, 6, 8, 9]).all())
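
    # A minimal sketch of the equivalent addressing forms used above,
    # assuming the 0.x select()/__getitem__ API: all of the following pick
    # exactly the samples whose label is 2,
    #
    #   data.selectSamples(data.idsbylabels(2))
    #   data.select(labels=2)
    #   data.select('labels', 2)
    #   data['labels', [2]]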

    def testEvilSelects(self):
        """Test some obscure selections of samples via select() or __getitem__
        """
        origdata = datasets['uni2large'].samples[:100, :10].T
        data = Dataset(samples=origdata,
                       #       0  1  2  3  4  5  6  7  8  9
                       labels=[8, 9, 4, 3, 3, 3, 3, 2, 8, 9],
                       chunks=[1, 2, 3, 2, 3, 1, 5, 6, 3, 6])

        # malformed getitem
        if __debug__:
            # check is enforced only in __debug__
            self.failUnlessRaises(ValueError, data.__getitem__,
                                  'labels', 'featu')

        # too many indices
        self.failUnlessRaises(ValueError, data.__getitem__, 1, 1, 1)

        # various getitems which should carry the same result
        for sel in [data.select('chunks', [2, 6], labels=[3, 2],
                                features=slice(None)),
                    data.select('all', 'all', labels=[2, 3], chunks=[2, 6]),
                    data['chunks', [2, 6], 'labels', [3, 2]],
                    data[:, :, 'chunks', [2, 6], 'labels', [3, 2]],
                    # get warnings but should work as the rest for now
                    data[3:8, 'chunks', [2, 6, 2, 6], 'labels', [3, 2]],
                    ]:
            self.failUnless(N.all(sel.origids == [3, 7]))
            self.failUnless(sel.nfeatures == 100)
            self.failUnless(N.all(sel.samples == origdata[[3, 7]]))

        target = origdata[[3, 7]]
        target = target[:, [1, 3]]
        # various getitems which should carry the same result
        for sel in [data.select('all', [1, 3],
                                'chunks', [2, 6], labels=[3, 2]),
                    data[:, [1, 3], 'chunks', [2, 6], 'labels', [3, 2]],
                    data[:, [1, 3], 'chunks', [2, 6], 'labels', [3, 2]],
                    # get warnings but should work as the rest for now
                    data[3:8, [1, 1, 3, 1],
                         'chunks', [2, 6, 2, 6], 'labels', [3, 2]],
                    ]:
            self.failUnless(N.all(sel.origids == [3, 7]))
            self.failUnless(sel.nfeatures == 2)
            self.failUnless(N.all(sel.samples == target))

        # Check if we get an empty selection if requesting the impossible
        self.failUnless(data.select(chunks=[23]).nsamples == 0)

        # Check .where()
        self.failUnless(N.all(data.where(chunks=[2, 6]) == [1, 3, 7, 9]))
        self.failUnless(N.all(data.where(chunks=[2, 6], labels=[22, 3]) == [3]))
        # both samples and features
        idx = data.where('all', [1, 3, 10], labels=[2, 3, 4])
        self.failUnless(N.all(idx[1] == [1, 3, 10]))
        self.failUnless(N.all(idx[0] == range(2, 8)))
        # empty query
        self.failUnless(data.where() is None)
        # empty result
        self.failUnless(data.where(labels=[123]) == [])

    def testCombinedPatternAndFeatureMasking(self):
        data = Dataset(samples=N.arange(20).reshape((4, 5)),
                       labels=1,
                       chunks=1)

        self.failUnless(data.nsamples == 4)
        self.failUnless(data.nfeatures == 5)
        fsel = data.selectFeatures([1, 2])
        fpsel = fsel.selectSamples([0, 3])
        self.failUnless(fpsel.nsamples == 2)
        self.failUnless(fpsel.nfeatures == 2)

        self.failUnless((fpsel.samples == [[1, 2], [16, 17]]).all())

    def testPatternMerge(self):
        data1 = Dataset(samples=N.ones((5, 5)), labels=1, chunks=1)
        data2 = Dataset(samples=N.ones((3, 5)), labels=2, chunks=1)

        merged = data1 + data2

        self.failUnless(merged.nfeatures == 5)
        l12 = [1]*5 + [2]*3
        l1 = [1]*8
        self.failUnless((merged.labels == l12).all())
        self.failUnless((merged.chunks == l1).all())

        data1 += data2

        self.failUnless(data1.nfeatures == 5)
        self.failUnless((data1.labels == l12).all())
        self.failUnless((data1.chunks == l1).all())

    def testLabelRandomizationAndSampling(self):
        """Test getRandomSamples() and permuteLabels()
        """
        data = Dataset(samples=N.ones((5, 1)), labels=range(5), chunks=1)
        data += Dataset(samples=N.ones((5, 1))+1, labels=range(5), chunks=2)
        data += Dataset(samples=N.ones((5, 1))+2, labels=range(5), chunks=3)
        data += Dataset(samples=N.ones((5, 1))+3, labels=range(5), chunks=4)
        data += Dataset(samples=N.ones((5, 1))+4, labels=range(5), chunks=5)
        self.failUnless(data.samplesperlabel == {0: 5, 1: 5, 2: 5, 3: 5, 4: 5})

        sample = data.getRandomSamples(2)
        self.failUnless(sample.samplesperlabel.values() == [2, 2, 2, 2, 2])

        self.failUnless((data.uniquechunks == range(1, 6)).all())

        # store the old labels
        origlabels = data.labels.copy()

        data.permuteLabels(True)

        self.failIf((data.labels == origlabels).all())

        data.permuteLabels(False)

        self.failUnless((data.labels == origlabels).all())

        # now try another object with the same data
        data2 = Dataset(samples=data.samples,
                        labels=data.labels,
                        chunks=data.chunks)

        # labels are the same as the originals
        self.failUnless((data2.labels == origlabels).all())

        # now permute in the new object
        data2.permuteLabels(True)

        # must not affect the old one
        self.failUnless((data.labels == origlabels).all())
        # but only the new one
        self.failIf((data2.labels == origlabels).all())

    def testAttributes(self):
        """Test adding custom attributes to a dataset
        """
        #class BlobbyDataset(Dataset):
        #    pass
        # TODO: we can't assign attributes to those for now...
        ds = Dataset(samples=range(5), labels=1, chunks=1)
        self.failUnlessRaises(AttributeError, lambda x: x.blobs, ds)
        """Dataset.blobs should fail since .blobs wasn't yet registered"""

        # register new attribute, but it would alter only new instances
        Dataset._registerAttribute("blobs", "_data", hasunique=True)
        ds = Dataset(samples=range(5), labels=1, chunks=1)
        self.failUnless(not ds.blobs != [0],
            msg="By default new attributes are supposed to get 0 as the value")

        try:
            ds.blobs = [1, 2]
            self.fail(msg="Dataset.blobs=[1,2] should fail since "
                          "there are 5 samples")
        except ValueError:
            pass

        try:
            ds.blobs = [1]
        except:
            self.fail(msg="We must be able to assign the attribute")

        # Dataset still shouldn't have blobs... just BlobbyDataset
        #self.failUnlessRaises(AttributeError, lambda x: x.blobs,
        #                      Dataset(samples=range(5), labels=1, chunks=1))

    def testRequiredAttributes(self):
        """Verify that we have the required attributes
        """
        self.failUnlessRaises(DatasetError, Dataset)
        self.failUnlessRaises(DatasetError, Dataset, samples=[1])
        self.failUnlessRaises(DatasetError, Dataset, labels=[1])
        try:
            ds = Dataset(samples=[1], labels=[1])
        except:
            self.fail(msg="samples and labels are the 2 required parameters")
        assert(ds is not None)          # silence pylint

    def testZScoring(self):
        """Test z-scoring transformation
        """
        # dataset: mean=2, std=1
        samples = N.array((0, 1, 3, 4, 2, 2, 3, 1, 1, 3, 3, 1, 2, 2, 2, 2)).\
                  reshape((16, 1))
        data = Dataset(samples=samples,
                       labels=range(16), chunks=[0]*16)
        self.failUnlessEqual(data.samples.mean(), 2.0)
        self.failUnlessEqual(data.samples.std(), 1.0)
        zscore(data, perchunk=True)

        # check z-scoring
        check = N.array([-2, -1, 1, 2, 0, 0, 1, -1, -1, 1, 1, -1, 0, 0, 0, 0],
                        dtype='float64').reshape(16, 1)
        self.failUnless((data.samples == check).all())

        data = Dataset(samples=samples,
                       labels=range(16), chunks=[0]*16)
        zscore(data, perchunk=False)
        self.failUnless((data.samples == check).all())

        # check z-scoring taking a set of labels as the baseline
        data = Dataset(samples=samples,
                       labels=[0, 2, 2, 2, 1] + [2]*11,
                       chunks=[0]*16)
        zscore(data, baselinelabels=[0, 1])
        self.failUnless((samples == data.samples+1.0).all())

    def testAggregation(self):
        data = Dataset(samples=N.arange(20).reshape((4, 5)),
                       labels=1,
                       chunks=1)

        ag_data = aggregateFeatures(data, N.mean)

        self.failUnless(ag_data.nsamples == 4)
        self.failUnless(ag_data.nfeatures == 1)
        self.failUnless((ag_data.samples[:, 0] == [2, 7, 12, 17]).all())
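
    # Worked example for the check above: the row means of
    # N.arange(20).reshape((4, 5)) are (0+1+2+3+4)/5 = 2, then 7, 12 and 17,
    # which is why aggregateFeatures(data, N.mean) collapses the 5 features
    # into a single column equal to [2, 7, 12, 17].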

    def testApplyMapper(self):
        """Test creation of a new dataset by applying a mapper"""
        mapper = MaskMapper(N.array([1, 0, 1]))
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)),
                          labels=1,
                          chunks=1)
        seldataset = dataset.applyMapper(featuresmapper=mapper)
        self.failUnless((dataset.selectFeatures([0, 2]).samples
                         == seldataset.samples).all())

        # Lets do a simple test on maskmapper reverse since it seems to
        # do evil things. Those checks are done only in __debug__
        if __debug__:
            # should fail since in the mask we have just 2 features now
            self.failUnlessRaises(ValueError, mapper.reverse, [10, 20, 30])
            self.failUnlessRaises(ValueError, mapper.forward, [10, 20])

        # XXX: the intended test is added as SampleGroupMapper test
        #self.failUnlessRaises(NotImplementedError,
        #                      dataset.applyMapper, None, [1])
        #"""We don't yet have implementation for samplesmapper --
        #if we get one -- remove this check and place a test"""

    def testId(self):
        """Test that Dataset.idhash changes whenever samples,
        labels or chunks change
        """
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)),
                          labels=1,
                          chunks=1)
        origid = dataset.idhash
        dataset.labels = [3, 1, 2, 3]   # change all labels
        self.failUnless(origid != dataset.idhash,
                        msg="Changing all labels should alter dataset's idhash")

        origid = dataset.idhash

        z = dataset.labels[1]
        self.failUnlessEqual(origid, dataset.idhash,
                             msg="Accessing shouldn't change idhash")
        z = dataset.chunks
        self.failUnlessEqual(origid, dataset.idhash,
                             msg="Accessing shouldn't change idhash")
        z[2] = 333
        self.failUnless(origid != dataset.idhash,
                        msg="Changing value in attribute should change idhash")

        origid = dataset.idhash
        dataset.samples[1, 1] = 1000
        self.failUnless(origid != dataset.idhash,
                        msg="Changing value in data should change idhash")

        origid = dataset.idhash
        dataset.permuteLabels(True)
        self.failUnless(origid != dataset.idhash,
                        msg="Permutation also changes idhash")

        dataset.permuteLabels(False)
        self.failUnless(origid == dataset.idhash,
                        msg="idhash should be restored after "
                            "permuteLabels(False)")

    def testFeatureMaskConversion(self):
        dataset = Dataset(samples=N.arange(12).reshape((4, 3)),
                          labels=1,
                          chunks=1)

        mask = dataset.convertFeatureIds2FeatureMask(range(dataset.nfeatures))
        self.failUnless(len(mask) == dataset.nfeatures)
        self.failUnless((mask == True).all())

        self.failUnless(
            (dataset.convertFeatureMask2FeatureIds(mask) == range(3)).all())

        mask[1] = False

        self.failUnless(
            (dataset.convertFeatureMask2FeatureIds(mask) == [0, 2]).all())

    def testSummary(self):
        """Dummy test"""
        ds = datasets['uni2large']
        ds = ds[N.random.permutation(range(ds.nsamples))[:20]]
        summary = ds.summary()
        self.failUnless(len(summary) > 40)

    def testLabelsMapping(self):
        """Test mapping of the labels from strings to numericals
        """
        od = {'apple': 0, 'orange': 1}
        samples = [[3], [2], [3]]
        labels_l = ['apple', 'orange', 'apple']

        # test broadcasting of the label
        ds = Dataset(samples=samples, labels='orange')
        self.failUnless(N.all(ds.labels == ['orange']*3))

        # Test basic mapping of literal labels
        for ds in [Dataset(samples=samples, labels=labels_l, labels_map=od),
                   # Figure out mapping
                   Dataset(samples=samples, labels=labels_l, labels_map=True)]:
            self.failUnless(N.all(ds.labels == [0, 1, 0]))
            self.failUnless(ds.labels_map == od)
            ds_ = ds[1]
            self.failUnless(ds_.labels_map == od,
                msg='selectSamples should preserve the full mapping')

        # We should complain about an insufficient mapping
        self.failUnlessRaises(ValueError, Dataset, samples=samples,
                              labels=labels_l, labels_map={'apple': 0})

        # Conformance to older behavior -- if labels are given as
        # strings, no mapping occurs by default
        ds2 = Dataset(samples=samples, labels=labels_l)
        self.failUnlessEqual(ds2.labels_map, None)

        # We should map numerical labels if it was requested:
        od3 = {1: 100, 2: 101, 3: 100}
        ds3 = Dataset(samples=samples, labels=[1, 2, 3],
                      labels_map=od3)
        self.failUnlessEqual(ds3.labels_map, od3)
        self.failUnless(N.all(ds3.labels == [100, 101, 100]))

        ds3_ = ds3[1]
        self.failUnlessEqual(ds3.labels_map, od3)

        ds4 = Dataset(samples=samples, labels=labels_l)

        # Lets check setting the labels map
        ds = Dataset(samples=samples, labels=labels_l, labels_map=od)

        self.failUnlessRaises(ValueError, ds.setLabelsMap,
                              {'orange': 1, 'nonorange': 3})
        new_map = {'tasty': 0, 'crappy': 1}
        ds.labels_map = new_map.copy()
        self.failUnlessEqual(ds.labels_map, new_map)

    def testLabelsMappingAddDataset(self):
        """Adding datasets needs special care whenever labels mapping
        is used."""
        samples = [[3], [2], [3]]
        l1 = ['a', 'b', 'a']
        l2 = ['b', 'a', 'c']
        ds1 = Dataset(samples=samples, labels=l1,
                      labels_map={'a': 1, 'b': 2})
        ds2 = Dataset(samples=samples, labels=l2,
                      labels_map={'c': 1, 'a': 4, 'b': 2})

        # some dataset without mapping
        ds0 = Dataset(samples=samples, labels=l2)

        # original mappings
        lm1 = ds1.labels_map.copy()
        lm2 = ds2.labels_map.copy()

        ds3 = ds1 + ds2
        self.failUnless(N.all(ds3.labels ==
                              N.hstack((ds1.labels, [2, 1, 5]))))
        self.failUnless(ds1.labels_map == lm1)
        self.failUnless(ds2.labels_map == lm2)

        # check iadd
        ds1 += ds2
        self.failUnless(N.all(ds1.labels == ds3.labels))

        # it should be deterministic
        self.failUnless(N.all(ds1.labels_map == ds3.labels_map))

        # don't allow adding datasets where one of them doesn't have a
        # labels_map whenever the other one does
        self.failUnlessRaises(ValueError, ds1.__add__, ds0)
        self.failUnlessRaises(ValueError, ds1.__iadd__, ds0)

    def testCopy(self):
        # lets use some instance of a somewhat evolved dataset
        ds = datasets['uni2small']
        # Clone the beast
        ds_ = ds.copy()
        # verify that we have the same data
        self.failUnless(N.all(ds.samples == ds_.samples))
        self.failUnless(N.all(ds.labels == ds_.labels))
        self.failUnless(N.all(ds.chunks == ds_.chunks))

        # modify and see if we don't change data in the original one
        ds_.samples[0, 0] = 1234
        self.failUnless(N.any(ds.samples != ds_.samples))
        self.failUnless(N.all(ds.labels == ds_.labels))
        self.failUnless(N.all(ds.chunks == ds_.chunks))

        ds_.labels = N.hstack(([123], ds_.labels[1:]))
        self.failUnless(N.any(ds.samples != ds_.samples))
        self.failUnless(N.any(ds.labels != ds_.labels))
        self.failUnless(N.all(ds.chunks == ds_.chunks))

        ds_.chunks = N.hstack(([1234], ds_.chunks[1:]))
        self.failUnless(N.any(ds.samples != ds_.samples))
        self.failUnless(N.any(ds.labels != ds_.labels))
        self.failUnless(N.any(ds.chunks != ds_.chunks))

        self.failUnless(N.any(ds.uniquelabels != ds_.uniquelabels))
        self.failUnless(N.any(ds.uniquechunks != ds_.uniquechunks))

    def testIdsonboundaries(self):
        """Test detection of transition points

        Shame on Yarik -- he didn't create unittests right away... damn me
        """
        ds = Dataset(samples=N.array(range(10), ndmin=2).T,
                     labels=[0, 0, 1, 1, 0, 0, 1, 1, 0, 0],
                     chunks=[0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
        self.failUnless(ds.idsonboundaries() == [0, 2, 4, 5, 6, 8],
                        "We should have got ids whenever either chunk or "
                        "label changes")
        self.failUnless(ds.idsonboundaries(attributes_to_track=['chunks'])
                        == [0, 5])
        # Preceding samples
        self.failUnless(ds.idsonboundaries(prior=1, post=-1,
                                           attributes_to_track=['chunks'])
                        == [4, 9])
        self.failUnless(ds.idsonboundaries(prior=2, post=-1,
                                           attributes_to_track=['chunks'])
                        == [3, 4, 8, 9])
        self.failUnless(ds.idsonboundaries(prior=2, post=-1,
                                           attributes_to_track=['chunks'],
                                           revert=True)
                        == [0, 1, 2, 5, 6, 7])
        self.failUnless(ds.idsonboundaries(prior=1, post=1,
                                           attributes_to_track=['chunks'])
                        == [0, 1, 4, 5, 6, 9])
        # all should be there
        self.failUnless(ds.idsonboundaries(prior=2) == range(10))


def suite():
    return unittest.makeSuite(DatasetTests)


if __name__ == '__main__':
    import runner