![]() |
Multivariate Pattern Analysis in Python |
Runs both classifiers on the the same dataset and compare their performance. This example also shows an example usage of confusion matrices and how two classifers can be combined.
>>> from mvpa.suite import *
>>>
>>> if __debug__:
>>> debug.active.append('SMLR_')
>>>
>>> # features of sample data
>>> print "Generating samples..."
>>> nfeat = 10000
>>> nsamp = 100
>>> ntrain = 90
>>> goodfeat = 10
>>> offset = .5
>>>
>>> # create the sample datasets
>>> samp1 = N.random.randn(nsamp,nfeat)
>>> samp1[:,:goodfeat] += offset
>>>
>>> samp2 = N.random.randn(nsamp,nfeat)
>>> samp2[:,:goodfeat] -= offset
>>>
>>> # create the pymvpa training dataset from the labeled features
>>> patternsPos = Dataset(samples=samp1[:ntrain,:], labels=1)
>>> patternsNeg = Dataset(samples=samp2[:ntrain,:], labels=0)
>>> trainpat = patternsPos + patternsNeg
>>>
>>> # create patters for the testing dataset
>>> patternsPos = Dataset(samples=samp1[ntrain:,:], labels=1)
>>> patternsNeg = Dataset(samples=samp2[ntrain:,:], labels=0)
>>> testpat = patternsPos + patternsNeg
>>>
>>> # set up the SMLR classifier
>>> print "Evaluating SMLR classifier..."
>>> smlr = SMLR(fit_all_weights=True)
>>>
>>> # enable saving of the values used for the prediction
>>> smlr.states.enable('values')
>>>
>>> # train with the known points
>>> smlr.train(trainpat)
>>>
>>> # run the predictions on the test values
>>> pre = smlr.predict(testpat.samples)
>>>
>>> # calculate the confusion matrix
>>> smlr_confusion = ConfusionMatrix(
>>> labels=trainpat.uniquelabels, targets=testpat.labels,
>>> predictions=pre)
>>>
>>> # now do the same for a linear SVM
>>> print "Evaluating Linear SVM classifier..."
>>> lsvm = LinearNuSVMC(probability=1)
>>>
>>> # enable saving of the values used for the prediction
>>> lsvm.states.enable('values')
>>>
>>> # train with the known points
>>> lsvm.train(trainpat)
>>>
>>> # run the predictions on the test values
>>> pre = lsvm.predict(testpat.samples)
>>>
>>> # calculate the confusion matrix
>>> lsvm_confusion = ConfusionMatrix(
>>> labels=trainpat.uniquelabels, targets=testpat.labels,
>>> predictions=pre)
>>>
>>> # now train SVM with selected features
>>> print "Evaluating Linear SVM classifier with SMLR's features..."
>>>
>>> keepInd = (N.abs(smlr.weights).mean(axis=1)!=0)
>>> newtrainpat = trainpat.selectFeatures(keepInd, sort=False)
>>> newtestpat = testpat.selectFeatures(keepInd, sort=False)
>>>
>>> # train with the known points
>>> lsvm.train(newtrainpat)
>>>
>>> # run the predictions on the test values
>>> pre = lsvm.predict(newtestpat.samples)
>>>
>>> # calculate the confusion matrix
>>> lsvm_confusion_sparse = ConfusionMatrix(
>>> labels=newtrainpat.uniquelabels, targets=newtestpat.labels,
>>> predictions=pre)
>>>
>>>
>>> print "SMLR Percent Correct:\t%g%% (Retained %d/%d features)" % \
>>> (smlr_confusion.percentCorrect,
>>> (smlr.weights!=0).sum(), N.prod(smlr.weights.shape))
>>> print "linear-SVM Percent Correct:\t%g%%" % \
>>> (lsvm_confusion.percentCorrect)
>>> print "linear-SVM Percent Correct (with %d features from SMLR):\t%g%%" % \
>>> (keepInd.sum(), lsvm_confusion_sparse.percentCorrect)
See also
The full source code of this example is included in the PyMVPA source distribution (doc/examples/smlr.py).