Examples

Each of the following examples is a stand-alone script containing all necessary code to run some analysis. All examples are shipped with PyMVPA and can be found in the doc/examples/ directory in the source package. This directory include some more special-interest examples which are not listed here.

Some examples need to access sample dataset available under data/ directory within root of PyMVPA hierarchy, thus they have to be invoked directly from PyMVPA root (e.g. doc/examples/searchlight_2d.py).

Simple Plotting of Classifier Behavior

This example runs a number of classifiers on a simple dataset and plots the decision surface of each classifier.

#!/usr/bin/env python
#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Example demonstrating a simple classifiction of a 2-D dataset"""

from mvpa.suite import *
"""
# Command above substitutes the following list

import numpy as N
import pylab as P

# local imports
from mvpa.datasets import Dataset
from mvpa.clfs.plr import PLR
from mvpa.clfs.ridge import RidgeReg
from mvpa.clfs.svm import RbfNuSVMC,LinearNuSVMC
from mvpa.clfs.knn import kNN
"""


# set up the labeled data
# two skewed 2-D distributions
num_dat = 200
dist = 4
feat_pos=N.random.randn(2, num_dat)
feat_pos[0, :] *= 2.
feat_pos[1, :] *= .5
feat_pos[0, :] += dist
feat_neg=N.random.randn(2, num_dat)
feat_neg[0, :] *= .5
feat_neg[1, :] *= 2.
feat_neg[0, :] -= dist

# set up the testing features
x1 = N.linspace(-10, 10, 100)
x2 = N.linspace(-10, 10, 100)
x,y = N.meshgrid(x1, x2);
feat_test = N.array((N.ravel(x), N.ravel(y)))

# create the pymvpa dataset from the labeled features
patternsPos = Dataset(samples=feat_pos.T, labels=1)
patternsNeg = Dataset(samples=feat_neg.T, labels=0)
patterns = patternsPos + patternsNeg

# set up classifiers to try out
clfs = {'Ridge Regression': RidgeReg(),
        'Linear SVM': LinearNuSVMC(probability=1,
                      enable_states=['probabilities']),
        'RBF SVM': RbfNuSVMC(probability=1,
                      enable_states=['probabilities']),
        'Logistic Regression': PLR(criterion=0.00001),
        'k-Nearest-Neighbour': kNN(k=10)}

# loop over classifiers and show how they do
fig = 0

# make a new figure
P.figure(figsize=(8,12))
for c in clfs:
    # tell which one we are doing
    print "Running %s classifier..." % (c)

    # make a new subplot for each classifier
    fig += 1
    P.subplot(3,2,fig)

    # plot the training points
    P.plot(feat_pos[0, :], feat_pos[1, :], "r.")
    P.plot(feat_neg[0, :], feat_neg[1, :], "b.")

    # select the clasifier
    clf = clfs[c]

    # enable saving of the values used for the prediction
    clf.states.enable('values')

    # train with the known points
    clf.train(patterns)

    # run the predictions on the test values
    pre = clf.predict(feat_test.T)

    # if ridge, use the prediction, otherwise use the values
    if c == 'Ridge Regression' or c == 'k-Nearest-Neighbour':
        # use the prediction
        res = N.asarray(pre)
    elif c == 'Logistic Regression':
        # get out the values used for the prediction
        res = N.asarray(clf.values)
    else:
        # get the probabilities from the svm
        res = N.asarray([(q[1][1] - q[1][0] + 1) / 2
                for q in clf.probabilities])

    # reshape the results
    z = N.asarray(res).reshape((100, 100))

    # plot the predictions
    P.pcolor(x, y, z, shading='interp')
    P.clim(0, 1)
    P.colorbar()
    P.contour(x, y, z, linewidths=1, colors='black', hold=True)

    # add the title
    P.title(c)

# show all the cool figures
P.show()

Easy Searchlight

Run a searchlight analysis on the example fMRI dataset that is shipped with PyMVPA. This example is part of the PyMVPA source distribution: doc/examples/searchlight_2d.py`.

#!/usr/bin/python
#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Example demonstrating a searchlight analysis on an fMRI dataset"""

from mvpa.suite import *
"""
# Command above substitutes commands below

import numpy as N
import pylab as P

# local imports
from mvpa.misc.iohelpers import SampleAttributes
from mvpa.datasets.niftidataset import NiftiDataset
from mvpa.datasets.misc import zscore
from mvpa.misc.signal import detrend
from mvpa.clfs.knn import kNN
from mvpa.clfs.svm import LinearNuSVMC
from mvpa.clfs.transerror import TransferError
from mvpa.datasets.splitter import NFoldSplitter, OddEvenSplitter
from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
from mvpa.measures.searchlight import Searchlight
from mvpa.misc import debug
"""

# enable debug output for searchlight call
debug.active += ["SLC"]


#
# load PyMVPA example dataset
#
attr = SampleAttributes('data/attributes.txt')
dataset = NiftiDataset(samples='data/bold.nii.gz',
                       labels=attr.labels,
                       chunks=attr.chunks,
                       mask='data/mask.nii.gz')

#
# preprocessing
#

# do chunkswise linear detrending on dataset
detrend(dataset, perchunk=True, model='linear')

# only use 'rest', 'house' and 'scrambled' samples from dataset
dataset = dataset.selectSamples(
                N.array([ l in [0,2,6] for l in dataset.labels], dtype='bool'))

# zscore dataset relative to baseline ('rest') mean
zscore(dataset, perchunk=True, baselinelabels=[0], targetdtype='float32')

# remove baseline samples from dataset for final analysis
dataset = dataset.selectSamples(N.array([l != 0 for l in dataset.labels],
                                        dtype='bool'))

#
# Run Searchlight
#

# choose classifier
clf = LinearNuSVMC()

# setup measure to be computed by Searchlight
# cross-validated mean transfer using an odd-even dataset splitter
cv = CrossValidatedTransferError(TransferError(clf),
                                 NFoldSplitter())

# setup plotting
fig = 0
P.figure(figsize=(12,4))


for radius in [1,5,10]:
    # tell which one we are doing
    print "Running searchlight with radius: %i ..." % (radius)

    # setup Searchlight with a custom radius
    # radius has to be in the same unit as the nifti file's pixdim property.
    sl = Searchlight(cv, radius=radius)

    # run searchlight on example dataset and retrieve error map
    sl_map = sl(dataset)

    # map sensitivity map into original dataspace
    orig_sl_map = dataset.mapReverse(N.array(sl_map))
    masked_orig_sl_map = N.ma.masked_array(orig_sl_map, mask=orig_sl_map == 0)

    # make a new subplot for each classifier
    fig += 1
    P.subplot(1,3,fig)

    P.title('Radius %i' % radius)

    P.imshow(masked_orig_sl_map[0],
             interpolation='nearest',
             aspect=1.25,
             cmap=P.cm.autumn)
    P.clim(0.5, 0.65)
    P.colorbar(shrink=0.6)


# show all the cool figures
P.show()

Sensitivity Measure

Run some basic and meta sensitivity measures on the example fMRI dataset that comes with PyMVPA and plot the computed featurewise measures for each.

#!/usr/bin/env python
#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Example demonstrating some FeaturewiseDatasetMeasures performing on a fMRI
dataset with brain activity recorded while perceiving images of objects
(shoes vs. chairs).

Generated images show sensitivity maps computed by six sensitivity analyzers.

This example assumes that the PyMVPA example dataset is located in data/.
"""

from mvpa.suite import *
"""
# Command above substitutes commands below

import numpy as N
import pylab as P

# local imports
from mvpa.datasets.niftidataset import NiftiDataset
from mvpa.misc.iohelpers import SampleAttributes
from mvpa.measures.anova import OneWayAnova
from mvpa.clfs.svm import LinearNuSVMC
from mvpa.datasets.misc import zscore
from mvpa.misc.signal import detrend
from mvpa.measures.splitmeasure import SplitFeaturewiseMeasure
from mvpa.datasets.splitter import OddEvenSplitter, NFoldSplitter
"""

# load PyMVPA example dataset
attr = SampleAttributes('data/attributes.txt')
dataset = NiftiDataset(samples='data/bold.nii.gz',
                       labels=attr.labels,
                       chunks=attr.chunks,
                       mask='data/mask.nii.gz')

# define sensitivity analyzer
sensanas = {'a) ANOVA': OneWayAnova(transformer=N.abs),
            'b) Linear SVM weights': LinearNuSVMC().getSensitivityAnalyzer(
                                                       transformer=N.abs),
            'c) Splitting ANOVA (odd-even)':
                SplitFeaturewiseMeasure(OneWayAnova(transformer=N.abs),
                                             OddEvenSplitter()),
            'd) Splitting SVM (odd-even)':
                SplitFeaturewiseMeasure(
                    LinearNuSVMC().getSensitivityAnalyzer(transformer=N.abs),
                                     OddEvenSplitter()),
            'e) Splitting ANOVA (nfold)':
                SplitFeaturewiseMeasure(OneWayAnova(transformer=N.abs),
                                             NFoldSplitter()),
            'f) Splitting SVM (nfold)':
                SplitFeaturewiseMeasure(
                    LinearNuSVMC().getSensitivityAnalyzer(transformer=N.abs),
                                     NFoldSplitter())
           }

# do chunkswise linear detrending on dataset
detrend(dataset, perchunk=True, model='linear')

# only use 'rest', 'shoe' and 'bottle' samples from dataset
dataset = dataset.selectSamples(
                N.array([ l in [0,3,7] for l in dataset.labels], dtype='bool'))

# zscore dataset relative to baseline ('rest') mean
zscore(dataset, perchunk=True, baselinelabels=[0], targetdtype='float32')

# remove baseline samples from dataset for final analysis
dataset = dataset.selectSamples(N.array([l != 0 for l in dataset.labels],
                                        dtype='bool'))

fig = 0
P.figure(figsize=(8,8))

keys = sensanas.keys()
keys.sort()

for s in keys:
    # tell which one we are doing
    print "Running %s ..." % (s)

    # compute sensitivies
    smap = sensanas[s](dataset)

    # map sensitivity map into original dataspace
    orig_smap = dataset.mapReverse(smap)
    masked_orig_smap = N.ma.masked_array(orig_smap, mask=orig_smap == 0)

    # make a new subplot for each classifier
    fig += 1
    P.subplot(3,2,fig)

    P.title(s)

    P.imshow(masked_orig_smap[0],
             interpolation='nearest',
             aspect=1.25,
             cmap=P.cm.autumn)

    # uniform scaling per base sensitivity analyzer
    if s.count('ANOVA'):
        P.clim(0, 0.4)
    else:
        P.clim(0, 0.055)

    P.colorbar(shrink=0.6)


# show all the cool figures
P.show()

Classification of SVD-mapped Datasets

Demonstrate the usage of a dataset mapper performing singular value decomposition within a cross-validation.

#!/usr/bin/python
#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Example demonstrating a how to use data projection onto SVD components
for *any* clasifier"""

from mvpa.suite import *
"""
# Command above substitutes commands below

import numpy as N
import pylab as P

# local imports
from mvpa.misc.iohelpers import SampleAttributes
from mvpa.datasets.niftidataset import NiftiDataset
from mvpa.datasets.misc import zscore
from mvpa.misc.signal import detrend
from mvpa.clfs.transerror import TransferError
from mvpa.datasets.splitter import NFoldSplitter
from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
from mvpa.clfs.svm import LinearCSVMC
from mvpa.clfs.base import MappedClassifier
from mvpa.mappers import SVDMapper

from mvpa.misc import debug
"""

debug.active += ["CROSSC"]

# plotting helper function
def makeBarPlot(data, labels=None, title=None, ylim=None, ylabel=None):
    xlocations = N.array(range(len(data))) + 0.5
    width = 0.5

    # work with arrays
    data = N.array(data)

    # plot bars
    plot = P.bar(xlocations,
                 data.mean(axis=1),
                 yerr=data.std(axis=1) / N.sqrt(data.shape[1]),
                 width=width,
                 color='0.6',
                 ecolor='black')
    P.axhline(0.5, ls='--', color='0.4')

    if ylim:
        P.ylim(*(ylim))
    if title:
        P.title(title)

    if labels:
        P.xticks(xlocations+ width/2, labels)

    if ylabel:
        P.ylabel(ylabel)

    P.xlim(0, xlocations[-1]+width*2)


#
# load PyMVPA example dataset
#
attr = SampleAttributes('data/attributes.txt')
dataset = NiftiDataset(samples='data/bold.nii.gz',
                       labels=attr.labels,
                       chunks=attr.chunks,
                       mask='data/mask.nii.gz')

#
# preprocessing
#

# do chunkswise linear detrending on dataset
detrend(dataset, perchunk=True, model='linear')

# only use 'rest', 'face' and 'house' samples from dataset
dataset = dataset.selectSamples(
                N.array([ l in [0,4,5] for l in dataset.labels], dtype='bool'))

# zscore dataset relative to baseline ('rest') mean
zscore(dataset, perchunk=True, baselinelabels=[0], targetdtype='float32')

# remove baseline samples from dataset for final analysis
dataset = dataset.selectSamples(N.array([l != 0 for l in dataset.labels],
                                        dtype='bool'))

# define some classifiers: a simple one and several classifiers with built-in
# SVDs
clfs = [('All orig. features', LinearCSVMC()),
        ('All PCs', MappedClassifier(LinearCSVMC(), SVDMapper())),
        ('First 3 PCs', MappedClassifier(LinearCSVMC(),
                        SVDMapper(selector=range(5)))),
        ('First 50 PCs', MappedClassifier(LinearCSVMC(),
                        SVDMapper(selector=range(50)))),
        ('PCs 3-50', MappedClassifier(LinearCSVMC(),
                        SVDMapper(selector=range(3,50))))]


# run and visualize in barplot
results = []
labels = []

for desc, clf in clfs:
    print desc
    cv = CrossValidatedTransferError(
            TransferError(clf),
            NFoldSplitter(),
            enable_states=['results'])
    cv(dataset)

    results.append(cv.results)
    labels.append(desc)

makeBarPlot(results,labels=labels, title='Linear C-SVM classification')
P.show()

Compare SMLR to Linear SVM Classifier

Runs both classifiers on the the same dataset and compare their performance. This example also shows an example usage of confusion matrices and how two classifers can be combined.

#!/usr/bin/env python
#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Example demonstrating a SMLR classifier"""

from mvpa.suite import *
"""
# Command above substitutes commands below

import numpy as N

from mvpa.datasets import Dataset
from mvpa.clfs.smlr import SMLR
from mvpa.clfs.svm import LinearNuSVMC
from mvpa.clfs.transerror import ConfusionMatrix
"""

from mvpa.misc import debug
debug.active.append('SMLR_')

# features of sample data
print "Generating samples..."
nfeat = 10000
nsamp = 100
ntrain = 90
goodfeat = 10
offset = .5

# create the sample datasets
samp1 = N.random.randn(nsamp,nfeat)
samp1[:,:goodfeat] += offset

samp2 = N.random.randn(nsamp,nfeat)
samp2[:,:goodfeat] -= offset

# create the pymvpa training dataset from the labeled features
patternsPos = Dataset(samples=samp1[:ntrain,:], labels=1)
patternsNeg = Dataset(samples=samp2[:ntrain,:], labels=0)
trainpat = patternsPos + patternsNeg

# create patters for the testing dataset
patternsPos = Dataset(samples=samp1[ntrain:,:], labels=1)
patternsNeg = Dataset(samples=samp2[ntrain:,:], labels=0)
testpat = patternsPos + patternsNeg

# set up the SMLR classifier
print "Evaluating SMLR classifier..."
smlr = SMLR(fit_all_weights=True)

# enable saving of the values used for the prediction
smlr.states.enable('values')

# train with the known points
smlr.train(trainpat)

# run the predictions on the test values
pre = smlr.predict(testpat.samples)

# calculate the confusion matrix
smlr_confusion = ConfusionMatrix(
    labels=trainpat.uniquelabels, targets=testpat.labels,
    predictions=pre)

# now do the same for a linear SVM
print "Evaluating Linear SVM classifier..."
lsvm = LinearNuSVMC(probability=1)

# enable saving of the values used for the prediction
lsvm.states.enable('values')

# train with the known points
lsvm.train(trainpat)

# run the predictions on the test values
pre = lsvm.predict(testpat.samples)

# calculate the confusion matrix
lsvm_confusion = ConfusionMatrix(
    labels=trainpat.uniquelabels, targets=testpat.labels,
    predictions=pre)

# now train SVM with selected features
print "Evaluating Linear SVM classifier with SMLR's features..."

keepInd = (N.abs(smlr.weights).mean(axis=1)!=0)
newtrainpat = trainpat.selectFeatures(keepInd, sort=False)
newtestpat = testpat.selectFeatures(keepInd, sort=False)

# train with the known points
lsvm.train(newtrainpat)

# run the predictions on the test values
pre = lsvm.predict(newtestpat.samples)

# calculate the confusion matrix
lsvm_confusion_sparse = ConfusionMatrix(
    labels=newtrainpat.uniquelabels, targets=newtestpat.labels,
    predictions=pre)


print "SMLR Percent Correct:\t%g%% (Retained %d/%d features)" % \
    (smlr_confusion.percentCorrect,
     (smlr.weights!=0).sum(), N.prod(smlr.weights.shape))
print "linear-SVM Percent Correct:\t%g%%" % \
    (lsvm_confusion.percentCorrect)
print "linear-SVM Percent Correct (with %d features from SMLR):\t%g%%" % \
    (keepInd.sum(), lsvm_confusion_sparse.percentCorrect)