# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
This module presents an interface to use the glm implemented in
nipy.algorithms.statistics.models.regression.
It contains the GLM and contrast classes that are meant to be the main objects
of fMRI data analyses.

It is important to note that the GLM is meant as a one-session General Linear
Model. But inference can be performed on multiple sessions by computing fixed
effects on contrasts.

Examples
--------

>>> import numpy as np
>>> from nipy.modalities.fmri.glm import GeneralLinearModel
>>> n, p, q = 100, 80, 10
>>> X, Y = np.random.randn(p, q), np.random.randn(p, n)
>>> cval = np.hstack((1, np.zeros(9)))
>>> model = GeneralLinearModel(X)
>>> model.fit(Y)
>>> z_vals = model.contrast(cval).z_score() # z-transformed statistics

Example of fixed effects statistics across two contrasts:

>>> cval_ = cval.copy()
>>> np.random.shuffle(cval_)
>>> z_ffx = (model.contrast(cval) + model.contrast(cval_)).z_score()
"""
from __future__ import print_function
from __future__ import absolute_import
import numpy as np
from warnings import warn
import scipy.stats as sps
from nibabel import load, Nifti1Image
from nipy.io.nibcompat import get_header, get_affine
from nipy.labs.mask import compute_mask_sessions
from nipy.algorithms.statistics.models.regression import OLSModel, ARModel
from nipy.algorithms.statistics.utils import multiple_mahalanobis, z_score
from nipy.externals.six import string_types
DEF_TINY = 1e-50
DEF_DOFMAX = 1e10


def data_scaling(Y):
"""Scaling of the data to have percent of baseline change columnwise
Parameters
----------
Y: array of shape(n_time_points, n_voxels)
the input data
Returns
-------
Y: array of shape (n_time_points, n_voxels),
the data after mean-scaling, de-meaning and multiplication by 100
mean : array of shape (n_voxels,)
the data mean
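
    Examples
    --------
    A minimal sketch on arbitrary random data; the offset of 10 only keeps
    the column means away from zero:

    >>> import numpy as np
    >>> Y = np.random.rand(128, 50) + 10   # (n_time_points, n_voxels)
    >>> Y_scaled, mean = data_scaling(Y)
    >>> Y_scaled.shape == Y.shape, mean.shape
    (True, (50,))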
"""
mean = Y.mean(0)
Y = 100 * (Y / mean - 1)
return Y, mean


class GeneralLinearModel(object):
""" This class handles the so-called on General Linear Model
Most of what it does in the fit() and contrast() methods
fit() performs the standard two-step ('ols' then 'ar1') GLM fitting
contrast() returns a contrast instance, yileding statistics and p-values.
The link between fit() and constrast is done vis the two class members:
glm_results : dictionary of nipy.algorithms.statistics.models.
regression.RegressionResults instances,
describing results of a GLM fit
labels : array of shape(n_voxels),
labels that associate each voxel with a results key
"""
    def __init__(self, X):
"""
Parameters
----------
X : array of shape (n_time_points, n_regressors)
the design matrix
"""
self.X = X
self.labels_ = None
self.results_ = None
    def fit(self, Y, model='ols', steps=100):
"""GLM fitting of a dataset using 'ols' regression or the two-pass
Parameters
----------
Y : array of shape(n_time_points, n_samples)
the fMRI data
model : {'ar1', 'ols'}, optional
the temporal variance model. Defaults to 'ols'
steps : int, optional
Maximum number of discrete steps for the AR(1) coef histogram
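
        Examples
        --------
        A minimal sketch on random data (shapes are arbitrary):

        >>> import numpy as np
        >>> glm = GeneralLinearModel(np.random.randn(40, 5))
        >>> glm.fit(np.random.randn(40, 100), model='ar1')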
"""
if model not in ['ar1', 'ols']:
raise ValueError('Unknown model')
if Y.ndim == 1:
Y = Y[:, np.newaxis]
if Y.shape[0] != self.X.shape[0]:
raise ValueError('Response and predictors are inconsistent')
# fit the OLS model
ols_result = OLSModel(self.X).fit(Y)
# compute and discretize the AR1 coefs
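        # The AR(1) coefficient is estimated as the lag-1 autocorrelation of
        # the OLS residuals; rounding it to a grid of width 1/steps pools
        # voxels with similar values, so that a single ARModel fit can serve
        # each bin (see the loop below).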
ar1 = ((ols_result.resid[1:] * ols_result.resid[:-1]).sum(0) /
(ols_result.resid ** 2).sum(0))
ar1 = (ar1 * steps).astype(np.int) * 1. / steps
        # Fit the AR model according to current AR(1) estimates
if model == 'ar1':
self.results_ = {}
self.labels_ = ar1
# fit the model
for val in np.unique(self.labels_):
m = ARModel(self.X, val)
self.results_[val] = m.fit(Y[:, self.labels_ == val])
else:
self.labels_ = np.zeros(Y.shape[1])
self.results_ = {0.0: ols_result}
    def get_beta(self, column_index=None):
"""Accessor for the best linear unbiased estimated of model parameters
Parameters
----------
column_index: int or array-like of int or None, optional
The indexed of the columns to be returned. if None (default
behaviour), the whole vector is returned
Returns
-------
beta: array of shape (n_voxels, n_columns)
the beta
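
        Examples
        --------
        A sketch on random data (shapes are arbitrary), showing the shape of
        the returned estimates:

        >>> import numpy as np
        >>> glm = GeneralLinearModel(np.random.randn(20, 3))
        >>> glm.fit(np.random.randn(20, 10))
        >>> glm.get_beta().shape
        (3, 10)
        >>> glm.get_beta(0).shape
        (1, 10)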
"""
        # make column_index a list if it is an int
if column_index is None:
column_index = np.arange(self.X.shape[1])
if not hasattr(column_index, '__iter__'):
column_index = [int(column_index)]
n_beta = len(column_index)
# build the beta array
beta = np.zeros((n_beta, self.labels_.size), dtype=np.float)
for l in self.results_.keys():
beta[:, self.labels_ == l] = self.results_[l].theta[column_index]
return beta
    def get_mse(self):
"""Accessor for the mean squared error of the model
Returns
-------
mse: array of shape (n_voxels)
the sum of square error per voxel
"""
        # build the mse array
mse = np.zeros(self.labels_.size, dtype=np.float)
for l in self.results_.keys():
mse[self.labels_ == l] = self.results_[l].MSE
return mse
    def get_logL(self):
"""Accessor for the log-likelihood of the model
Returns
-------
logL: array of shape (n_voxels,)
the sum of square error per voxel
"""
        # build the logL array
logL = np.zeros(self.labels_.size, dtype=np.float)
for l in self.results_.keys():
logL[self.labels_ == l] = self.results_[l].logL
return logL
    def contrast(self, con_val, contrast_type=None):
""" Specify and estimate a linear contrast
Parameters
----------
con_val : numpy.ndarray of shape (p) or (q, p)
where q = number of contrast vectors and p = number of regressors
contrast_type : {None, 't', 'F' or 'tmin-conjunction'}, optional
type of the contrast. If None, then defaults to 't' for 1D
`con_val` and 'F' for 2D `con_val`
Returns
-------
con: Contrast instance
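
        Examples
        --------
        A sketch on random data (shapes are arbitrary); the same fit is
        re-used for a t and an F contrast:

        >>> import numpy as np
        >>> glm = GeneralLinearModel(np.random.randn(40, 5))
        >>> glm.fit(np.random.randn(40, 100))
        >>> t_con = glm.contrast(np.eye(5)[0])        # t contrast
        >>> F_con = glm.contrast(np.eye(5)[:2], 'F')  # F contrast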
"""
if self.labels_ is None or self.results_ is None:
raise ValueError('The model has not been estimated yet')
con_val = np.asarray(con_val)
if con_val.ndim == 1:
dim = 1
else:
dim = con_val.shape[0]
if contrast_type is None:
if dim == 1:
contrast_type = 't'
else:
contrast_type = 'F'
if contrast_type not in ['t', 'F', 'tmin-conjunction']:
raise ValueError('Unknown contrast type: %s' % contrast_type)
effect_ = np.zeros((dim, self.labels_.size), dtype=np.float)
var_ = np.zeros((dim, dim, self.labels_.size), dtype=np.float)
if contrast_type == 't':
for l in self.results_.keys():
resl = self.results_[l].Tcontrast(con_val)
effect_[:, self.labels_ == l] = resl.effect.T
var_[:, :, self.labels_ == l] = (resl.sd ** 2).T
else:
for l in self.results_.keys():
resl = self.results_[l].Fcontrast(con_val)
effect_[:, self.labels_ == l] = resl.effect
var_[:, :, self.labels_ == l] = resl.covariance
        # degrees of freedom, taken from the (last) results instance
        dof_ = self.results_[l].df_resid
return Contrast(effect=effect_, variance=var_, dof=dof_,
contrast_type=contrast_type)


class Contrast(object):
""" The contrast class handles the estimation of statistical contrasts
on a given model: student (t), Fisher (F), conjunction (tmin-conjunction).
The important feature is that it supports addition,
thus opening the possibility of fixed-effects models.
The current implementation is meant to be simple,
and could be enhanced in the future on the computational side
(high-dimensional F constrasts may lead to memory breakage).
Notes
-----
The 'tmin-conjunction' test is the valid conjunction test discussed in:
Nichols T, Brett M, Andersson J, Wager T, Poline JB. Valid conjunction
inference with the minimum statistic. Neuroimage. 2005 Apr 15;25(3):653-60.
This test gives the p-value of the z-values under the conjunction null,
i.e. the union of the null hypotheses for all terms.
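
    Examples
    --------
    A sketch of fixed-effects pooling across two sessions, on random data;
    the shapes and the equal 0.5 weights are arbitrary illustration choices:

    >>> import numpy as np
    >>> from nipy.modalities.fmri.glm import GeneralLinearModel
    >>> X = np.random.randn(30, 4)
    >>> glm1, glm2 = GeneralLinearModel(X), GeneralLinearModel(X)
    >>> glm1.fit(np.random.randn(30, 50))
    >>> glm2.fit(np.random.randn(30, 50))
    >>> c = [1, 0, 0, 0]
    >>> ffx = 0.5 * glm1.contrast(c) + 0.5 * glm2.contrast(c)
    >>> z = ffx.z_score()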
"""
    def __init__(self, effect, variance, dof=DEF_DOFMAX, contrast_type='t',
tiny=DEF_TINY, dofmax=DEF_DOFMAX):
"""
        Parameters
        ----------
        effect : array of shape (contrast_dim, n_voxels)
            the effects related to the contrast
        variance : array of shape (contrast_dim, contrast_dim, n_voxels)
            the associated variance estimate
        dof : scalar
            the degrees of freedom
        contrast_type : {'t', 'F'}
            the type of the contrast
        tiny : float, optional
            small constant used to avoid divisions by zero variance
        dofmax : scalar, optional
            upper bound on the degrees of freedom used for inference
"""
if variance.ndim != 3:
raise ValueError('Variance array should have 3 dimensions')
if effect.ndim != 2:
            raise ValueError('Effect array should have 2 dimensions')
if variance.shape[0] != variance.shape[1]:
raise ValueError('Inconsistent shape for the variance estimate')
if ((variance.shape[1] != effect.shape[0]) or
(variance.shape[2] != effect.shape[1])):
raise ValueError('Effect and variance have inconsistent shape')
self.effect = effect
self.variance = variance
self.dof = float(dof)
self.dim = effect.shape[0]
        if self.dim > 1 and contrast_type == 't':
print('Automatically converted multi-dimensional t to F contrast')
contrast_type = 'F'
self.contrast_type = contrast_type
self.stat_ = None
self.p_value_ = None
self.baseline = 0
self.tiny = tiny
self.dofmax = dofmax
    def stat(self, baseline=0.0):
""" Return the decision statistic associated with the test of the
null hypothesis: (H0) 'contrast equals baseline'

        Parameters
        ----------
        baseline : float, optional
            Baseline value for the test statistic

        Returns
        -------
        stat : 1-d array of shape (n_voxels,)
            the test statistic, one value per voxel
"""
self.baseline = baseline
# Case: one-dimensional contrast ==> t or t**2
if self.dim == 1:
# avoids division by zero
stat = (self.effect - baseline) / np.sqrt(
np.maximum(self.variance, self.tiny))
if self.contrast_type == 'F':
stat = stat ** 2
# Case: F contrast
elif self.contrast_type == 'F':
# F = |t|^2/q , |t|^2 = e^t inv(v) e
if self.effect.ndim == 1:
self.effect = self.effect[np.newaxis]
if self.variance.ndim == 1:
self.variance = self.variance[np.newaxis, np.newaxis]
stat = (multiple_mahalanobis(self.effect - baseline,
self.variance) / self.dim)
# Case: tmin (conjunctions)
elif self.contrast_type == 'tmin-conjunction':
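            # extract the per-voxel diagonals of the (dim, dim) variance
            # matrices by flattening the first two axes and striding with
            # step dim + 1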
vdiag = self.variance.reshape([self.dim ** 2] + list(
self.variance.shape[2:]))[:: self.dim + 1]
stat = (self.effect - baseline) / np.sqrt(
np.maximum(vdiag, self.tiny))
stat = stat.min(0)
        # Unknown statistic
else:
raise ValueError('Unknown statistic type')
self.stat_ = stat
return stat.ravel()
    def p_value(self, baseline=0.0):
"""Return a parametric estimate of the p-value associated
with the null hypothesis: (H0) 'contrast equals baseline'

        Parameters
        ----------
        baseline : float, optional
            Baseline value for the test statistic

        Notes
        -----
        The value of 0.5 is used where the stat is not defined
"""
if self.stat_ is None or not self.baseline == baseline:
self.stat_ = self.stat(baseline)
# Valid conjunction as in Nichols et al, Neuroimage 25, 2005.
if self.contrast_type in ['t', 'tmin-conjunction']:
p = sps.t.sf(self.stat_, np.minimum(self.dof, self.dofmax))
elif self.contrast_type == 'F':
p = sps.f.sf(self.stat_, self.dim, np.minimum(
self.dof, self.dofmax))
else:
raise ValueError('Unknown statistic type')
p[np.isnan(self.stat_)] = .5
self.p_value_ = p
return p
    def z_score(self, baseline=0.0):
"""Return a parametric estimation of the z-score associated
with the null hypothesis: (H0) 'contrast equals baseline'

        Parameters
        ----------
        baseline : float, optional
            Baseline value for the test statistic

        Notes
        -----
        The value of 0 is used where the stat is not defined
"""
if self.p_value_ is None or not self.baseline == baseline:
self.p_value_ = self.p_value(baseline)
# Avoid inf values kindly supplied by scipy.
self.z_score_ = z_score(self.p_value_)
self.z_score_[np.isnan(self.stat_)] = 0
return self.z_score_
    def __add__(self, other):
        """Addition of self with other; yields a new Contrast instance

        This should be used only on independent contrasts"""
if self.contrast_type != other.contrast_type:
raise ValueError(
                'The two contrasts do not have consistent types')
if self.dim != other.dim:
raise ValueError(
'The two contrasts do not have compatible dimensions')
effect_ = self.effect + other.effect
variance_ = self.variance + other.variance
dof_ = self.dof + other.dof
return Contrast(effect=effect_, variance=variance_, dof=dof_,
contrast_type=self.contrast_type)
def __rmul__(self, scalar):
"""Multiplication of the contrast by a scalar"""
scalar = float(scalar)
effect_ = self.effect * scalar
variance_ = self.variance * scalar ** 2
dof_ = self.dof
return Contrast(effect=effect_, variance=variance_, dof=dof_,
contrast_type=self.contrast_type)
__mul__ = __rmul__
def __div__(self, scalar):
return self.__rmul__(1 / float(scalar))


class FMRILinearModel(object):
""" This class is meant to handle GLMs from a higher-level perspective
i.e. by taking images as input and output
"""
    def __init__(self, fmri_data, design_matrices, mask='compute',
m=0.2, M=0.9, threshold=.5):
"""Load the data

        Parameters
        ----------
        fmri_data : Image or str or sequence of Images / str
            fmri images / paths of the (4D) fmri images
        design_matrices : arrays or str or sequence of arrays / str
            design matrix arrays / paths of .npz files
        mask : str or Image or None, optional
            string can be 'compute' or a path to an image;
            an image is taken as an input (assumed binary) mask;
            if 'compute', the mask is computed from the data;
            if None, no masking will be applied
        m, M, threshold : float, optional
            parameters of the masking procedure. Should be within [0, 1]
        Notes
        -----
        The only computation done here is mask computation (if required)

        Examples
        --------
        We need the example data package for this example::

            import numpy as np
            from nipy.utils import example_data
            from nipy.modalities.fmri.glm import FMRILinearModel
            fmri_files = [example_data.get_filename('fiac', 'fiac0', run)
                          for run in ['run1.nii.gz', 'run2.nii.gz']]
            design_files = [example_data.get_filename('fiac', 'fiac0', run)
                            for run in ['run1_design.npz', 'run2_design.npz']]
            mask = example_data.get_filename('fiac', 'fiac0', 'mask.nii.gz')
            multi_session_model = FMRILinearModel(fmri_files,
                                                  design_files,
                                                  mask)
            multi_session_model.fit()
            z_image, = multi_session_model.contrast([np.eye(13)[1]] * 2)

            # The number of voxels with p < 0.001 is given by ...
            print(np.sum(z_image.get_data() > 3.09))
"""
# manipulate the arguments
if isinstance(fmri_data, string_types) or hasattr(fmri_data, 'get_data'):
fmri_data = [fmri_data]
if isinstance(design_matrices, (string_types, np.ndarray)):
design_matrices = [design_matrices]
if len(fmri_data) != len(design_matrices):
raise ValueError('Incompatible number of fmri runs and '
'design matrices were provided')
self.fmri_data, self.design_matrices = [], []
self.glms, self.means = [], []
# load the fmri data
for fmri_run in fmri_data:
if isinstance(fmri_run, string_types):
self.fmri_data.append(load(fmri_run))
else:
self.fmri_data.append(fmri_run)
# set self.affine as the affine of the first image
self.affine = get_affine(self.fmri_data[0])
# load the designs
for design_matrix in design_matrices:
if isinstance(design_matrix, string_types):
loaded = np.load(design_matrix)
self.design_matrices.append(loaded[loaded.files[0]])
else:
self.design_matrices.append(design_matrix)
# load the mask
if mask == 'compute':
mask = compute_mask_sessions(
fmri_data, m=m, M=M, cc=1, threshold=threshold, opening=0)
self.mask = Nifti1Image(mask.astype(np.int8), self.affine)
elif mask is None:
mask = np.ones(self.fmri_data[0].shape[:3]).astype(np.int8)
self.mask = Nifti1Image(mask, self.affine)
else:
if isinstance(mask, string_types):
self.mask = load(mask)
else:
self.mask = mask
    def fit(self, do_scaling=True, model='ar1', steps=100):
""" Load the data, mask the data, scale the data, fit the GLM

        Parameters
        ----------
        do_scaling : bool, optional
            if True, the data will be scaled as a percent of the voxel mean
        model : {'ar1', 'ols'}, optional
            the kind of GLM fitted to the data
        steps : int, optional
            number of discretization steps of the AR(1) parameter
            (only used when model is 'ar1')
"""
from nibabel import Nifti1Image
# get the mask as an array
mask = self.mask.get_data().astype(np.bool)
self.glms, self.means = [], []
for fmri, design_matrix in zip(self.fmri_data, self.design_matrices):
if do_scaling:
# scale the data
data, mean = data_scaling(fmri.get_data()[mask].T)
else:
data, mean = (fmri.get_data()[mask].T,
fmri.get_data()[mask].T.mean(0))
mean_data = mask.astype(np.int16)
mean_data[mask] = mean
self.means.append(Nifti1Image(mean_data, self.affine))
# fit the GLM
glm = GeneralLinearModel(design_matrix)
glm.fit(data, model, steps)
self.glms.append(glm)
    def contrast(self, contrasts, con_id='', contrast_type=None, output_z=True,
output_stat=False, output_effects=False,
output_variance=False):
""" Estimation of a contrast as fixed effects on all sessions

        Parameters
        ----------
        contrasts : array or list of arrays of shape (n_col,) or (n_dim, n_col)
            numerical definition of the contrast (one array per run),
            where ``n_col`` is the number of columns of the design matrix
        con_id : str, optional
            name of the contrast
        contrast_type : {'t', 'F', 'tmin-conjunction'}, optional
            type of the contrast
        output_z : bool, optional
            Return or not the corresponding z-stat image
        output_stat : bool, optional
            Return or not the base (t/F) stat image
        output_effects : bool, optional
            Return or not the corresponding effect image
        output_variance : bool, optional
            Return or not the corresponding variance image

        Returns
        -------
        output_images : list of nibabel images
            The requested output images, in the following order:
            z image, stat(t/F) image, effects image, variance image
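
        Examples
        --------
        A sketch continuing the example of ``__init__`` (the fitted
        ``multi_session_model`` and its 13-column designs are assumed from
        there; ``con_id`` is an arbitrary label)::

            z_img, t_img = multi_session_model.contrast(
                [np.eye(13)[1]] * 2, con_id='face', output_stat=True)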
"""
if self.glms == []:
raise ValueError('first run fit() to estimate the model')
if isinstance(contrasts, np.ndarray):
contrasts = [contrasts]
if len(contrasts) != len(self.glms):
raise ValueError(
'contrasts must be a sequence of %d session contrasts' %
len(self.glms))
contrast_ = None
for i, (glm, con) in enumerate(zip(self.glms, contrasts)):
if np.all(con == 0):
warn('Contrast for session %d is null' % i)
elif contrast_ is None:
contrast_ = glm.contrast(con, contrast_type)
else:
contrast_ = contrast_ + glm.contrast(con, contrast_type)
if output_z or output_stat:
# compute the contrast and stat
contrast_.z_score()
# Prepare the returned images
mask = self.mask.get_data().astype(np.bool)
do_outputs = [output_z, output_stat, output_effects, output_variance]
estimates = ['z_score_', 'stat_', 'effect', 'variance']
descrips = ['z statistic', 'Statistical value', 'Estimated effect',
'Estimated variance']
dims = [1, 1, contrast_.dim, contrast_.dim ** 2]
n_vox = mask.sum()
output_images = []
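        # For each requested output, embed the masked per-voxel estimates
        # back into a full-volume array (4D when the estimate is
        # multi-dimensional, e.g. effects/variance of a multi-row contrast)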
for (do_output, estimate, descrip, dim) in zip(
do_outputs, estimates, descrips, dims):
if do_output:
if dim > 1:
result_map = np.tile(
mask.astype(np.float)[:, :, :, np.newaxis], dim)
result_map[mask] = np.reshape(
getattr(contrast_, estimate).T, (n_vox, dim))
else:
result_map = mask.astype(np.float)
result_map[mask] = np.squeeze(
getattr(contrast_, estimate))
output = Nifti1Image(result_map, self.affine)
get_header(output)['descrip'] = (
'%s associated with contrast %s' % (descrip, con_id))
output_images.append(output)
return output_images