Source code for nipy.modalities.fmri.glm

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
This module presents an interface to use the GLM implemented in
nipy.algorithms.statistics.models.regression.

It contains the GeneralLinearModel and Contrast classes that are meant to be
the main objects of fMRI data analyses.

Note that GeneralLinearModel is meant as a single-session General Linear
Model; inference over multiple sessions can nevertheless be performed by
computing fixed effects on contrasts.

Examples
--------

>>> import numpy as np
>>> from nipy.modalities.fmri.glm import GeneralLinearModel
>>> n, p, q = 100, 80, 10
>>> X, Y = np.random.randn(p, q), np.random.randn(p, n)
>>> cval = np.hstack((1, np.zeros(q - 1)))
>>> model = GeneralLinearModel(X)
>>> model.fit(Y)
>>> z_vals = model.contrast(cval).z_score() # z-transformed statistics

Example of fixed effects statistics across two contrasts

>>> cval_ = cval.copy()
>>> np.random.shuffle(cval_)
>>> z_ffx = (model.contrast(cval) + model.contrast(cval_)).z_score()
"""
from __future__ import print_function
from __future__ import absolute_import

import numpy as np

from warnings import warn

import scipy.stats as sps

from nibabel import load, Nifti1Image

from nipy.io.nibcompat import get_header, get_affine
from nipy.labs.mask import compute_mask_sessions
from nipy.algorithms.statistics.models.regression import OLSModel, ARModel
from nipy.algorithms.statistics.utils import multiple_mahalanobis, z_score

from nipy.externals.six import string_types

DEF_TINY = 1e-50
DEF_DOFMAX = 1e10


def data_scaling(Y):
    """Scale each column of the data to percent change around its baseline
    (mean)

    Parameters
    ----------
    Y : array of shape (n_time_points, n_voxels)
        the input data

    Returns
    -------
    Y : array of shape (n_time_points, n_voxels)
        the data after mean-scaling, de-meaning and multiplication by 100
    mean : array of shape (n_voxels,)
        the data mean
    """
    mean = Y.mean(0)
    Y = 100 * (Y / mean - 1)
    return Y, mean

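
# A minimal usage sketch for data_scaling (hypothetical random data, in the
# module's doctest style; not part of the original source). After scaling,
# each column is a percent change around zero:
#
# >>> Y = 10 + np.random.randn(100, 50)
# >>> Y_scaled, mean = data_scaling(Y)
# >>> np.allclose(Y_scaled.mean(0), 0)
# True
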
class GeneralLinearModel(object):
    """ This class handles the so-called General Linear Model

    Most of what it does is encapsulated in the fit() and contrast() methods:
    fit() performs the standard two-step ('ols' then 'ar1') GLM fitting;
    contrast() returns a Contrast instance, yielding statistics and p-values.
    The link between fit() and contrast() is made via two class members:

    results_ : dictionary of nipy.algorithms.statistics.models.
               regression.RegressionResults instances,
               describing results of a GLM fit

    labels_ : array of shape (n_voxels,),
              labels that associate each voxel with a results key
    """
    def __init__(self, X):
        """
        Parameters
        ----------
        X : array of shape (n_time_points, n_regressors)
            the design matrix
        """
        self.X = X
        self.labels_ = None
        self.results_ = None

    def fit(self, Y, model='ols', steps=100):
        """GLM fitting of a dataset using 'ols' regression or the two-pass
        'ar1' procedure

        Parameters
        ----------
        Y : array of shape (n_time_points, n_samples)
            the fMRI data
        model : {'ar1', 'ols'}, optional
            the temporal variance model. Defaults to 'ols'
        steps : int, optional
            Maximum number of discrete steps for the AR(1) coef histogram
        """
        if model not in ['ar1', 'ols']:
            raise ValueError('Unknown model')

        if Y.ndim == 1:
            Y = Y[:, np.newaxis]

        if Y.shape[0] != self.X.shape[0]:
            raise ValueError('Response and predictors are inconsistent')

        # fit the OLS model
        ols_result = OLSModel(self.X).fit(Y)

        # compute and discretize the AR1 coefs
        ar1 = ((ols_result.resid[1:] * ols_result.resid[:-1]).sum(0) /
               (ols_result.resid ** 2).sum(0))
        ar1 = (ar1 * steps).astype(int) * 1. / steps

        # Fit the AR model according to current AR(1) estimates
        if model == 'ar1':
            self.results_ = {}
            self.labels_ = ar1
            # fit the model
            for val in np.unique(self.labels_):
                m = ARModel(self.X, val)
                self.results_[val] = m.fit(Y[:, self.labels_ == val])
        else:
            self.labels_ = np.zeros(Y.shape[1])
            self.results_ = {0.0: ols_result}

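    # A minimal fitting sketch (hypothetical random data, doctest style; not
    # part of the original source). With model='ar1', voxels are grouped by
    # their discretized AR(1) coefficient, one results entry per group:
    #
    # >>> X = np.random.randn(100, 10)
    # >>> glm = GeneralLinearModel(X)
    # >>> glm.fit(np.random.randn(100, 50), model='ar1')
    # >>> set(glm.results_) == set(np.unique(glm.labels_))
    # True
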
    def get_beta(self, column_index=None):
        """Accessor for the best linear unbiased estimates of the model
        parameters

        Parameters
        ----------
        column_index : int or array-like of int or None, optional
            The indices of the columns to be returned.
            If None (default behaviour), the whole vector is returned

        Returns
        -------
        beta : array of shape (n_columns, n_voxels)
            the beta estimates
        """
        # make column_index a list if it is an int
        if column_index is None:
            column_index = np.arange(self.X.shape[1])
        if not hasattr(column_index, '__iter__'):
            column_index = [int(column_index)]
        n_beta = len(column_index)

        # build the beta array
        beta = np.zeros((n_beta, self.labels_.size), dtype=float)
        for l in self.results_.keys():
            beta[:, self.labels_ == l] = self.results_[l].theta[column_index]
        return beta

    def get_mse(self):
        """Accessor for the mean squared error of the model

        Returns
        -------
        mse : array of shape (n_voxels,)
            the mean squared error per voxel
        """
        # build the mse array
        mse = np.zeros(self.labels_.size, dtype=float)
        for l in self.results_.keys():
            mse[self.labels_ == l] = self.results_[l].MSE
        return mse

    def get_logL(self):
        """Accessor for the log-likelihood of the model

        Returns
        -------
        logL : array of shape (n_voxels,)
            the log-likelihood per voxel
        """
        # build the logL array
        logL = np.zeros(self.labels_.size, dtype=float)
        for l in self.results_.keys():
            logL[self.labels_ == l] = self.results_[l].logL
        return logL

    def contrast(self, con_val, contrast_type=None):
        """ Specify and estimate a linear contrast

        Parameters
        ----------
        con_val : numpy.ndarray of shape (p) or (q, p)
            where q = number of contrast vectors and p = number of regressors
        contrast_type : {None, 't', 'F', 'tmin-conjunction'}, optional
            type of the contrast.  If None, then defaults to 't' for 1D
            `con_val` and 'F' for 2D `con_val`

        Returns
        -------
        con : Contrast instance
        """
        if self.labels_ is None or self.results_ is None:
            raise ValueError('The model has not been estimated yet')
        con_val = np.asarray(con_val)
        if con_val.ndim == 1:
            dim = 1
        else:
            dim = con_val.shape[0]
        if contrast_type is None:
            if dim == 1:
                contrast_type = 't'
            else:
                contrast_type = 'F'
        if contrast_type not in ['t', 'F', 'tmin-conjunction']:
            raise ValueError('Unknown contrast type: %s' % contrast_type)

        effect_ = np.zeros((dim, self.labels_.size), dtype=float)
        var_ = np.zeros((dim, dim, self.labels_.size), dtype=float)
        if contrast_type == 't':
            for l in self.results_.keys():
                resl = self.results_[l].Tcontrast(con_val)
                effect_[:, self.labels_ == l] = resl.effect.T
                var_[:, :, self.labels_ == l] = (resl.sd ** 2).T
        else:
            for l in self.results_.keys():
                resl = self.results_[l].Fcontrast(con_val)
                effect_[:, self.labels_ == l] = resl.effect
                var_[:, :, self.labels_ == l] = resl.covariance

        dof_ = self.results_[l].df_resid
        return Contrast(effect=effect_, variance=var_, dof=dof_,
                        contrast_type=contrast_type)

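
# A minimal contrast-specification sketch (hypothetical random data, doctest
# style; not part of the original source). A 1D contrast vector defaults to a
# 't' test, a 2D contrast matrix to an 'F' test:
#
# >>> X = np.random.randn(100, 10)
# >>> model = GeneralLinearModel(X)
# >>> model.fit(np.random.randn(100, 50))
# >>> model.contrast(np.eye(10)[0]).contrast_type
# 't'
# >>> model.contrast(np.eye(10)[:3]).contrast_type
# 'F'
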
class Contrast(object):
    """ The contrast class handles the estimation of statistical contrasts on
    a given model: student (t), Fisher (F), conjunction (tmin-conjunction).
    The important feature is that it supports addition,
    thus opening the possibility of fixed-effects models.

    The current implementation is meant to be simple,
    and could be enhanced in the future on the computational side
    (high-dimensional F contrasts may lead to excessive memory use).

    Notes
    -----
    The 'tmin-conjunction' test is the valid conjunction test discussed in:
    Nichols T, Brett M, Andersson J, Wager T, Poline JB. Valid conjunction
    inference with the minimum statistic. Neuroimage. 2005
    Apr 15;25(3):653-60.

    This test gives the p-value of the z-values under the conjunction null,
    i.e. the union of the null hypotheses for all terms.
    """
    def __init__(self, effect, variance, dof=DEF_DOFMAX, contrast_type='t',
                 tiny=DEF_TINY, dofmax=DEF_DOFMAX):
        """
        Parameters
        ----------
        effect : array of shape (contrast_dim, n_voxels)
            the effects related to the contrast
        variance : array of shape (contrast_dim, contrast_dim, n_voxels)
            the associated variance estimate
        dof : scalar
            the degrees of freedom
        contrast_type : {'t', 'F'}
            the type of the contrast
        """
        if variance.ndim != 3:
            raise ValueError('Variance array should have 3 dimensions')
        if effect.ndim != 2:
            raise ValueError('Effect array should have 2 dimensions')
        if variance.shape[0] != variance.shape[1]:
            raise ValueError('Inconsistent shape for the variance estimate')
        if ((variance.shape[1] != effect.shape[0]) or
                (variance.shape[2] != effect.shape[1])):
            raise ValueError('Effect and variance have inconsistent shape')
        self.effect = effect
        self.variance = variance
        self.dof = float(dof)
        self.dim = effect.shape[0]
        if self.dim > 1 and contrast_type == 't':
            print('Automatically converted multi-dimensional t to F contrast')
            contrast_type = 'F'
        self.contrast_type = contrast_type
        self.stat_ = None
        self.p_value_ = None
        self.baseline = 0
        self.tiny = tiny
        self.dofmax = dofmax

    def stat(self, baseline=0.0):
        """ Return the decision statistic associated with the test of the
        null hypothesis: (H0) 'contrast equals baseline'

        Parameters
        ----------
        baseline : float, optional
            Baseline value for the test statistic
        """
        self.baseline = baseline

        # Case: one-dimensional contrast ==> t or t**2
        if self.dim == 1:
            # avoids division by zero
            stat = (self.effect - baseline) / np.sqrt(
                np.maximum(self.variance, self.tiny))
            if self.contrast_type == 'F':
                stat = stat ** 2
        # Case: F contrast
        elif self.contrast_type == 'F':
            # F = |t|^2/q ,  |t|^2 = e^t inv(v) e
            if self.effect.ndim == 1:
                self.effect = self.effect[np.newaxis]
            if self.variance.ndim == 1:
                self.variance = self.variance[np.newaxis, np.newaxis]
            stat = (multiple_mahalanobis(self.effect - baseline,
                                         self.variance) / self.dim)
        # Case: tmin (conjunctions)
        elif self.contrast_type == 'tmin-conjunction':
            vdiag = self.variance.reshape([self.dim ** 2] + list(
                self.variance.shape[2:]))[:: self.dim + 1]
            stat = (self.effect - baseline) / np.sqrt(
                np.maximum(vdiag, self.tiny))
            stat = stat.min(0)
        # Unknown stat
        else:
            raise ValueError('Unknown statistic type')
        self.stat_ = stat
        return stat.ravel()

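    # A minimal worked example of the t statistic (hypothetical values,
    # doctest style; not part of the original source): with a scalar effect
    # of 2 and variance 4, t = 2 / sqrt(4) = 1.
    #
    # >>> con = Contrast(np.array([[2.]]), np.array([[[4.]]]), dof=30)
    # >>> float(con.stat()[0])
    # 1.0
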
    def p_value(self, baseline=0.0):
        """Return a parametric estimate of the p-value associated
        with the null hypothesis: (H0) 'contrast equals baseline'

        Parameters
        ----------
        baseline : float, optional
            Baseline value for the test statistic

        Notes
        -----
        The value of 0.5 is used where the stat is not defined
        """
        if self.stat_ is None or not self.baseline == baseline:
            self.stat_ = self.stat(baseline)
        # Valid conjunction as in Nichols et al, Neuroimage 25, 2005.
        if self.contrast_type in ['t', 'tmin-conjunction']:
            p = sps.t.sf(self.stat_, np.minimum(self.dof, self.dofmax))
        elif self.contrast_type == 'F':
            p = sps.f.sf(self.stat_, self.dim, np.minimum(
                self.dof, self.dofmax))
        else:
            raise ValueError('Unknown statistic type')
        p[np.isnan(self.stat_)] = .5
        self.p_value_ = p
        return p

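    # Continuing the sketch above: the one-sided p-value of t = 1 with 30
    # degrees of freedom lies strictly between 0 and 0.5:
    #
    # >>> p = con.p_value()
    # >>> bool(0 < p[0] < 0.5)
    # True
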
    def z_score(self, baseline=0.0):
        """Return a parametric estimation of the z-score associated
        with the null hypothesis: (H0) 'contrast equals baseline'

        Parameters
        ----------
        baseline : float, optional
            Baseline value for the test statistic

        Notes
        -----
        The value of 0 is used where the stat is not defined
        """
        if self.p_value_ is None or not self.baseline == baseline:
            self.p_value_ = self.p_value(baseline)

        # Avoid inf values kindly supplied by scipy.
        self.z_score_ = z_score(self.p_value_)
        self.z_score_[np.isnan(self.stat_)] = 0
        return self.z_score_

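    # Continuing the sketch above: the z-score is the z-transform of that
    # p-value, with one value per voxel:
    #
    # >>> con.z_score().shape
    # (1,)
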
    def __add__(self, other):
        """Addition of self with other, yields a new Contrast instance

        This should be used only on independent contrasts"""
        if self.contrast_type != other.contrast_type:
            raise ValueError(
                'The two contrasts do not have consistent types')
        if self.dim != other.dim:
            raise ValueError(
                'The two contrasts do not have compatible dimensions')
        effect_ = self.effect + other.effect
        variance_ = self.variance + other.variance
        dof_ = self.dof + other.dof
        return Contrast(effect=effect_, variance=variance_, dof=dof_,
                        contrast_type=self.contrast_type)

    def __rmul__(self, scalar):
        """Multiplication of the contrast by a scalar"""
        scalar = float(scalar)
        effect_ = self.effect * scalar
        variance_ = self.variance * scalar ** 2
        dof_ = self.dof
        return Contrast(effect=effect_, variance=variance_, dof=dof_,
                        contrast_type=self.contrast_type)

    __mul__ = __rmul__

    def __div__(self, scalar):
        return self.__rmul__(1 / float(scalar))

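
# A minimal fixed-effects sketch (hypothetical values, doctest style; not
# part of the original source). Addition sums effects, variances and degrees
# of freedom of independent contrasts; scalar multiplication scales the
# variance quadratically:
#
# >>> c1 = Contrast(np.array([[2.]]), np.array([[[4.]]]), dof=30)
# >>> c2 = Contrast(np.array([[1.]]), np.array([[[1.]]]), dof=30)
# >>> ffx = c1 + c2
# >>> float(ffx.effect[0, 0]), float(ffx.variance[0, 0, 0]), ffx.dof
# (3.0, 5.0, 60.0)
# >>> float((2 * c1).variance[0, 0, 0])
# 16.0
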
class FMRILinearModel(object):
    """ This class is meant to handle GLMs from a higher-level perspective,
    i.e. by taking images as input and output
    """
    def __init__(self, fmri_data, design_matrices, mask='compute',
                 m=0.2, M=0.9, threshold=.5):
        """Load the data

        Parameters
        ----------
        fmri_data : Image or str or sequence of Images / str
            fmri images / paths of the (4D) fmri images
        design_matrices : arrays or str or sequence of arrays / str
            design matrix arrays / paths of .npz files
        mask : str or Image or None, optional
            string can be 'compute' or a path to an image;
            an Image is used as an input (assumed binary) mask;
            if 'compute', the mask is computed;
            if None, no masking will be applied
        m, M, threshold : float, optional
            parameters of the masking procedure.  Should be within [0, 1]

        Notes
        -----
        The only computation done here is mask computation (if required)

        Examples
        --------
        We need the example data package for this example::

            from nipy.utils import example_data
            from nipy.modalities.fmri.glm import FMRILinearModel
            fmri_files = [example_data.get_filename('fiac', 'fiac0', run)
                          for run in ['run1.nii.gz', 'run2.nii.gz']]
            design_files = [example_data.get_filename('fiac', 'fiac0', run)
                            for run in ['run1_design.npz',
                                        'run2_design.npz']]
            mask = example_data.get_filename('fiac', 'fiac0', 'mask.nii.gz')
            multi_session_model = FMRILinearModel(fmri_files,
                                                  design_files,
                                                  mask)
            multi_session_model.fit()
            z_image, = multi_session_model.contrast([np.eye(13)[1]] * 2)

            # The number of voxels with p < 0.001 given by ...
            print(np.sum(z_image.get_data() > 3.09))
        """
        # manipulate the arguments
        if isinstance(fmri_data, string_types) or hasattr(fmri_data,
                                                          'get_data'):
            fmri_data = [fmri_data]
        if isinstance(design_matrices, (string_types, np.ndarray)):
            design_matrices = [design_matrices]
        if len(fmri_data) != len(design_matrices):
            raise ValueError('Incompatible number of fmri runs and '
                             'design matrices were provided')
        self.fmri_data, self.design_matrices = [], []
        self.glms, self.means = [], []

        # load the fmri data
        for fmri_run in fmri_data:
            if isinstance(fmri_run, string_types):
                self.fmri_data.append(load(fmri_run))
            else:
                self.fmri_data.append(fmri_run)
        # set self.affine as the affine of the first image
        self.affine = get_affine(self.fmri_data[0])

        # load the designs
        for design_matrix in design_matrices:
            if isinstance(design_matrix, string_types):
                loaded = np.load(design_matrix)
                self.design_matrices.append(loaded[loaded.files[0]])
            else:
                self.design_matrices.append(design_matrix)

        # load the mask
        if mask == 'compute':
            mask = compute_mask_sessions(
                fmri_data, m=m, M=M, cc=1, threshold=threshold, opening=0)
            self.mask = Nifti1Image(mask.astype(np.int8), self.affine)
        elif mask is None:
            mask = np.ones(self.fmri_data[0].shape[:3]).astype(np.int8)
            self.mask = Nifti1Image(mask, self.affine)
        else:
            if isinstance(mask, string_types):
                self.mask = load(mask)
            else:
                self.mask = mask

    def fit(self, do_scaling=True, model='ar1', steps=100):
        """ Load the data, mask the data, scale the data, fit the GLM

        Parameters
        ----------
        do_scaling : bool, optional
            if True, the data will be scaled as percent of the voxel mean
        model : string, optional
            the kind of GLM ('ols' or 'ar1') to fit to the data
        steps : int, optional
            in case of an ar1, discretization of the ar1 parameter
        """
        # get the mask as an array
        mask = self.mask.get_data().astype(bool)

        self.glms, self.means = [], []
        for fmri, design_matrix in zip(self.fmri_data, self.design_matrices):
            if do_scaling:
                # scale the data
                data, mean = data_scaling(fmri.get_data()[mask].T)
            else:
                data, mean = (fmri.get_data()[mask].T,
                              fmri.get_data()[mask].T.mean(0))
            # note: the mean image inherits the mask dtype (int16),
            # so voxel means are truncated to integers
            mean_data = mask.astype(np.int16)
            mean_data[mask] = mean
            self.means.append(Nifti1Image(mean_data, self.affine))
            # fit the GLM
            glm = GeneralLinearModel(design_matrix)
            glm.fit(data, model, steps)
            self.glms.append(glm)

    def contrast(self, contrasts, con_id='', contrast_type=None,
                 output_z=True, output_stat=False, output_effects=False,
                 output_variance=False):
        """ Estimation of a contrast as fixed effects on all sessions

        Parameters
        ----------
        contrasts : array or list of arrays of shape (n_col) or (n_dim, n_col)
            where ``n_col`` is the number of columns of the design matrix,
            numerical definition of the contrast (one array per run)
        con_id : str, optional
            name of the contrast
        contrast_type : {'t', 'F', 'tmin-conjunction'}, optional
            type of the contrast
        output_z : bool, optional
            Return or not the corresponding z-stat image
        output_stat : bool, optional
            Return or not the base (t/F) stat image
        output_effects : bool, optional
            Return or not the corresponding effect image
        output_variance : bool, optional
            Return or not the corresponding variance image

        Returns
        -------
        output_images : list of nibabel images
            The requested output images, in the following order:
            z image, stat(t/F) image, effects image, variance image
        """
        if self.glms == []:
            raise ValueError('first run fit() to estimate the model')
        if isinstance(contrasts, np.ndarray):
            contrasts = [contrasts]
        if len(contrasts) != len(self.glms):
            raise ValueError(
                'contrasts must be a sequence of %d session contrasts' %
                len(self.glms))

        contrast_ = None
        for i, (glm, con) in enumerate(zip(self.glms, contrasts)):
            if np.all(con == 0):
                warn('Contrast for session %d is null' % i)
            elif contrast_ is None:
                contrast_ = glm.contrast(con, contrast_type)
            else:
                contrast_ = contrast_ + glm.contrast(con, contrast_type)

        if output_z or output_stat:
            # compute the contrast and stat
            contrast_.z_score()

        # Prepare the returned images
        mask = self.mask.get_data().astype(bool)
        do_outputs = [output_z, output_stat, output_effects, output_variance]
        estimates = ['z_score_', 'stat_', 'effect', 'variance']
        descrips = ['z statistic', 'Statistical value', 'Estimated effect',
                    'Estimated variance']
        dims = [1, 1, contrast_.dim, contrast_.dim ** 2]
        n_vox = mask.sum()
        output_images = []
        for (do_output, estimate, descrip, dim) in zip(
                do_outputs, estimates, descrips, dims):
            if do_output:
                if dim > 1:
                    result_map = np.tile(
                        mask.astype(float)[:, :, :, np.newaxis], dim)
                    result_map[mask] = np.reshape(
                        getattr(contrast_, estimate).T, (n_vox, dim))
                else:
                    result_map = mask.astype(float)
                    result_map[mask] = np.squeeze(
                        getattr(contrast_, estimate))
                output = Nifti1Image(result_map, self.affine)
                get_header(output)['descrip'] = (
                    '%s associated with contrast %s' % (descrip, con_id))
                output_images.append(output)
        return output_images