Source code for nipy.algorithms.statistics.mixed_effects_stat

"""
Module for computation of mixed effects statistics with an EM algorithm.
i.e. it solves problems of the form

y = X beta + e1 + e2,

where X and y are known, and e1 and e2 are centered with diagonal
covariance: V1 = var(e1) is known, and V2 = var(e2) = lambda * identity.
The code estimates beta and lambda using an EM algorithm.
Likelihood ratio tests can then be used to test the columns of beta.

Author: Bertrand Thirion, 2012.

>>> N, P = 15, 500
>>> V1 = np.random.randn(N, P) ** 2
>>> effects = np.ones(P)
>>> Y = generate_data(np.ones(N), effects, .25, V1)
>>> T1 = one_sample_ttest(Y, V1, n_iter=5)
>>> T2 = t_stat(Y)
>>> assert(T1.std() < T2.std())
"""
from __future__ import absolute_import
from __future__ import print_function

import numpy as np

EPS = 100 * np.finfo(float).eps


def generate_data(X, beta, V2, V1):
    """ Generate a group of individuals from the provided parameters

    Parameters
    ----------
    X: array of shape (n_samples, n_reg),
       the design matrix of the model
    beta: float or array of shape (n_reg, n_tests),
          the associated effects
    V2: float or array of shape (n_tests),
        group variance
    V1: array of shape (n_samples, n_tests),
        the individual variances

    Returns
    -------
    Y: array of shape (n_samples, n_tests)
       the individual data related to the two-level normal model
    """
    # check that the variances are positive
    if (V1 < 0).any():
        raise ValueError('Variance should be positive')
    Y = np.random.randn(*V1.shape)
    Y *= np.sqrt(V2 + V1)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if np.isscalar(beta):
        beta = beta * np.ones((X.shape[1], V1.shape[1]))
    if beta.ndim == 1:
        beta = beta[np.newaxis]
    Y += np.dot(X, beta)
    return Y
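
# A minimal usage sketch for generate_data (shapes and values are
# arbitrary); the Gaussian draws are squared so that the first-level
# variances V1 are positive, as the function requires:
#
# >>> n, p = 10, 50
# >>> V1 = np.random.randn(n, p) ** 2
# >>> Y = generate_data(np.ones(n), 2., .5, V1)
# >>> Y.shape
# (10, 50)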


def check_arrays(Y, V1):
    """Check that the given data can be used for the models

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests) or (n_samples)
       the estimated effects
    V1: array of shape (n_samples, n_tests) or (n_samples)
        first-level variance

    Returns
    -------
    Y, V1: the input arrays, reshaped to (n_samples, n_tests) if needed
    """
    if (V1 < 0).any():
        raise ValueError("a negative variance has been provided")
    if np.size(Y) == Y.shape[0]:
        Y = Y[:, np.newaxis]
    if np.size(V1) == V1.shape[0]:
        V1 = V1[:, np.newaxis]
    if Y.shape != V1.shape:
        raise ValueError("Y and V1 do not have the same shape")
    return Y, V1
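
# check_arrays promotes one-dimensional inputs to column vectors, so 1d
# effect/variance pairs can be passed to the estimators directly; a
# small sketch with arbitrary values:
#
# >>> y, v = np.ones(5), np.ones(5)
# >>> y2, v2 = check_arrays(y, v)
# >>> y2.shape, v2.shape
# ((5, 1), (5, 1))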


def t_stat(Y):
    """ Returns the t stat of the sample on each row of the matrix

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests)

    Returns
    -------
    t_variates: array of shape (n_tests)
    """
    return Y.mean(0) / Y.std(0) * np.sqrt(Y.shape[0] - 1)
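
# Y.std(0) above is the maximum-likelihood (ddof=0) standard deviation,
# so the sqrt(n_samples - 1) factor recovers the usual one-sample
# t statistic. A shape-only sketch on toy data:
#
# >>> t_stat(np.arange(8.).reshape(4, 2)).shape
# (2,)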


class MixedEffectsModel(object):
    """Class to handle multiple one-sample mixed effects models
    """

    def __init__(self, X, n_iter=5, verbose=False):
        """ Set the design matrix of the model and initialize the
        related quantities

        Parameters
        ----------
        X: array of shape (n_samples, n_effects),
           the design matrix
        n_iter: int, optional,
                number of iterations of the EM algorithm
        verbose: bool, optional, verbosity mode
        """
        self.n_iter = n_iter
        self.verbose = verbose
        self.X = X
        self.pinv_X = np.linalg.pinv(X)

    def log_like(self, Y, V1):
        """ Compute the log-likelihood of (Y, V1) under the model

        Parameters
        ----------
        Y: array of shape (n_samples, n_tests) or (n_samples)
           the estimated effects
        V1: array of shape (n_samples, n_tests) or (n_samples)
            first-level variance

        Returns
        -------
        logl: array of shape (n_tests),
              the log-likelihood of the model
        """
        Y, V1 = check_arrays(Y, V1)
        tvar = self.V2 + V1
        logl = np.sum(((Y - self.Y_) ** 2) / tvar, 0)
        logl += np.sum(np.log(tvar), 0)
        logl += np.log(2 * np.pi) * Y.shape[0]
        logl *= (- 0.5)
        return logl

    def predict(self, Y, V1):
        """Return the log_likelihood of the data. See the log_like method"""
        return self.log_like(Y, V1)

    def score(self, Y, V1):
        """Return the log_likelihood of the data. See the log_like method"""
        return self.log_like(Y, V1)

    def _one_step(self, Y, V1):
        """Applies one step of the EM algorithm to estimate
        self.beta_, self.Y_ and self.V2

        Parameters
        ----------
        Y: array of shape (n_samples, n_tests) or (n_samples)
           the estimated effects
        V1: array of shape (n_samples, n_tests) or (n_samples)
            first-level variance
        """
        # E step
        prec = 1. / (self.V2 + V1)
        Y_ = prec * (self.V2 * Y + V1 * self.Y_)
        cvar = V1 * self.V2 * prec

        # M step
        self.beta_ = np.dot(self.pinv_X, Y_)
        self.Y_ = np.dot(self.X, self.beta_)
        self.V2 = np.mean((Y_ - self.Y_) ** 2, 0) + cvar.mean(0)
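
    # Note on _one_step: the E step forms, per test, the posterior mean
    # of the latent noise-free signal,
    #     Y_ = (V2 * Y + V1 * X beta) / (V1 + V2),
    # and its posterior variance cvar = V1 * V2 / (V1 + V2); the M step
    # then refits beta by least squares on Y_ and updates V2 as the
    # residual variance of Y_ plus the mean posterior variance.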

    def fit(self, Y, V1):
        """ Launches the EM algorithm to estimate self

        Parameters
        ----------
        Y: array of shape (n_samples, n_tests) or (n_samples)
           the estimated effects
        V1: array of shape (n_samples, n_tests) or (n_samples)
            first-level variance

        Returns
        -------
        self
        """
        # Basic data checks
        if self.X.shape[0] != Y.shape[0]:
            raise ValueError('X and Y must have the same numbers of rows')
        Y, V1 = check_arrays(Y, V1)
        self.beta_ = np.dot(self.pinv_X, Y)
        self.Y_ = np.dot(self.X, self.beta_)
        self.V2 = np.mean((Y - self.Y_) ** 2, 0)

        if self.verbose:
            log_like_init = self.log_like(Y, V1)
            print('Average log-likelihood: ', log_like_init.mean())

        for i in range(self.n_iter):
            self._one_step(Y, V1)

            if self.verbose:
                log_like_ = self.log_like(Y, V1)
                if (log_like_ < (log_like_init - EPS)).any():
                    raise ValueError('The log-likelihood cannot decrease')
                log_like_init = log_like_
                print('Iteration %d, average log-likelihood: %f' % (
                    i, log_like_.mean()))
        return self
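
# A fitting sketch for the class above: a one-sample (intercept-only)
# model on synthetic data built with generate_data; the sizes are
# arbitrary and only the output shapes are checked.
#
# >>> n, p = 15, 100
# >>> V1 = np.random.rand(n, p)
# >>> Y = generate_data(np.ones(n), 1., .5, V1)
# >>> model = MixedEffectsModel(np.ones((n, 1))).fit(Y, V1)
# >>> model.beta_.shape, model.V2.shape
# ((1, 100), (100,))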


def two_sample_ftest(Y, V1, group, n_iter=5, verbose=False):
    """Returns the mixed effects F-stat for each column of Y
    (two-sample test)

    This uses the formula in Roche et al., NeuroImage 2007

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests)
       the data
    V1: array of shape (n_samples, n_tests)
        first-level variance associated with the data
    group: array of shape (n_samples)
           a vector of indicators yielding the samples' membership
    n_iter: int, optional,
            number of iterations of the EM algorithm
    verbose: bool, optional, verbosity mode

    Returns
    -------
    fstat: array of shape (n_tests),
           statistical values obtained from the likelihood ratio test
    """
    # check that group is correct
    if group.size != Y.shape[0]:
        raise ValueError('The number of labels is not the number of samples')
    if not np.array_equal(np.unique(group), [0, 1]):
        raise ValueError('group should be composed only of zeros and ones')

    # create the design matrix
    X = np.vstack((np.ones_like(group), group)).T
    return mfx_stat(Y, V1, X, 1, n_iter=n_iter, verbose=verbose,
                    return_t=False, return_f=True)[0]
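
# A two-sample sketch: group must be a binary indicator vector over the
# samples. The data below are synthetic and only illustrate the call
# and the expected output shape.
#
# >>> n, p = 20, 50
# >>> group = np.repeat([0, 1], n // 2)
# >>> V1 = np.random.rand(n, p)
# >>> X = np.vstack((np.ones(n), group)).T
# >>> Y = generate_data(X, np.ones((2, p)), .5, V1)
# >>> two_sample_ftest(Y, V1, group).shape
# (50,)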


def two_sample_ttest(Y, V1, group, n_iter=5, verbose=False):
    """Returns the mixed effects t-stat for each column of Y
    (two-sample test)

    This uses the formula in Roche et al., NeuroImage 2007

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests)
       the data
    V1: array of shape (n_samples, n_tests)
        first-level variance associated with the data
    group: array of shape (n_samples)
           a vector of indicators yielding the samples' membership
    n_iter: int, optional,
            number of iterations of the EM algorithm
    verbose: bool, optional, verbosity mode

    Returns
    -------
    tstat: array of shape (n_tests),
           statistical values obtained from the likelihood ratio test
    """
    X = np.vstack((np.ones_like(group), group)).T
    return mfx_stat(Y, V1, X, 1, n_iter=n_iter, verbose=verbose,
                    return_t=True)[0]


def one_sample_ftest(Y, V1, n_iter=5, verbose=False):
    """Returns the mixed effects F-stat for each column of Y
    (one-sample test)

    This uses the formula in Roche et al., NeuroImage 2007

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests)
       the data
    V1: array of shape (n_samples, n_tests)
        first-level variance associated with the data
    n_iter: int, optional,
            number of iterations of the EM algorithm
    verbose: bool, optional, verbosity mode

    Returns
    -------
    fstat: array of shape (n_tests),
           statistical values obtained from the likelihood ratio test
    """
    return mfx_stat(Y, V1, np.ones((Y.shape[0], 1)), 0, n_iter=n_iter,
                    verbose=verbose, return_t=False, return_f=True)[0]


def one_sample_ttest(Y, V1, n_iter=5, verbose=False):
    """Returns the mixed effects t-stat for each column of Y
    (one-sample test)

    This uses the formula in Roche et al., NeuroImage 2007

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests)
       the observations
    V1: array of shape (n_samples, n_tests)
        first-level variance associated with the observations
    n_iter: int, optional,
            number of iterations of the EM algorithm
    verbose: bool, optional, verbosity mode

    Returns
    -------
    tstat: array of shape (n_tests),
           statistical values obtained from the likelihood ratio test
    """
    return mfx_stat(Y, V1, np.ones((Y.shape[0], 1)), 0, n_iter=n_iter,
                    verbose=verbose, return_t=True)[0]


def mfx_stat(Y, V1, X, column, n_iter=5, return_t=True,
             return_f=False, return_effect=False,
             return_var=False, verbose=False):
    """Run a mixed-effects model test on a column of the design matrix

    Parameters
    ----------
    Y: array of shape (n_samples, n_tests)
       the data
    V1: array of shape (n_samples, n_tests)
        first-level variance associated with the data
    X: array of shape (n_samples, n_regressors)
       the design matrix of the model
    column: int,
            index of the column of X to be tested
    n_iter: int, optional,
            number of iterations of the EM algorithm
    return_t: bool, optional,
              should one return the t test (True by default)
    return_f: bool, optional,
              should one return the F test (False by default)
    return_effect: bool, optional,
                   should one return the effect estimate (False by default)
    return_var: bool, optional,
                should one return the variance estimate (False by default)
    verbose: bool, optional, verbosity mode

    Returns
    -------
    (tstat, fstat, var, effect): tuple of arrays of shape (n_tests),
                                 those required by the input return booleans
    """
    # check that X and column are correct
    column = int(column)
    if X.shape[0] != Y.shape[0]:
        raise ValueError('X.shape[0] is not the number of samples')
    if column >= X.shape[1]:
        raise ValueError(
            'the column index exceeds the number of columns of X')

    # create the design matrix of the reduced model
    contrast_mask = 1 - np.eye(X.shape[1])[column]
    X0 = X * contrast_mask

    # instantiate the mixed effects models
    model_0 = MixedEffectsModel(X0, n_iter=n_iter, verbose=verbose).fit(Y, V1)
    model_1 = MixedEffectsModel(X, n_iter=n_iter, verbose=verbose).fit(Y, V1)

    # compute the log-likelihood ratio statistic
    fstat = 2 * (model_1.log_like(Y, V1) - model_0.log_like(Y, V1))
    fstat = np.maximum(0, fstat)
    sign = np.sign(model_1.beta_[column])

    output = ()
    if return_t:
        output += (np.sqrt(fstat) * sign,)
    if return_f:
        output += (fstat,)
    if return_var:
        output += (model_1.V2,)
    if return_effect:
        output += (model_1.beta_[column],)
    return output
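
# mfx_stat returns a tuple ordered as (t, F, variance, effect), keeping
# only the outputs whose return_* flag is set; a sketch on synthetic
# data requesting two of them:
#
# >>> n, p = 15, 40
# >>> X = np.hstack((np.ones((n, 1)), np.random.randn(n, 1)))
# >>> V1 = np.random.rand(n, p)
# >>> Y = generate_data(X, np.ones((2, p)), .5, V1)
# >>> t, effect = mfx_stat(Y, V1, X, 1, return_t=True, return_effect=True)
# >>> t.shape, effect.shape
# ((40,), (40,))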