Source code for nipy.algorithms.statistics.bayesian_mixed_effects

"""
Generic implementation of multiple regression analysis under noisy
measurements.
"""
from __future__ import absolute_import

import numpy as np

nonzero = lambda x: np.maximum(x, 1e-25)


[docs]def two_level_glm(y, vy, X, niter=10):
    """
    Inference of a mixed-effect linear model using the variational
    Bayes algorithm.

    Parameters
    ----------
    y : array-like
      Array of observations. Shape should be (n, ...) where n is the
      number of independent observations per unit.

    vy : array-like
      First-level variances associated with the observations. Should
      be of the same shape as Y.

    X : array-like
      Second-level design matrix. Shape should be (n, p) where n is
      the number of observations per unit, and p is the number of
      regressors.

    Returns
    -------
    beta : array-like
      Effect estimates (posterior means)

    s2 : array-like
      Variance estimates. The posterior variance matrix of beta[:, i]
      may be computed by s2[:, i] * inv(X.T * X)

    dof : float
      Degrees of freedom as per the variational Bayes approximation
      (simply, the number of observations minus the number of
      independent regressors)
    """
    # Number of observations, regressors and points
    nobs = X.shape[0]
    if X.ndim == 1:
        nreg = 1
    else:
        nreg = X.shape[1]
    if nobs <= nreg:
        raise ValueError('Too many regressors compared to data size')
    if y.ndim == 1:
        npts = 1
    else:
        npts = np.prod(y.shape[1:])

    # Reshape input arrays
    X = X.reshape((nobs, nreg))
    y = np.reshape(y, (nobs, npts))
    vy = nonzero(np.reshape(vy, (nobs, npts)))

    # Degrees of freedom
    dof = float(nobs - nreg)

    # Compute the pseudo-inverse matrix
    pinvX = np.linalg.pinv(X)

    # Initialize outputs
    b = np.zeros((nreg, npts))
    zfit = np.zeros((nobs, npts))
    s2 = np.inf

    # VB loop
    for it in range(niter):

        # Update distribution of "true" effects
        w1 = 1 / vy
        w2 = 1 / nonzero(s2)
        vz = 1 / (w1 + w2)
        z = vz * (w1 * y + w2 * zfit)

        # Update distribution of population parameters
        b = np.dot(pinvX, z)
        zfit = np.dot(X, b)
        s2 = np.sum((z - zfit) ** 2 + vz, 0) / dof

    # Ouput arrays
    B = np.reshape(b, [nreg] + list(y.shape[1:]))
    S2 = np.reshape(s2, list(y.shape[1:]))

    return B, S2, dof