Source code for nipy.labs.spatial_models.structural_bfls

# vi: set ft=python sts=4 ts=4 sw=4 et:
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
"""
The main routine of this module implements the LandmarkRegions class,
which is used to represent regions of interest at the population level
(in a template space).

This has been used in
Thirion et al. Structural Analysis of fMRI
Data Revisited: Improving the Sensitivity and Reliability of fMRI
Group Studies.  IEEE TMI 2007

Author : Bertrand Thirion, 2006-2013
"""
from __future__ import print_function
from __future__ import absolute_import

#autoindent

import numpy as np
from scipy import stats


def _threshold_weight_map(x, fraction):
    """Threshold a positive map (in place) so that the retained values
    account for (just under) the requested fraction of the total value."""
    sorted_x = - np.sort(- x)
    if fraction < sorted_x[0] / x.sum():
        return np.zeros_like(x)
    # last index at which the cumulative sum of the sorted values is still
    # below the target mass
    idx = np.where(np.cumsum(sorted_x) < fraction * x.sum())[0][-1]
    x[x < sorted_x[idx]] = 0
    return x
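
# Worked example (illustrative, not part of the original module): with
# x = np.array([4., 3., 2., 1.]) and fraction = .8, the target mass is
# .8 * 10 = 8.; the cumulative sums of the sorted values are
# [4., 7., 9., 10.], so the threshold is set at 3. and the function
# returns array([4., 3., 0., 0.]) (note that x is modified in place).
# For any fraction below .4, even the single largest value alone exceeds
# the target, and an all-zero array is returned instead.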


class LandmarkRegions(object):
    """ This class is intended to represent a set of inter-subject regions.
    It should inherit from some abstract multiple ROI class,
    not implemented yet.
    """

    def __init__(self, domain, k, indiv_coord, subjects, confidence):
        """ Building the landmark_region

        Parameters
        ----------
        domain: ROI instance,
                defines the spatial context of the SubDomains
        k: int,
           the number of landmark regions considered
        indiv_coord: k-length list of arrays,
                     coordinates of the individual regions in some
                     embedding space
        subjects: k-length list of arrays,
                  subject indices of the individual regions
                  (an ROI feature)
        confidence: k-length list of arrays,
                    confidence values for the regions (0 is low, 1 is high)
        """
        self.domain = domain
        self.k = int(k)
        if len(indiv_coord) != k:
            raise ValueError('len(indiv_coord) should be equal to %d' % k)
        if len(subjects) != k:
            raise ValueError('len(subjects) should be equal to %d' % k)
        if len(confidence) != k:
            raise ValueError('len(confidence) should be equal to %d' % k)
        self.position = indiv_coord
        self.subjects = subjects
        self.confidence = confidence

    def centers(self):
        """Return the average of the coordinates of each region."""
        pos = self.position
        centers_ = np.array([np.mean(pos[k], 0) for k in range(self.k)])
        return centers_

    def kernel_density(self, k=None, coord=None, sigma=1.):
        """ Compute the density of a component as a kde

        Parameters
        ----------
        k: int (< self.k) or None,
           component upon which the density is computed;
           if None, the sum is taken over all components
        coord: array of shape (n, self.dom.em_dim), optional
               a set of input coordinates
        sigma: float, optional
               kernel size

        Returns
        -------
        kde: array of shape (n),
             the density sampled at the coords
        """
        from nipy.algorithms.utils.fast_distance import euclidean_distance
        if coord is None:
            coord = self.domain.coord
        if k is None:
            kde = np.zeros(coord.shape[0])
            for k in range(self.k):
                pos = self.position[k]
                dist = euclidean_distance(pos, coord)
                kde += np.exp(- dist ** 2 / (2 * sigma ** 2)).sum(0)
        else:
            k = int(k)
            pos = self.position[k]
            dist = euclidean_distance(pos, coord)
            kde = np.exp(- dist ** 2 / (2 * sigma ** 2)).sum(0)
        return kde / (2 * np.pi * sigma ** 2) ** (pos.shape[1] / 2)
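
    # In math form, with x_j the individual positions of the selected
    # component(s), y a query point and d the embedding dimension, the
    # value returned by `kernel_density` is
    #     kde(y) = sum_j exp(- ||x_j - y|| ** 2 / (2 * sigma ** 2))
    #              / (2 * pi * sigma ** 2) ** (d / 2),
    # i.e. an isotropic Gaussian kernel density estimate that is not
    # normalized by the number of points.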

    def map_label(self, coord=None, pval=1., sigma=1.):
        """Sample the set of landmark regions on the proposed coordinate
        set, assuming a Gaussian shape

        Parameters
        ----------
        coord: array of shape (n, dim), optional,
               a set of input coordinates
        pval: float in [0, 1], optional
              cutoff for the CR, i.e. highest posterior density threshold
        sigma: float, positive, optional
               spatial scale of the spatial model

        Returns
        -------
        label: array of shape (n),
               the posterior labelling
        """
        if coord is None:
            coord = self.domain.coord
        label = - np.ones(coord.shape[0])
        null_density = 1. / self.domain.local_volume.sum()
        if self.k > 0:
            aux = np.zeros((coord.shape[0], self.k))
            for k in range(self.k):
                kde = self.kernel_density(k, coord, sigma)
                aux[:, k] = _threshold_weight_map(kde, pval)

            aux[aux < null_density] = 0
            maux = np.max(aux, 1)
            label[maux > 0] = np.argmax(aux, 1)[maux > 0]
        return label
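
    # Usage sketch (illustrative): given an instance `lr`, label every
    # vertex of the domain, keeping for each landmark only the part of
    # its density that carries 95% of the mass:
    #
    #     label = lr.map_label(pval=.95, sigma=2.)
    #
    # Vertices whose best landmark density does not exceed the uniform
    # null density 1 / self.domain.local_volume.sum() keep the
    # background label -1.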

    def show(self):
        """Print basic information on self."""
        centers = self.centers()
        subjects = self.subjects
        prevalence = self.roi_prevalence()
        print("index", "prevalence", "mean_position", "individuals")
        for i in range(self.k):
            print(i, prevalence[i], centers[i], np.unique(subjects[i]))

    def roi_prevalence(self):
        """ Return a confidence index over the different rois

        Returns
        -------
        confid: array of shape self.k,
                the population prevalence of each landmark
        """
        prevalence_ = np.zeros(self.k)
        subjects = self.subjects
        for j in range(self.k):
            subjj = subjects[j]
            conf = self.confidence[j]
            for ls in np.unique(subjj):
                # probability that at least one region of subject ls is true
                lmj = 1 - np.prod(1 - conf[subjj == ls])
                prevalence_[j] += lmj
        return prevalence_
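
# Illustrative construction sketch (not part of the original module). The
# `domain` argument is assumed to be a nipy discrete domain; any object
# exposing `coord` (an (n, dim) array) and `local_volume` (an (n,) array)
# is enough for the methods used here.
#
#     class _ToyDomain(object):
#         coord = np.random.randn(100, 3)
#         local_volume = np.ones(100)
#
#     positions = [np.random.randn(5, 3), np.random.randn(4, 3) + 2]
#     subjects = [np.array([0, 0, 1, 1, 2]), np.array([0, 1, 1, 2])]
#     confidence = [np.ones(5), .5 * np.ones(4)]
#     lr = LandmarkRegions(_ToyDomain(), 2, positions, subjects, confidence)
#     lr.centers()         # (2, 3) array of per-landmark mean positions
#     lr.roi_prevalence()  # -> array([3., 1.75]): region 0 is certain in
#                          # its three subjects; in region 1, subject 1 has
#                          # two regions of confidence .5, contributing
#                          # 1 - .5 ** 2 = .75, and subjects 0 and 2
#                          # contribute .5 each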


def build_landmarks(domain, coords, subjects, labels, confidence=None,
                    prevalence_pval=0.95, prevalence_threshold=0,
                    sigma=1.):
    """ Given a list of hierarchical ROIs and an associated labelling, this
    creates a LandmarkRegions structure that groups ROIs with the same label.

    Parameters
    ----------
    domain: discrete_domain.DiscreteDomain instance,
            description of the spatial context of the landmarks
    coords: array of shape (n, 3),
            sets of coordinates for the different objects
    subjects: array of shape (n), dtype = int,
              indicators of the dataset the objects come from
    labels: array of shape (n), dtype = int,
            index of the landmark the object is associated with
    confidence: array of shape (n),
                measure of the significance of the regions
    prevalence_pval: float in [0, 1], optional,
                     see condition (c)
    prevalence_threshold: float, optional,
                          (c) a label should be present in
                          prevalence_threshold subjects with a
                          probability > prevalence_pval in order to be valid
    sigma: float, optional,
           regularizing constant that defines a prior on the region extent

    Returns
    -------
    LR: LandmarkRegions instance,
        describing a cross-subject set of ROIs; if inference yields a null
        result, LR is empty (LR.k == 0)
    newlabel: array of shape (labels.max() + 1),
              mapping from the input label values to the retained landmark
              indices; -1 for labels that do not fulfill condition (c)
    """
    if confidence is None:
        confidence = np.ones(labels.size)
    intrasubj = np.concatenate([np.arange(np.sum(subjects == s))
                                for s in np.unique(subjects)])

    coordinates = []
    subjs = []
    pps = []
    n_labels = int(labels.max() + 1)
    valid = np.zeros(n_labels).astype(int)

    # do some computation to find which regions are worth reporting
    for i in np.unique(labels[labels > - 1]):
        mean_c, var_c = 0., 0.
        subjects_i = subjects[labels == i]
        for subject_i in np.unique(subjects_i):
            confidence_i = 1 - np.prod(
                1 - confidence[(labels == i) * (subjects == subject_i)])
            mean_c += confidence_i
            var_c += confidence_i * (1 - confidence_i)

        # If noise is too low the variance is 0: ill-defined:
        var_c = max(var_c, 1e-14)

        # if above threshold, get some information to create the landmarks
        if (stats.norm.sf(prevalence_threshold, mean_c, np.sqrt(var_c))
                > prevalence_pval):
            coord = np.vstack([
                coords[subjects == s][k] for (k, s) in zip(
                    intrasubj[labels == i], subjects[labels == i])])
            valid[i] = 1
            coordinates.append(coord)
            subjs.append(subjects_i)
            pps.append(confidence[labels == i])

    # relabel the ROIs
    maplabel = - np.ones(n_labels).astype(int)
    maplabel[valid > 0] = np.cumsum(valid[valid > 0]) - 1

    # create the landmark regions structure
    LR = LandmarkRegions(domain, np.sum(valid), indiv_coord=coordinates,
                         subjects=subjs, confidence=pps)
    return LR, maplabel
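
# End-to-end sketch (illustrative, reusing the `_ToyDomain` idea above):
# six candidate regions from three subjects, grouped under two labels.
# Label 0 occurs in all three subjects, label 1 in only two; with
# prevalence_threshold=2 and prevalence_pval=.5, label 0 passes the
# prevalence test while label 1 is expected to be discarded.
#
#     n = 6
#     coords = np.random.randn(n, 3)
#     subjects = np.array([0, 0, 1, 1, 2, 2])
#     labels = np.array([0, 1, 0, 1, 0, -1])  # -1: unassigned object
#     lr, new_labels = build_landmarks(
#         _ToyDomain(), coords, subjects, labels,
#         confidence=.9 * np.ones(n),
#         prevalence_pval=.5, prevalence_threshold=2)
#     lr.k        # number of retained landmarks (1 here)
#     new_labels  # new_labels[i] is the retained index of old label i,
#                 # or -1 if label i was discarded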