Source code for selectinf.randomized.group_lasso


from __future__ import print_function
import functools
from copy import copy

import numpy as np
from scipy.stats import norm as ndist

import regreg.api as rr

from .query import query, affine_gaussian_sampler

from .randomization import randomization
from ..base import restricted_estimator
from ..algorithms.debiased_lasso import (debiasing_matrix,
                                         pseudoinverse_debiasing_matrix)
from ..algorithms.sqrt_lasso import solve_sqrt_lasso

#### High dimensional version
#### - parametric covariance
#### - Gaussian randomization

class group_lasso(query):

    r"""
    A class for the randomized group LASSO for post-selection inference.
    The problem solved is

    .. math::

        \text{minimize}_{\beta} \ell(\beta) +
            \sum_{g \in G} \lambda_g \|\beta_g\|_2 - \omega^T\beta + \frac{\epsilon}{2} \|\beta\|^2_2

    where $\lambda_g$ is `weights[g]`, $\omega$ is a randomization generated below
    and the last term is a small ridge penalty. Each static method forms $\ell$
    as well as the group LASSO penalty. The generic class forms the remaining
    two terms in the objective.
    """
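    # A minimal sketch of how `groups` and `weights` are specified (illustrative
    # values only, not taken from this module): `groups` assigns a label to each of
    # the p features and `weights` maps each label to its penalty weight, with 0
    # leaving that group unpenalized, e.g.
    #
    #     groups  = np.array([0, 0, 1, 1, 1, 2])   # three groups of sizes 2, 3, 1
    #     weights = {0: 3., 1: 4., 2: 0.}          # group 2 (say, an intercept) unpenalized
    #     pen = rr.group_lasso(groups, weights=weights, lagrange=1.)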
    def __init__(self,
                 loglike,
                 groups,
                 weights,
                 ridge_term,
                 randomizer,
                 perturb=None):
        r"""
        Create a new post-selection object for the group LASSO problem.

        Parameters
        ----------

        loglike : `regreg.smooth.glm.glm`
            A (negative) log-likelihood as implemented in `regreg`.

        groups : np.ndarray
            Group labels, one per feature.

        weights : dict
            Weight on the norm of each group of coefficients in the penalty,
            keyed by group label. A weight of 0 leaves that group unpenalized.

        ridge_term : float
            How big a ridge term to add?

        randomizer : object
            Randomizer -- contains representation of randomization density.

        perturb : np.ndarray
            Random perturbation subtracted as a linear term in the objective function.
        """

        self.loglike = loglike
        self.nfeature = p = self.loglike.shape[0]

        self.ridge_term = ridge_term
        self.penalty = rr.group_lasso(groups,
                                      weights=weights,
                                      lagrange=1.)
        self._initial_omega = perturb  # random perturbation
        self.randomizer = randomizer
    def fit(self,
            solve_args={'tol': 1.e-12, 'min_its': 50},
            perturb=None):
        """
        Fit the randomized group LASSO using `regreg`.

        Parameters
        ----------

        solve_args : keyword args
            Passed to `regreg.problems.simple_problem.solve`.

        Returns
        -------

        selection_variable : dict
            Groups selected by the randomized group LASSO and the
            direction of each selected group's coefficients.
        """

        p = self.nfeature

        (self.initial_soln,
         self.initial_subgrad) = self._solve_randomized_problem(
                                     perturb=perturb,
                                     solve_args=solve_args)

        # assuming solution is non-zero here!

        active = []
        active_dirs = {}
        unpenalized = []
        overall = np.ones(p, bool)

        ordered_groups = []
        ordered_opt = []
        ordered_vars = []

        tol = 1.e-6

        for g in sorted(np.unique(self.penalty.groups)):

            group = self.penalty.groups == g
            soln = self.initial_soln

            if np.linalg.norm(soln[group]) > tol * np.linalg.norm(soln):

                ordered_groups.append(g)
                ordered_vars.extend(np.nonzero(group)[0])

                if self.penalty.weights[g] == 0:
                    unpenalized.append(g)
                    ordered_opt.append(soln[group])
                else:
                    active.append(g)
                    dir = soln[group] / np.linalg.norm(soln[group])
                    active_dirs[g] = dir
                    ordered_opt.append(np.linalg.norm(soln[group]) - self.penalty.weights[g])
            else:
                overall[group] = False

        self.selection_variable = {'directions': active_dirs,
                                   'active_groups': active}
        self._ordered_groups = ordered_groups

        # initial state for opt variables

        self.observed_opt_state = np.hstack(ordered_opt)

        _beta_unpenalized = restricted_estimator(self.loglike,
                                                 overall,
                                                 solve_args=solve_args)

        beta_bar = np.zeros(p)
        beta_bar[overall] = _beta_unpenalized
        self._beta_full = beta_bar

        # form linear part

        num_opt_var = self.observed_opt_state.shape[0]

        # setup hessian and ridge term

        X, y = self.loglike.data
        W = self._W = self.loglike.saturated_loss.hessian(X.dot(beta_bar))
        opt_linear = np.dot(X.T, X[:, ordered_vars] * W[:, None])

        # set the observed score (data dependent) state
        #
        # observed_score_state is
        # \nabla \ell(\bar{\beta}_E) + Q(\bar{\beta}_E) \bar{\beta}_E
        # in linear regression this is _ALWAYS_ -X^TY
        #
        # should be asymptotically equivalent to
        # \nabla \ell(\beta^*) + Q(\beta^*)\beta^*

        self.observed_score_state = -opt_linear.dot(_beta_unpenalized)
        self.observed_score_state[~overall] += self.loglike.smooth_objective(beta_bar, 'grad')[~overall]

        # now adjust for ridge term

        for i, var in enumerate(ordered_vars):
            opt_linear[var, i] += self.ridge_term

        opt_offset = self.initial_subgrad

        # for group LASSO, we will have
        # a different sampler for each group
        # based on conditioning on all scalings
        # except that group
        #
        # means targets will also need to say what group
        # they are "targeting"...

        self._samplers = {}

        # dispersion will have to be adjusted for data splitting

        dispersion = 1.
        (prec_opt_linear,
         logdens_linear) = self._get_precision_opt_linear(opt_linear,
                                                          ordered_vars,
                                                          dispersion)

        for group, dens_info in _reference_density_info(soln,
                                                        ordered_groups,
                                                        ordered_vars,
                                                        opt_linear,
                                                        opt_offset,
                                                        self.observed_score_state,
                                                        self.initial_subgrad,
                                                        self.penalty,
                                                        prec_opt_linear).items():

            (dir_g,
             idx_g,
             implied_mean,
             implied_variance,
             log_det,
             log_cond_density) = dens_info

            group_idx = self.penalty.groups == group
            initial_scaling = np.linalg.norm(soln[group_idx]) - self.penalty.weights[group]

            sampler = polynomial_gaussian_sampler(implied_mean,
                                                  implied_variance,
                                                  initial_scaling,
                                                  self.observed_score_state,
                                                  log_cond_density,
                                                  log_det,
                                                  (np.atleast_2d(logdens_linear.T[:, idx_g].dot(dir_g).T),
                                                   opt_offset))
            self._samplers[group] = sampler

        self._setup = True

        return self.selection_variable
    def summary(self,
                observed_target,
                group_assignments,
                target_cov,
                target_score_cov,
                alternatives,
                parameter=None,
                level=0.9,
                ndraw=10000,
                compute_intervals=False):

        # for smoke test, let's just
        # use the first non-zero group for everything

        if parameter is None:
            parameter = np.zeros_like(observed_target)

        pvalues = np.zeros_like(observed_target)
        pivots = np.zeros_like(observed_target)
        intervals = np.zeros((parameter.shape[0], 2))

        for group in np.unique(group_assignments):

            group_idx = group_assignments == group
            (pivots_,
             pvalues_,
             intervals_) = self._inference_for_target(
                               observed_target[group_idx],
                               group,
                               target_cov[group_idx][:, group_idx],
                               target_score_cov[group_idx],
                               [alternatives[i] for i in np.nonzero(group_idx)[0]],
                               parameter=parameter[group_idx],
                               level=level,
                               ndraw=ndraw,
                               compute_intervals=compute_intervals)

            pvalues[group_idx] = pvalues_
            pivots[group_idx] = pivots_
            intervals[group_idx] = np.array(intervals_)

        return pvalues, pivots, intervals
    def _inference_for_target(self,
                              observed_target,
                              group,
                              target_cov,
                              target_score_cov,
                              alternatives,
                              opt_sample=None,
                              target_sample=None,
                              parameter=None,
                              level=0.9,
                              ndraw=10000,
                              compute_intervals=False):
        """
        Produce p-values and confidence intervals for the target
        associated with one selected group.

        Parameters
        ----------

        observed_target : np.ndarray
            Observed estimate of the target for this group.

        group : hashable
            Label of the selected group -- picks out the appropriate sampler.

        target_cov : np.ndarray
            Covariance of the target.

        target_score_cov : np.ndarray
            Covariance of the target with the score.

        alternatives : list
            Alternatives for each coordinate of the target.

        parameter : np.array
            Hypothesized value for parameter -- defaults to 0.

        level : float
            Confidence level.

        ndraw : int (optional)
            Defaults to 10000.

        compute_intervals : bool
            Compute confidence intervals?
        """

        sampler = self._samplers[group]

        if parameter is None:
            parameter = np.zeros_like(observed_target)

        if opt_sample is None:
            opt_sample, logW = sampler.sample(ndraw)
        else:
            ndraw = opt_sample.shape[0]

        pivots = sampler.coefficient_pvalues(observed_target,
                                             target_cov,
                                             target_score_cov,
                                             parameter=parameter,
                                             sample=(opt_sample, logW),
                                             normal_sample=target_sample,
                                             alternatives=alternatives)

        if not np.all(parameter == 0):
            pvalues = sampler.coefficient_pvalues(observed_target,
                                                  target_cov,
                                                  target_score_cov,
                                                  parameter=np.zeros_like(parameter),
                                                  sample=(opt_sample, logW),
                                                  normal_sample=target_sample,
                                                  alternatives=alternatives)
        else:
            pvalues = pivots

        intervals = None

        if compute_intervals:
            intervals = sampler.confidence_intervals(observed_target,
                                                     target_cov,
                                                     target_score_cov,
                                                     sample=(opt_sample, logW),
                                                     normal_sample=target_sample,
                                                     level=level)

        return pivots, pvalues, intervals

    def _get_precision_opt_linear(self, opt_linear, variables, dispersion=1):
        """
        Precision of randomization times columns of restricted Hessian
        """

        _, prec = self.randomizer.cov_prec
        if np.asarray(prec).shape in [(), (0,)]:
            value = prec * opt_linear / dispersion
        else:
            value = prec.dot(opt_linear) / dispersion

        cond_precision = opt_linear.T.dot(value)
        cond_cov = np.linalg.inv(cond_precision)
        logdens_linear = cond_cov.dot(value.T) * dispersion  # is this last dispersion correct?

        return value, logdens_linear

    def _solve_randomized_problem(self,
                                  perturb=None,
                                  solve_args={'tol': 1.e-12, 'min_its': 50}):

        # take a new perturbation if supplied
        if perturb is not None:
            self._initial_omega = perturb
        if self._initial_omega is None:
            self._initial_omega = self.randomizer.sample()

        quad = rr.identity_quadratic(self.ridge_term,
                                     0,
                                     -self._initial_omega,
                                     0)
        problem = rr.simple_problem(self.loglike, self.penalty)
        initial_soln = problem.solve(quad, **solve_args)
        initial_subgrad = -(self.loglike.smooth_objective(initial_soln, 'grad') +
                            quad.objective(initial_soln, 'grad'))

        return initial_soln, initial_subgrad
    @staticmethod
    def gaussian(X,
                 Y,
                 groups,
                 weights,
                 sigma=1.,
                 quadratic=None,
                 ridge_term=None,
                 randomizer_scale=None):
        r"""
        Squared-error group LASSO. Objective function is
        (before randomization)

        .. math::

            \beta \mapsto \frac{1}{2} \|Y-X\beta\|^2_2 + \sum_{g \in G} \lambda_g \|\beta_g\|_2

        where $\lambda_g$ is `weights[g]`. The ridge term
        is determined by the Hessian and `np.std(Y)` by default,
        as is the randomizer scale.

        Parameters
        ----------

        X : ndarray
            Shape (n,p) -- the design matrix.

        Y : ndarray
            Shape (n,) -- the response.

        groups : ndarray
            Shape (p,) -- group labels, one per feature.

        weights : dict
            Penalty weight for each group. An intercept, or other unpenalized
            features are handled by setting the corresponding group's weight to 0.

        sigma : float (optional)
            Noise variance. This scales the loglikelihood by `sigma**(-2)`.

        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
            An optional quadratic term to be added to the objective.
            Can also be a linear term by setting quadratic
            coefficient to 0.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomizer.

        Returns
        -------

        L : `selectinf.randomized.group_lasso.group_lasso`
        """

        loglike = rr.glm.gaussian(X, Y, coef=1. / sigma ** 2, quadratic=quadratic)
        n, p = X.shape

        mean_diag = np.mean((X ** 2).sum(0))
        if ridge_term is None:
            ridge_term = np.std(Y) * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if randomizer_scale is None:
            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(Y) * np.sqrt(n / (n - 1.))

        randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)

        if sigma != 1:
            # rescale the group weights to match the scaled loglikelihood
            weights = copy(weights)
            for k in weights.keys():
                weights[k] = weights[k] / sigma**2

        return group_lasso(loglike,
                           groups,
                           weights,
                           ridge_term,
                           randomizer)
    @staticmethod
    def logistic(X,
                 successes,
                 groups,
                 weights,
                 trials=None,
                 quadratic=None,
                 ridge_term=None,
                 randomizer_scale=None):
        r"""
        Logistic group LASSO (before randomization)

        .. math::

            \beta \mapsto \ell(X\beta) + \sum_{g \in G} \lambda_g \|\beta_g\|_2

        where $\ell$ is the negative of the logistic
        log-likelihood (half the logistic deviance)
        and $\lambda_g$ is `weights[g]`.

        Parameters
        ----------

        X : ndarray
            Shape (n,p) -- the design matrix.

        successes : ndarray
            Shape (n,) -- response vector. An integer number of successes.
            For data that is proportions, multiply the proportions
            by the number of trials first.

        groups : ndarray
            Shape (p,) -- group labels, one per feature.

        weights : dict
            Penalty weight for each group. An intercept, or other unpenalized
            features are handled by setting the corresponding group's weight to 0.

        trials : ndarray (optional)
            Number of trials per response, defaults to
            ones the same shape as `successes`.

        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
            An optional quadratic term to be added to the objective.
            Can also be a linear term by setting quadratic
            coefficient to 0.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomizer.

        Returns
        -------

        L : `selectinf.randomized.group_lasso.group_lasso`
        """

        n, p = X.shape

        loglike = rr.glm.logistic(X, successes, trials=trials, quadratic=quadratic)

        mean_diag = np.mean((X ** 2).sum(0))

        if ridge_term is None:
            # scale by the observed response, as in the Gaussian case
            ridge_term = np.std(successes) * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if randomizer_scale is None:
            randomizer_scale = np.sqrt(mean_diag) * 0.5

        randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)

        return group_lasso(loglike,
                           groups,
                           weights,
                           ridge_term,
                           randomizer)
    @staticmethod
    def coxph(X,
              times,
              status,
              groups,
              weights,
              quadratic=None,
              ridge_term=None,
              randomizer_scale=None):
        r"""
        Cox proportional hazards group LASSO. Objective function is
        (before randomization)

        .. math::

            \beta \mapsto \ell^{\text{Cox}}(\beta) + \sum_{g \in G} \lambda_g \|\beta_g\|_2

        where $\ell^{\text{Cox}}$ is the
        negative of the log of the Cox partial
        likelihood and $\lambda_g$ is `weights[g]`.
        Uses Efron's tie breaking method.

        Parameters
        ----------

        X : ndarray
            Shape (n,p) -- the design matrix.

        times : ndarray
            Shape (n,) -- the survival times.

        status : ndarray
            Shape (n,) -- the censoring status.

        groups : ndarray
            Shape (p,) -- group labels, one per feature.

        weights : dict
            Penalty weight for each group. An intercept, or other unpenalized
            features are handled by setting the corresponding group's weight to 0.

        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
            An optional quadratic term to be added to the objective.
            Can also be a linear term by setting quadratic
            coefficient to 0.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomizer.

        Returns
        -------

        L : `selectinf.randomized.group_lasso.group_lasso`
        """

        n, p = X.shape
        loglike = rr.glm.cox(X, times, status, quadratic=quadratic)

        # scale for randomization seems kind of meaningless here...

        mean_diag = np.mean((X ** 2).sum(0))

        if ridge_term is None:
            ridge_term = np.std(times) * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if randomizer_scale is None:
            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(times) * np.sqrt(n / (n - 1.))

        randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)

        return group_lasso(loglike,
                           groups,
                           weights,
                           ridge_term,
                           randomizer)
    @staticmethod
    def poisson(X,
                counts,
                groups,
                weights,
                quadratic=None,
                ridge_term=None,
                randomizer_scale=None):
        r"""
        Poisson log-linear group LASSO. Objective function is
        (before randomization)

        .. math::

            \beta \mapsto \ell^{\text{Poisson}}(\beta) + \sum_{g \in G} \lambda_g \|\beta_g\|_2

        where $\ell^{\text{Poisson}}$ is the negative
        of the log of the Poisson likelihood (half the deviance)
        and $\lambda_g$ is `weights[g]`.

        Parameters
        ----------

        X : ndarray
            Shape (n,p) -- the design matrix.

        counts : ndarray
            Shape (n,) -- the response.

        groups : ndarray
            Shape (p,) -- group labels, one per feature.

        weights : dict
            Penalty weight for each group. An intercept, or other unpenalized
            features are handled by setting the corresponding group's weight to 0.

        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
            An optional quadratic term to be added to the objective.
            Can also be a linear term by setting quadratic
            coefficient to 0.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomizer.

        Returns
        -------

        L : `selectinf.randomized.group_lasso.group_lasso`
        """

        n, p = X.shape
        loglike = rr.glm.poisson(X, counts, quadratic=quadratic)

        # scale for randomizer seems kind of meaningless here...

        mean_diag = np.mean((X ** 2).sum(0))

        if ridge_term is None:
            ridge_term = np.std(counts) * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if randomizer_scale is None:
            randomizer_scale = np.sqrt(mean_diag) * 0.5 * np.std(counts) * np.sqrt(n / (n - 1.))

        randomizer = randomization.isotropic_gaussian((p,), randomizer_scale)

        return group_lasso(loglike,
                           groups,
                           weights,
                           ridge_term,
                           randomizer)
    @staticmethod
    def sqrt_lasso(X,
                   Y,
                   groups,
                   weights,
                   quadratic=None,
                   ridge_term=None,
                   randomizer_scale=None,
                   solve_args={'min_its': 200},
                   perturb=None):
        r"""
        Use sqrt-LASSO to choose variables. Objective function is
        (before randomization)

        .. math::

            \beta \mapsto \|Y-X\beta\|_2 + \sum_{g \in G} \lambda_g \|\beta_g\|_2

        where $\lambda_g$ is `weights[g]`. After solving the problem
        treat as if `gaussian` with implied variance and choice of
        multiplier. See arxiv.org/abs/1504.08031 for details.

        Parameters
        ----------

        X : ndarray
            Shape (n,p) -- the design matrix.

        Y : ndarray
            Shape (n,) -- the response.

        groups : ndarray
            Shape (p,) -- group labels, one per feature.

        weights : dict
            Penalty weight for each group. An intercept, or other unpenalized
            features are handled by setting the corresponding group's weight to 0.

        quadratic : `regreg.identity_quadratic.identity_quadratic` (optional)
            An optional quadratic term to be added to the objective.
            Can also be a linear term by setting quadratic
            coefficient to 0.

        solve_args : dict
            Arguments passed to solver.

        ridge_term : float
            How big a ridge term to add?

        randomizer_scale : float
            Scale for IID components of randomizer.

        Returns
        -------

        L : `selectinf.randomized.group_lasso.group_lasso`

        Notes
        -----

        Unlike other variants of LASSO, this
        solves the problem on construction as the active
        set is needed to find equivalent gaussian LASSO.
        Assumes parametric model is correct for inference,
        i.e. does not accept a covariance estimator.
        """

        n, p = X.shape

        # per-feature weights for the sqrt-LASSO solve, derived here from the group weights
        feature_weights = np.asarray([weights[g] for g in groups])

        mean_diag = np.mean((X ** 2).sum(0))

        if ridge_term is None:
            ridge_term = np.sqrt(mean_diag) / (n - 1)

        if randomizer_scale is None:
            randomizer_scale = 0.5 * np.sqrt(mean_diag) / np.sqrt(n - 1)

        if perturb is None:
            perturb = np.random.standard_normal(p) * randomizer_scale

        randomQ = rr.identity_quadratic(ridge_term, 0, -perturb, 0)  # a ridge + linear term

        if quadratic is not None:
            totalQ = randomQ + quadratic
        else:
            totalQ = randomQ

        soln, sqrt_loss = solve_sqrt_lasso(X,
                                           Y,
                                           weights=feature_weights,
                                           quadratic=totalQ,
                                           solve_args=solve_args,
                                           force_fat=True)

        denom = np.linalg.norm(Y - X.dot(soln))
        loglike = rr.glm.gaussian(X, Y)

        randomizer = randomization.isotropic_gaussian((p,), randomizer_scale * denom)

        weights = copy(weights)
        for k in weights.keys():
            weights[k] = weights[k] * denom

        obj = group_lasso(loglike,
                          groups,
                          weights,
                          ridge_term * denom,
                          randomizer,
                          perturb=perturb * denom)
        obj._sqrt_soln = soln

        return obj
# private functions

def _reference_density_info(soln,
                            ordered_groups,     # ordering is used in assumptions about columns of opt_linear
                            ordered_variables,
                            opt_linear,
                            opt_offset,
                            observed_score_state,
                            observed_subgrad,
                            group_lasso_penalty,
                            precision_opt_linear,  # randomization precision times opt_linear
                            tol=1.e-6):
    r'''
    Compute generalized eigenvalues and, for each selected group $g$, return a
    function to evaluate the Jacobian as a function of $r_g=\|z_g\|_2$, fixing
    everything in the optimization variables except $r_g$.
    Here, $A_0$ is the Hessian of the loss evaluated at an appropriate point.
    '''

    pen = group_lasso_penalty   # shorthand
    nz = soln != 0              # nonzero
    nnz = nz.sum()              # num nonzero

    Hr = np.zeros((nnz, nnz))   # restricted hessian

    nz_groups = []
    for group in ordered_groups:
        group_idx = pen.groups == group
        group_soln = soln[pen.groups == group]
        ng = group_idx.sum()
        group_direction = u_g = group_soln / np.linalg.norm(group_soln)
        group_norm = r_g = np.linalg.norm(group_soln)   # really r_g^*
        group_weight = lambda_g = pen.weights[group]
        fraction = np.sqrt(r_g / (lambda_g + r_g))

        # one of the blocks in D_0^{1/2}
        group_block = (np.identity(ng) * fraction +
                       (1 - fraction) * np.multiply.outer(u_g, u_g))
        group_P = np.identity(ng) - np.multiply.outer(u_g, u_g)
        nz_groups.append((group,        # a group index g
                          group_idx,    # indices where group == idx
                          group_block,
                          group_P,
                          r_g,
                          lambda_g,
                          group_direction))

    # setup the block hessian Hr = D_0^{1/2} A_0 D_0^{1/2}

    ctr_g = 0
    for group_g in nz_groups:
        which_idx_g, sqrt_g = group_g[1], group_g[2]
        idx_g = slice(ctr_g, ctr_g + which_idx_g.sum())
        ctr_h = 0
        for group_h in nz_groups:
            which_idx_h, sqrt_h = group_h[1], group_h[2]
            idx_h = slice(ctr_h, ctr_h + which_idx_h.sum())
            H_hg = opt_linear[which_idx_h][:, idx_g]   # E columns of Hessian + epsilon I
            assert(np.allclose(H_hg, opt_linear[which_idx_g][:, idx_h].T))
            if group_h[0] == group_g[0]:
                # subtract the identity matrix
                H_hg -= np.identity(H_hg.shape[0])
            # multiply left and right by D_0^{1/2}
            Hr[idx_g][:, idx_h] += sqrt_h.dot(H_hg).dot(sqrt_g).T
            ctr_h += which_idx_h.sum()
        ctr_g += which_idx_g.sum()

    implied_precision = np.dot(opt_linear.T, precision_opt_linear)

    # compute (I+Hr)^{-1}Hr
    final_matrix = np.linalg.inv(Hr)

    ctr_g = 0
    ref_dens_info = {}

    for group_g in nz_groups:
        which_g, which_idx_g, _, P_g, r_g, lambda_g, u_g = group_g
        idx_g = slice(ctr_g, ctr_g + which_idx_g.sum())

        if which_idx_g.sum() > 1:
            block_g = final_matrix[idx_g][:, idx_g]
            block_g = P_g.dot(block_g).dot(P_g)

            # \tilde{\gamma}'s
            eigvals_g = np.linalg.eigvalsh(block_g)[1:]

            # factors in the determinant
            factors_g = lambda_g / ((eigvals_g + 1) * r_g)
            k_g = which_idx_g.sum()

            def logdet_g(factors_g, r_g, k_g, lambda_g, r):
                r = np.reshape(r, (-1,))
                num = np.multiply.outer(factors_g, r - r_g)
                den = np.add.outer(lambda_g * np.ones_like(factors_g), r)
                return np.squeeze(np.log(1 + num / den).sum(0) +
                                  np.log(lambda_g + r) * (k_g - 1))

            logdet_g = functools.partial(logdet_g, factors_g, r_g, k_g, lambda_g)
        else:
            logdet_g = lambda r: np.zeros_like(r).reshape(-1)

        implied_variance = 1 / (np.dot(implied_precision[idx_g][:, idx_g], u_g) * u_g).sum()

        # zero out this group's coordinate in the solution
        soln_idx = soln.copy()[ordered_variables]
        soln_idx[idx_g] = 0
        offset = (observed_subgrad + opt_linear.dot(soln_idx))
        direction = precision_opt_linear[:, idx_g].dot(u_g)

        num_implied_mean = -((observed_score_state + offset) * direction).sum()
        implied_mean = num_implied_mean * implied_variance

        def log_cond_density(offset,
                             direction,
                             implied_variance,
                             log_det,
                             r,
                             score_state):
            r = np.reshape(r, (-1,))
            num_implied_mean = ((score_state + offset) * direction).sum()
            implied_mean = num_implied_mean * implied_variance
            return np.squeeze(log_det(r) -
                              0.5 * (r - implied_mean)**2 / implied_variance)

        log_cond_density_g = functools.partial(log_cond_density,
                                               offset,
                                               direction,
                                               implied_variance,
                                               logdet_g)

        ref_dens_info[which_g] = (u_g,
                                  idx_g,
                                  implied_mean,
                                  implied_variance,
                                  logdet_g,
                                  log_cond_density_g)

        ctr_g += which_idx_g.sum()

    return ref_dens_info
class polynomial_gaussian_sampler(affine_gaussian_sampler):

    r"""
    1-dimensional Gaussian density restricted to $[0,\infty)$ times a polynomial
    """
    def __init__(self,
                 implied_mean,
                 implied_covariance,
                 initial_point,
                 observed_score_state,
                 log_cond_density,
                 log_det,
                 logdens_transform,  # describes how score enters log_density.
                 selection_info=None,
                 useC=False):

        self.mean = implied_mean
        self.covariance = np.zeros((1, 1))
        self.covariance[0, 0] = implied_covariance
        self._log_cond_density = log_cond_density
        self._log_det = log_det
        self.initial_point = initial_point
        self.observed_score_state = observed_score_state
        self.selection_info = selection_info
        self.logdens_transform = logdens_transform
    def sample(self, ndraw):
        '''
        Sample from a Gaussian truncated at zero
        with our mean and covariance,
        but give weight based on `self.log_det`

        Parameters
        ----------

        ndraw : int
            How long a chain to return?
        '''

        mean, variance = self.mean, self.covariance[0, 0]
        sd = np.sqrt(variance)

        # draw Z | Z > -mean/sd by inverse-CDF, then shift and scale back
        Zscore = mean / sd
        selection_prob = ndist.sf(-Zscore)
        truncated_Z = ndist.ppf((1 - selection_prob) + selection_prob * np.random.sample(ndraw))
        truncated_normal = truncated_Z * sd + mean

        # log importance weights from the determinant (polynomial) factor
        logW = self._log_det(truncated_normal)

        return truncated_normal.reshape((-1, 1)), logW
    def selective_MLE(self,
                      observed_target,
                      target_cov,
                      target_score_cov,
                      # initial (observed) value of optimization variables --
                      # used as a feasible point.
                      # precise value used only for independent estimator
                      init_soln,
                      solve_args={'tol': 1.e-12},
                      level=0.9):
        raise NotImplementedError

    def reparam_map(self,
                    parameter_target,
                    observed_target,
                    target_cov,
                    target_score_cov,
                    init_soln,
                    solve_args={'tol': 1.e-12},
                    useC=True):
        raise NotImplementedError

    def _log_density_ray(self,
                         candidate,
                         direction,
                         nuisance,
                         gaussian_sample,
                         opt_sample):

        value = affine_gaussian_sampler._log_density_ray(self,
                                                         candidate,
                                                         direction,
                                                         nuisance,
                                                         gaussian_sample,
                                                         opt_sample)
        value += self._log_det(opt_sample)
        return value
# functions to construct targets of inference
# and covariance with score representation
def selected_targets(loglike,
                     W,
                     active_groups,
                     penalty,
                     sign_info={},
                     dispersion=None,
                     solve_args={'tol': 1.e-12, 'min_its': 50}):

    X, y = loglike.data
    n, p = X.shape

    features = []
    group_assignments = []
    for group in active_groups:
        group_idx = penalty.groups == group
        features.extend(np.nonzero(group_idx)[0])
        group_assignments.extend([group] * group_idx.sum())

    Xfeat = X[:, features]
    Qfeat = Xfeat.T.dot(W[:, None] * Xfeat)
    observed_target = restricted_estimator(loglike, features, solve_args=solve_args)
    cov_target = np.linalg.inv(Qfeat)
    _score_linear = -Xfeat.T.dot(W[:, None] * X).T
    crosscov_target_score = _score_linear.dot(cov_target)
    alternatives = ['twosided'] * len(features)

    if dispersion is None:  # use Pearson's X^2
        dispersion = ((y - loglike.saturated_loss.mean_function(
                           Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1])

    return (observed_target,
            group_assignments,
            cov_target * dispersion,
            crosscov_target_score.T * dispersion,
            alternatives)
def full_targets(loglike,
                 W,
                 active_groups,
                 penalty,
                 dispersion=None,
                 solve_args={'tol': 1.e-12, 'min_its': 50}):

    X, y = loglike.data
    n, p = X.shape

    features = []
    group_assignments = []
    for group in active_groups:
        group_idx = penalty.groups == group
        features.extend(np.nonzero(group_idx)[0])
        group_assignments.extend([group] * group_idx.sum())

    # target is one-step estimator

    Qfull = X.T.dot(W[:, None] * X)
    Qfull_inv = np.linalg.inv(Qfull)
    full_estimator = loglike.solve(**solve_args)
    cov_target = Qfull_inv[features][:, features]
    observed_target = full_estimator[features]
    crosscov_target_score = np.zeros((p, cov_target.shape[0]))
    crosscov_target_score[features] = -np.identity(cov_target.shape[0])

    if dispersion is None:  # use Pearson's X^2
        dispersion = (((y - loglike.saturated_loss.mean_function(X.dot(full_estimator))) ** 2 / W).sum() /
                      (n - p))

    alternatives = ['twosided'] * len(features)

    return (observed_target,
            group_assignments,
            cov_target * dispersion,
            crosscov_target_score.T * dispersion,
            alternatives)
def debiased_targets(loglike,
                     W,
                     active_groups,
                     penalty,
                     sign_info={},
                     dispersion=None,
                     approximate_inverse='JM',
                     debiasing_args={}):

    X, y = loglike.data
    n, p = X.shape

    features = []
    group_assignments = []
    for group in active_groups:
        group_idx = penalty.groups == group
        features.extend(np.nonzero(group_idx)[0])
        group_assignments.extend([group] * group_idx.sum())

    # relevant rows of approximate inverse

    if approximate_inverse == 'JM':
        Qinv_hat = np.atleast_2d(debiasing_matrix(X * np.sqrt(W)[:, None],
                                                  features,
                                                  **debiasing_args)) / n
    else:
        Qinv_hat = np.atleast_2d(pseudoinverse_debiasing_matrix(X * np.sqrt(W)[:, None],
                                                                features,
                                                                **debiasing_args))

    problem = rr.simple_problem(loglike, penalty)
    nonrand_soln = problem.solve()
    G_nonrand = loglike.smooth_objective(nonrand_soln, 'grad')

    observed_target = nonrand_soln[features] - Qinv_hat.dot(G_nonrand)

    if p > n:
        M1 = Qinv_hat.dot(X.T)
        cov_target = (M1 * W[None, :]).dot(M1.T)
        crosscov_target_score = -(M1 * W[None, :]).dot(X).T
    else:
        Qfull = X.T.dot(W[:, None] * X)
        cov_target = Qinv_hat.dot(Qfull.dot(Qinv_hat.T))
        crosscov_target_score = -Qinv_hat.dot(Qfull).T

    if dispersion is None:  # use Pearson's X^2
        Xfeat = X[:, features]
        Qrelax = Xfeat.T.dot(W[:, None] * Xfeat)
        relaxed_soln = nonrand_soln[features] - np.linalg.inv(Qrelax).dot(G_nonrand[features])
        dispersion = (((y - loglike.saturated_loss.mean_function(Xfeat.dot(relaxed_soln)))**2 / W).sum() /
                      (n - len(features)))

    alternatives = ['twosided'] * len(features)

    return (observed_target,
            group_assignments,
            cov_target * dispersion,
            crosscov_target_score.T * dispersion,
            alternatives)
def form_targets(target,
                 loglike,
                 W,
                 active_groups,
                 penalty,
                 **kwargs):

    _target = {'full': full_targets,
               'selected': selected_targets,
               'debiased': debiased_targets}[target]
    return _target(loglike,
                   W,
                   active_groups,
                   penalty,
                   **kwargs)
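# A minimal end-to-end sketch of the intended workflow, smoke-test style: the data,
# group structure and weights below are illustrative assumptions, not part of the
# module's API. It simulates a Gaussian response, fits the randomized group LASSO,
# forms selected targets for the active groups and summarizes group-wise inference.

if __name__ == '__main__':

    np.random.seed(0)
    n, p = 200, 10
    X = np.random.standard_normal((n, p))
    beta = np.zeros(p)
    beta[:3] = 2.
    Y = X.dot(beta) + np.random.standard_normal(n)

    groups = np.repeat(np.arange(5), 2)                       # 5 groups of 2 features
    weights = dict([(g, 3.) for g in np.unique(groups)])      # equal group weights

    conv = group_lasso.gaussian(X, Y, groups, weights)
    signs = conv.fit()
    nonzero = signs['active_groups']

    if len(nonzero) > 0:
        (observed_target,
         group_assignments,
         cov_target,
         crosscov_target_score,
         alternatives) = selected_targets(conv.loglike,
                                          conv._W,
                                          nonzero,
                                          conv.penalty)

        pvalues, pivots, intervals = conv.summary(observed_target,
                                                  np.asarray(group_assignments),
                                                  cov_target,
                                                  crosscov_target_score,
                                                  alternatives,
                                                  ndraw=5000,
                                                  compute_intervals=True)
        print(pvalues)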