Source code for selectinf.algorithms.softmax

r"""

This module implements the softmax approximation for
a multivariate Gaussian truncated by affine constraints. The
approximation replaces the normalizing constant of the
truncated likelihood.

"""


from copy import copy

import numpy as np
from regreg.api import smooth_atom

class softmax_objective(smooth_atom):

    r"""
    The softmax objective

    .. math::

        z \mapsto \frac{1}{2} z^TQz + \sum_{i=1}^{m} \log
        \left(1 + \frac{1}{(b_i - A_i^T z) / s_i} \right)

    Notes
    -----

    Recall Chernoff's approximation for $Z \sim N(0,I_{n \times n})$:

    .. math::

        -\log P_{\mu}(AZ \leq b) \approx \inf_{z: Az \leq b}
        \frac{1}{2}\|z-\mu\|^2_2 = \inf_{z} I_K(z) + \frac{1}{2}\|z-\mu\|^2_2

    where $I_K$ is the constraint for the set $K=\left\{z: Az \leq b \right\}.$

    The softmax approximation is similar to Chernoff's approximation
    though it uses a soft max barrier function

    .. math::

        \sum_{i=1}^{m}\log\left(1+\frac{1}{b_i-A_i^T z}\right).

    The softmax objective is

    .. math::

        z \mapsto \frac{1}{2} z^TQz +
        \sum_{i=1}^{m}\log\left(1+\frac{1}{(b_i-A_i^T z) / s_i}\right)

    where $s_i$ are scalings and $Q$ is a precision matrix
    (i.e. inverse covariance).
    """

    objective_template = r"""\text{softmax}_K\left(%(var)s\right)"""
    def __init__(self,
                 shape,
                 precision,
                 constraints,
                 feasible_point,
                 coef=1.,
                 offset=None,
                 quadratic=None,
                 initial=None):

        smooth_atom.__init__(self,
                             shape,
                             offset=offset,
                             quadratic=quadratic,
                             initial=initial,
                             coef=coef)

        self.precision = precision
        self.coefs[:] = feasible_point  # not sure we really need to copy

        self.constraints = constraints
        con_linear = self.constraints.linear_part
        # per-constraint scalings s_i = sqrt(A_i Q A_i^T), with Q the precision
        self.scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T)))
    def smooth_objective(self, param, mode='both', check_feasibility=False):
        """
        Evaluate the smooth objective, computing its value,
        gradient or both.

        Parameters
        ----------

        param : ndarray
            The current parameter values.

        mode : str
            One of ['func', 'grad', 'both'].

        check_feasibility : bool
            If True, return `np.inf` when
            the point is not feasible, i.e. when `param` is not
            in the domain.

        Returns
        -------

        If `mode` is 'func' returns just the objective value
        at `param`, else if `mode` is 'grad' returns the gradient,
        else returns both.
        """

        param = self.apply_offset(param)

        con = self.constraints  # shorthand
        # scaled slacks (b - A z) / s; nonnegative on the feasible set
        slack = (con.offset - con.linear_part.dot(param)) / self.scaling

        BIG = 1e20  # value returned for infeasible points
        if mode in ['both', 'func']:
            if np.any(slack < 0):
                f = BIG
            else:
                f = (0.5 * (param * self.precision.dot(param)).sum()
                     + np.log(1. + 1. / slack).sum())
            f = self.scale(f)

        if mode in ['both', 'grad']:
            # quadratic term plus barrier contribution
            dslack = 1. / (slack + self.scaling) - 1. / slack
            g = self.precision.dot(param) - con.linear_part.T.dot(dslack)
            g = self.scale(g)

        if mode == 'both':
            return f, g
        elif mode == 'grad':
            return g
        elif mode == 'func':
            return f
        else:
            raise ValueError("mode incorrectly specified")
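A minimal usage sketch (not part of the module above). The `constraints`
argument is only required here to expose `linear_part` and `offset`
attributes, so a simple namespace stands in for selectinf's affine
constraint object; the matrices and the feasible point below are
illustrative assumptions, not values from the package.

import numpy as np
from types import SimpleNamespace

from selectinf.algorithms.softmax import softmax_objective

# Two affine constraints A z <= b in R^3, with identity precision.
A = np.array([[1., 0., 0.],
              [0., 1., 0.]])
b = np.array([1., 2.])
con = SimpleNamespace(linear_part=A, offset=b)  # stand-in constraint object

Q = np.identity(3)   # precision matrix (inverse covariance)
z0 = np.zeros(3)     # strictly feasible since A z0 < b

objective = softmax_objective(shape=(3,),
                              precision=Q,
                              constraints=con,
                              feasible_point=z0)

# Evaluate the softmax objective and its gradient at the feasible point.
value, grad = objective.smooth_objective(z0, mode='both')
print(value, grad)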