Source code for selectinf.algorithms.softmax

r"""

This module implements the softmax approximation for
a multivariate Gaussian truncated by affine constraints. The
approximation replaces the normalizing constant of the
truncated likelihood.

"""


from copy import copy

import numpy as np
from regreg.api import smooth_atom

class softmax_objective(smooth_atom):

    r"""
    The softmax objective

    .. math::

        z \mapsto \frac{1}{2} z^TQz + \sum_{i=1}^{m} \log
        \left(1 + \frac{1}{(b_i - A_i^T z) / s_i} \right)

    Notes
    -----

    Recall Chernoff's approximation for $Z \sim N(0,I_{n \times n})$:

    .. math::

        -\log P_{\mu}(AZ \leq b) \approx \inf_{z: Az \leq b}
        \frac{1}{2}\|z-\mu\|^2_2 = \inf_{z} I_K(z) + \frac{1}{2}\|z-\mu\|^2_2

    where $I_K$ is the constraint for the set $K=\left\{z: Az \leq b \right\}.$

    The softmax approximation is similar to Chernoff's approximation
    though it uses a soft max barrier function

    .. math::

        \sum_{i=1}^{m}\log\left(1+\frac{1}{b_i-A_i^T z}\right).

    The softmax objective is

    .. math::

        z \mapsto \frac{1}{2} z^TQz +
        \sum_{i=1}^{m}\log\left(1+\frac{1}{(b_i-A_i^T z) / s_i}\right)

    where $s_i$ are scalings and $Q$ is a precision matrix
    (i.e. inverse covariance).
    """

    objective_template = r"""\text{softmax}_K\left(%(var)s\right)"""
    def __init__(self,
                 shape,
                 precision,
                 constraints,
                 feasible_point,
                 coef=1.,
                 offset=None,
                 quadratic=None,
                 initial=None):

        smooth_atom.__init__(self,
                             shape,
                             offset=offset,
                             quadratic=quadratic,
                             initial=initial,
                             coef=coef)

        self.precision = precision
        self.coefs[:] = feasible_point  # not sure we really need to copy

        self.constraints = constraints
        con_linear = self.constraints.linear_part
        # per-constraint scalings s_i = sqrt(A_i Q A_i^T), with Q the precision
        self.scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T)))
    def smooth_objective(self, param, mode='both', check_feasibility=False):
        """
        Evaluate the smooth objective, computing its value,
        gradient or both.

        Parameters
        ----------

        param : ndarray
            The current parameter values.

        mode : str
            One of ['func', 'grad', 'both'].

        check_feasibility : bool
            If True, return `np.inf` when
            the point is not feasible, i.e. when `param` is not
            in the domain.

        Returns
        -------

        If `mode` is 'func' returns just the objective value
        at `param`, else if `mode` is 'grad' returns the gradient,
        else returns both.
        """

        param = self.apply_offset(param)

        con = self.constraints  # shorthand
        # scaled slacks (b - A z) / s; nonnegative on the feasible set
        slack = (con.offset - con.linear_part.dot(param)) / self.scaling

        BIG = 1e20  # value returned for infeasible points
        if mode in ['both', 'func']:
            if np.any(slack < 0):
                f = BIG
            else:
                f = (0.5 * (param * self.precision.dot(param)).sum()
                     + np.log(1. + 1. / slack).sum())
            f = self.scale(f)

        if mode in ['both', 'grad']:
            # quadratic term plus barrier contribution
            dslack = 1. / (slack + self.scaling) - 1. / slack
            g = self.precision.dot(param) - con.linear_part.T.dot(dslack)
            g = self.scale(g)

        if mode == 'both':
            return f, g
        elif mode == 'grad':
            return g
        elif mode == 'func':
            return f
        else:
            raise ValueError("mode incorrectly specified")
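A minimal usage sketch (not part of the module above). The `constraints`
argument is only required here to expose `linear_part` and `offset`
attributes, so a simple namespace stands in for selectinf's affine
constraint object; the matrices and the feasible point below are
illustrative assumptions, not values from the package.

import numpy as np
from types import SimpleNamespace

from selectinf.algorithms.softmax import softmax_objective

# Two affine constraints A z <= b in R^3, with identity precision.
A = np.array([[1., 0., 0.],
              [0., 1., 0.]])
b = np.array([1., 2.])
con = SimpleNamespace(linear_part=A, offset=b)  # stand-in constraint object

Q = np.identity(3)   # precision matrix (inverse covariance)
z0 = np.zeros(3)     # strictly feasible since A z0 < b

objective = softmax_objective(shape=(3,),
                              precision=Q,
                              constraints=con,
                              feasible_point=z0)

# Evaluate the softmax objective and its gradient at the feasible point.
value, grad = objective.smooth_objective(z0, mode='both')
print(value, grad)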