Source code for mumott.optimization.loss_functions.squared_loss

import logging
from typing import Dict

import numpy as np
from numpy.typing import NDArray

from mumott.methods.residual_calculators.base_residual_calculator import ResidualCalculator
from .base_loss_function import LossFunction

logger = logging.getLogger(__name__)

[docs]class SquaredLoss(LossFunction): r"""Class object for obtaining the squared loss function and gradient from a given :ref:`residual_calculator <residual_calculators>`. This loss function can be written as :math:`L(r(x, d)) = 0.5 r(x, d)^2`, where :math:`r` is the residual, a function of :math:`x`, the optimization coefficients, and :math:`d`, the data. The gradient with respect to :math:`x` is then :math:`\frac{\partial r}{\partial x}`. The partial derivative of :math:`r` with respect to :math:`x` is the responsibility of the :attr:`residual_calculator` to compute. Generally speaking, the squared loss function is easy to compute and has a well-behaved gradient, but it is not robust against outliers in the data. Using weights to normalize residuals by the variance can mitigate this somewhat. Parameters ---------- residual_calculator : ResidualCalculator The :ref:`residual calculator instance <residual_calculators>` from which the residuals, weights, and gradient terms are obtained. use_weights : bool Whether to use weighting in the computation of the residual norm and gradient. Default is ``False``. preconditioner : np.ndarray A preconditioner to be applied to the gradient. Must have the same shape as :attr:`residual_calculator.coefficients` or it must be possible to broadcast by multiplication. residual_norm_multiplier : float A multiplier that is applied to the residual norm and gradient. Useful in cases where a very small or large loss function value changes the optimizer behaviour. """ def __init__(self, residual_calculator: ResidualCalculator, use_weights: bool = False, preconditioner: NDArray[float] = None, residual_norm_multiplier: float = 1): super().__init__(residual_calculator, use_weights, preconditioner, residual_norm_multiplier) def _get_residual_norm_internal(self, get_gradient: bool = False, gradient_part: str = None) -> Dict: """ Gets the residual norm, and if needed, the gradient, using the attached :attr:`residual_calculator`. Parameters ---------- get_gradient Whether to return the gradient. Default is ``False``. gradient_part Used for the zonal harmonics resonstructions to determine what part of the gradient is being calculated. Default is None. Returns ------- A ``dict`` with two entries, ``residual_norm`` and ``gradient``. If ``get_gradient`` is false, its value will be ``None``. """ residual_calculator_output = self._residual_calculator.get_residuals( get_gradient=get_gradient, get_weights=self._use_weights, gradient_part=gradient_part) residuals = residual_calculator_output['residuals'] if self.use_weights: # weights (1/variance) need to be applied since they depend on the loss function residual_norm = 0.5 * np.einsum( 'ijkh, ijkh, ijkh -> ...', residuals, residuals, residual_calculator_output['weights']) else: residual_norm = 0.5 * np.einsum( 'ijkh, ijkh -> ...', residuals, residuals) if residual_norm < 1: logger.warning(f'The residual norm value ({residual_norm}) is < 1.' ' Note that some optimizers change their convergence criteria for' ' loss functions < 1!') return dict(residual_norm=residual_norm, gradient=residual_calculator_output['gradient'])
[docs] def get_estimate_of_lifschitz_constant(self) -> float: """ Calculate an estimate of the Lifschitz constant of this cost function. Used to determine a safe step-size for certain optimization algorithms. Returns ------- lifschitz_constant Lifschitz constant. """ matrix_norm = self._residual_calculator.get_estimate_of_matrix_norm() return 2 / matrix_norm
@property def _function_as_str(self) -> str: """ Should return a string representation of the associated loss function. """ return 'L(r) = r ** 2' @property def _function_as_tex(self) -> str: """ Should return a string representation of the associated loss function in MathJax-renderable TeX.""" return r'$L(r) = r^2$'