import logging
from typing import Dict
import numpy as np
from numpy.typing import NDArray
from mumott.methods.residual_calculators.base_residual_calculator import ResidualCalculator
from .base_loss_function import LossFunction
logger = logging.getLogger(__name__)
class SquaredLoss(LossFunction):
r"""Class object for obtaining the squared loss function and
gradient from a given :ref:`residual_calculator <residual_calculators>`.
This loss function can be written as :math:`L(r(x, d)) = 0.5 r(x, d)^2`, where :math:`r` is the
residual, a function of :math:`x`, the optimization coefficients, and :math:`d`, the data.
    The gradient with respect to :math:`x` is then :math:`r(x, d) \frac{\partial r}{\partial x}`.
    Computing the partial derivative of :math:`r` with respect to :math:`x` is the
    responsibility of the attached :attr:`residual_calculator`.
    Generally speaking, the squared loss function is easy to compute and has a
    well-behaved gradient, but it is not robust against outliers in the data.
    Using weights equal to the inverse of the data variance can mitigate this
    somewhat (see the examples below).

Parameters
----------
residual_calculator : ResidualCalculator
The :ref:`residual calculator instance <residual_calculators>` from which the
residuals, weights, and gradient terms are obtained.
use_weights : bool
Whether to use weighting in the computation of the residual norm and gradient.
Default is ``False``.
preconditioner : np.ndarray
A preconditioner to be applied to the gradient. Must have the same shape as
:attr:`residual_calculator.coefficients` or it must be possible to broadcast by multiplication.
residual_norm_multiplier : float
A multiplier that is applied to the residual norm and gradient. Useful in cases where
a very small or large loss function value changes the optimizer behaviour.
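
    Examples
    --------
    A minimal sketch of the loss arithmetic (here a small array stands in for
    the residuals produced by an actual residual calculator, which requires
    geometry and data to construct):

    >>> import numpy as np
    >>> residuals = np.arange(8.).reshape(1, 2, 2, 2)
    >>> float(0.5 * np.einsum('ijkh, ijkh -> ...', residuals, residuals))
    70.0
    >>> weights = np.full_like(residuals, 0.5)  # e.g. inverse variances
    >>> float(0.5 * np.einsum('ijkh, ijkh, ijkh -> ...',
    ...                       residuals, residuals, weights))
    35.0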
"""
def __init__(self,
residual_calculator: ResidualCalculator,
use_weights: bool = False,
preconditioner: NDArray[float] = None,
residual_norm_multiplier: float = 1):
super().__init__(residual_calculator, use_weights, preconditioner, residual_norm_multiplier)
def _get_residual_norm_internal(self, get_gradient: bool = False, gradient_part: str = None) -> Dict:
""" Gets the residual norm, and if needed,
the gradient, using the attached :attr:`residual_calculator`.
Parameters
----------
get_gradient
Whether to return the gradient. Default is ``False``.
gradient_part
            Used for zonal harmonics reconstructions to determine which part of the
            gradient is being calculated. Default is ``None``.

Returns
-------
        A ``dict`` with two entries, ``residual_norm`` and ``gradient``.
        If ``get_gradient`` is ``False``, the value of ``gradient`` will be ``None``.
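
        Notes
        -----
        With weights :math:`w` (typically inverse variances) the returned scalar
        is :math:`\frac{1}{2} \sum w r^2`; without weights it is
        :math:`\frac{1}{2} \sum r^2`, matching the class-level definition of
        the loss.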
"""
residual_calculator_output = self._residual_calculator.get_residuals(
get_gradient=get_gradient, get_weights=self._use_weights, gradient_part=gradient_part)
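        # The residuals (and, when requested, the weights) are four-dimensional
        # arrays; the einsum contractions below sum over all four axes to
        # produce a scalar.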
residuals = residual_calculator_output['residuals']
if self.use_weights:
            # Weights (typically 1/variance) are applied here rather than in the
            # residual calculator, since how they enter depends on the loss function.
residual_norm = 0.5 * np.einsum(
'ijkh, ijkh, ijkh -> ...', residuals, residuals, residual_calculator_output['weights'])
else:
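            # Unweighted case: plain half-sum of squared residuals.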
residual_norm = 0.5 * np.einsum(
'ijkh, ijkh -> ...', residuals, residuals)
if residual_norm < 1:
logger.warning(f'The residual norm value ({residual_norm}) is < 1.'
' Note that some optimizers change their convergence criteria for'
' loss functions < 1!')
return dict(residual_norm=residual_norm, gradient=residual_calculator_output['gradient'])

    def get_estimate_of_lifschitz_constant(self) -> float:
"""
        Calculate an estimate of the Lipschitz constant of this loss function,
        used to determine a safe step size for certain optimization algorithms.

        Returns
        -------
        lipschitz_constant
            Lipschitz constant.
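
        Notes
        -----
        For an :math:`L`-smooth loss, plain gradient descent with a fixed step
        size of :math:`1 / L` guarantees monotone decrease of the objective;
        this is one way the estimate returned here can be used (a general
        guideline, not specific to this class).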
"""
matrix_norm = self._residual_calculator.get_estimate_of_matrix_norm()
return 2 / matrix_norm
@property
def _function_as_str(self) -> str:
""" Should return a string representation of the associated loss function. """
        return 'L(r) = 0.5 * r ** 2'
@property
def _function_as_tex(self) -> str:
""" Should return a string representation of the associated loss function
in MathJax-renderable TeX."""
        return r'$L(r) = 0.5 r^2$'