Source code for mumott.optimization.optimizers.gradient_descent

import logging

from typing import Dict, Any

import numpy as np

from mumott.core.hashing import list_to_hash
from mumott.optimization.loss_functions.base_loss_function import LossFunction
from .base_optimizer import Optimizer


logger = logging.getLogger(__name__)


class GradientDescent(Optimizer):
    r"""This Optimizer is a gradient descent (sometimes called steepest descent) solver,
    which can be set to terminate based on the loss function and/or the maximum number
    of iterations.

    It also supports the use of Nesterov accelerated momentum, which features a
    look-ahead momentum term based on the gradients of previous iterations.
    The update sequence may be written

    .. math::
        x &\leftarrow x - (p + \alpha(\nabla(x - p) + \Lambda)) \\
        p &\leftarrow \beta(p + \alpha(\nabla(x - p) + \Lambda))

    where :math:`x` are the optimization coefficients, :math:`p` is the momentum, and
    :math:`\Lambda` is the regularization term. :math:`\alpha` is the step size and
    :math:`\beta` is the Nesterov momentum weight.

    Parameters
    ----------
    loss_function : LossFunction
        The :ref:`loss function <loss_functions>` to be minimized using this algorithm.
    kwargs : Dict[str, Any]
        Miscellaneous options. See notes for valid entries.

    Notes
    -----
    Valid entries in :attr:`kwargs` are

        x0
            Initial guess for the solution vector. Must be the same size as
            :attr:`residual_calculator.coefficients`. Defaults to
            :attr:`loss_function.initial_values`.
        step_size : float
            Step size for the gradient, labelled :math:`\alpha` above.
            Must be strictly positive. Default value is ``1``.
        nestorov_weight : float
            The size of the look-ahead term in each iteration, labelled :math:`\beta`
            above. Must be in the range ``[0, 1]``, including the endpoints.
            The default value is ``0``, which implies that the momentum term
            is not active.
        maxiter : int
            Maximum number of iterations. Default value is ``5``.
        ftol : float
            The tolerance for the relative change in the loss function before
            termination. Termination can only be induced after at least 5 iterations.
            If ``None``, termination only occurs once :attr:`maxiter` iterations
            have been performed. Default value is ``None``.
        display_loss : bool
            If ``True``, displays the change in loss at every iteration.
            Default value is ``False``.
        enforce_non_negativity : bool
            Enforces non-negativity of all the coefficients. Should only be used
            with local or scalar representations. Default value is ``False``.
    """

    def __init__(self,
                 loss_function: LossFunction,
                 **kwargs: Dict[str, Any]):
        super().__init__(loss_function, **kwargs)
    def optimize(self) -> Dict:
        """ Executes the optimization using the options stored in this class
        instance. The optimization will continue until convergence, or until
        the maximum number of iterations (:attr:`maxiter`) is exceeded.

        Returns
        -------
            A ``dict`` of optimization results. See `scipy.optimize.OptimizeResult
            <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
            for details. The entry ``'x'``, which contains the result, will be
            reshaped using the shape of the gradient from :attr:`loss_function`.
        """
        opt_kwargs = dict(x0=self._loss_function.initial_values,
                          ftol=None,
                          maxiter=5,
                          enforce_non_negativity=False,
                          display_loss=False,
                          step_size=1.,
                          nestorov_weight=0.)

        for k in opt_kwargs:
            if k in dict(self):
                opt_kwargs[k] = self[k]

        for k in dict(self):
            if k not in opt_kwargs:
                logger.warning(f'Unknown option {k}, with value {self[k]}, has been ignored.')

        if opt_kwargs['x0'] is None:
            coefficients = self._loss_function.initial_values
        else:
            coefficients = opt_kwargs['x0']

        if opt_kwargs['ftol'] is None:
            ftol = -np.inf
        else:
            ftol = opt_kwargs['ftol']

        if opt_kwargs['step_size'] <= 0:
            raise ValueError('step_size must be greater than 0,'
                             f' but its value is {opt_kwargs["step_size"]}!')

        if not 0 <= opt_kwargs['nestorov_weight'] <= 1:
            raise ValueError('nestorov_weight must be in the range [0, 1] inclusive,'
                             f' but its value is {opt_kwargs["nestorov_weight"]}!')

        previous_loss = -1
        previous_gradient = np.zeros_like(coefficients)
        for i in self._tqdm(opt_kwargs['maxiter']):
            # Evaluate the loss and gradient at the look-ahead point x - p.
            d = self._loss_function.get_loss(coefficients - previous_gradient, get_gradient=True)
            relative_change = (previous_loss - d['loss']) / d['loss']
            if opt_kwargs['display_loss']:
                logger.info(f'Iteration: {i} Loss function: {d["loss"]:.2e}'
                            f' Relative change: {relative_change:.2e}')
            # Scale the gradient by the step size and fold it into the momentum term.
            d['gradient'] *= opt_kwargs['step_size']
            previous_gradient += d['gradient']
            coefficients -= previous_gradient
            previous_loss = d['loss']
            # Damp the momentum carried into the next iteration.
            previous_gradient *= opt_kwargs['nestorov_weight']
            if opt_kwargs['enforce_non_negativity']:
                np.clip(coefficients, 0, None, out=coefficients)
            if i > 5 and relative_change < ftol:
                logger.info(f'Relative change ({relative_change}) is less than ftol ({ftol})!'
                            ' Optimization finished.')
                break

        result = dict(x=coefficients, loss=d['loss'], nit=i + 1)
        return dict(result)
    def __hash__(self) -> int:
        to_hash = [self._options, hash(self._loss_function)]
        return int(list_to_hash(to_hash), 16)
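

# Editorial sketch (not part of the mumott API): the function below illustrates the
# update sequence from the class docstring on a simple quadratic loss using plain
# numpy, with the regularization term :math:`\Lambda` omitted. The names
# `nesterov_update_demo` and `target` are hypothetical and exist only for this
# illustration; the loop body mirrors the one in `GradientDescent.optimize`.
def nesterov_update_demo(step_size: float = 0.1,
                         nestorov_weight: float = 0.9,
                         maxiter: int = 100) -> np.ndarray:
    """ Minimize 0.5 * ||y - target||^2 with the momentum update
    x <- x - (p + alpha * grad(x - p)), p <- beta * (p + alpha * grad(x - p)). """
    target = np.array([1.0, -2.0, 3.0])
    x = np.zeros_like(target)  # optimization coefficients
    p = np.zeros_like(target)  # momentum term
    for _ in range(maxiter):
        gradient = (x - p) - target  # gradient of the quadratic loss at the look-ahead point
        gradient *= step_size
        p += gradient
        x -= p
        p *= nestorov_weight
    # With these settings x approaches `target` as the iterations proceed.
    return x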
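

# Editorial usage sketch showing how this optimizer might be configured and run.
# It assumes that `loss_function` is a LossFunction instance constructed elsewhere
# (e.g. from a residual calculator); that setup is not shown here, the function name
# is hypothetical, and the option values are placeholders rather than recommendations.
def run_gradient_descent_example(loss_function: LossFunction) -> Dict:
    optimizer = GradientDescent(loss_function,
                                step_size=0.5,
                                nestorov_weight=0.9,
                                maxiter=20,
                                ftol=1e-4,
                                enforce_non_negativity=True)
    result = optimizer.optimize()
    # `result['x']` holds the optimized coefficients, while `result['loss']` and
    # `result['nit']` hold the final loss value and the number of iterations.
    logger.info(f"Finished after {result['nit']} iterations with loss {result['loss']:.2e}.")
    return result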