Coverage for local_installation_linux/mumott/optimization/optimizers/gradient_descent.py: 93%
50 statements
coverage.py v7.3.2, created at 2025-05-05 21:21 +0000
import logging

from typing import Dict, Any

import numpy as np

from mumott.core.hashing import list_to_hash
from mumott.optimization.loss_functions.base_loss_function import LossFunction
from .base_optimizer import Optimizer

logger = logging.getLogger(__name__)


class GradientDescent(Optimizer):
    r"""This Optimizer is a gradient descent (sometimes called steepest descent)
    solver, which can be set to terminate based on the loss function and/or the maximum
    number of iterations.

    It also supports the use of Nesterov accelerated momentum, which features a look-ahead
    momentum term based on the gradients of previous iterations.

    The update sequence may be written

    .. math::
        x &\leftarrow x - (p + \alpha(\nabla(x - p) + \Lambda)) \\
        p &\leftarrow \beta(p + \alpha(\nabla(x - p) + \Lambda))

    where :math:`x` are the optimization coefficients, :math:`p` is the momentum,
    and :math:`\Lambda` is the regularization term. :math:`\alpha` is the step size
    and :math:`\beta` is the Nesterov momentum weight.
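
    As a concrete sketch of a single iteration (plain NumPy, with hypothetical arrays
    ``x``, ``p``, and ``gradient``, the latter standing in for
    :math:`\nabla(x - p) + \Lambda`; this is illustrative and not part of the
    implementation), the update reads

    .. code-block:: python

        import numpy as np

        step_size = 1.0                      # alpha
        nestorov_weight = 0.9                # beta
        x = np.ones(10)                      # optimization coefficients
        p = np.zeros_like(x)                 # momentum term
        gradient = np.ones_like(x)           # placeholder for grad(x - p) + Lambda

        update = p + step_size * gradient    # p + alpha * (grad(x - p) + Lambda)
        x = x - update                       # coefficient update
        p = nestorov_weight * update         # momentum carried to the next iteration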

    Parameters
    ----------
    loss_function : LossFunction
        The :ref:`loss function <loss_functions>` to be minimized using this algorithm.
    kwargs : Dict[str, Any]
        Miscellaneous options. See notes for valid entries.

    Notes
    -----
    Valid entries in :attr:`kwargs` are listed below; see the example further down
    for typical usage.
    x0
        Initial guess for the solution vector. Must be the same size as
        :attr:`residual_calculator.coefficients`. Defaults to :attr:`loss_function.initial_values`.
    step_size : float
        Step size for the gradient, labelled :math:`\alpha` above. Default value is ``1``.
        Must be strictly positive.
    nestorov_weight : float
        The size of the look-ahead term in each iteration, labelled :math:`\beta` above.
        Must be in the range ``[0, 1]``, including the endpoints.
        The default value is ``0``, which implies that the momentum term is not active.
    maxiter : int
        Maximum number of iterations. Default value is ``5``.
    ftol : float
        The tolerance for the relative change in the loss function before
        termination. A termination can only be induced after at least 5 iterations.
        If ``None``, termination only occurs once :attr:`maxiter` iterations
        have been performed. Default value is ``None``.
    display_loss : bool
        If ``True``, displays the change in loss at every iteration. Default is ``False``.
    enforce_non_negativity : bool
        Enforces non-negativity of all the coefficients. Should only be used
        with local or scalar representations. Default value is ``False``.
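
    Example
    -------
    A minimal usage sketch, assuming that ``loss_function`` is an already constructed
    :class:`LossFunction` instance (its construction is not shown here, and the option
    values are placeholders):

    .. code-block:: python

        optimizer = GradientDescent(loss_function,
                                    step_size=0.5,
                                    nestorov_weight=0.9,
                                    maxiter=50,
                                    ftol=1e-6)
        result = optimizer.optimize()
        coefficients = result['x']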
65 """

    def __init__(self,
                 loss_function: LossFunction,
                 **kwargs: Dict[str, Any]):
        super().__init__(loss_function, **kwargs)

    def optimize(self) -> Dict:
        """ Executes the optimization using the options stored in this class
        instance. The optimization will continue until convergence,
        or until the maximum number of iterations (:attr:`maxiter`) is exceeded.

        Returns
        -------
        A ``dict`` of optimization results. See `scipy.optimize.OptimizeResult
        <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
        for details. The entry ``'x'``, which contains the result, will be reshaped using
        the shape of the gradient from :attr:`loss_function`.
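
        Example
        -------
        Assuming ``optimizer`` is an instance of this class:

        .. code-block:: python

            result = optimizer.optimize()
            final_loss = result['loss']     # final value of the loss function
            n_iterations = result['nit']    # number of iterations performed
            coefficients = result['x']      # the optimized coefficients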
83 """
        opt_kwargs = dict(x0=self._loss_function.initial_values,
                          ftol=None,
                          maxiter=5,
                          enforce_non_negativity=False,
                          display_loss=False,
                          step_size=1.,
                          nestorov_weight=0.)
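
        # Overwrite the defaults above with any user-supplied options and warn
        # about options that are not recognized.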
        for k in opt_kwargs:
            if k in dict(self):
                opt_kwargs[k] = self[k]

        for k in dict(self):
            if k not in opt_kwargs:
                logger.warning(f'Unknown option {k}, with value {self[k]}, has been ignored.')

        if opt_kwargs['x0'] is None:
            coefficients = self._loss_function.initial_values
        else:
            coefficients = opt_kwargs['x0']

        if opt_kwargs['ftol'] is None:
            ftol = -np.inf
        else:
            ftol = opt_kwargs['ftol']

        if opt_kwargs['step_size'] <= 0:
            raise ValueError(f'step_size must be greater than 0, but its value is {opt_kwargs["step_size"]}!')

        if not 0 <= opt_kwargs['nestorov_weight'] <= 1:
            raise ValueError('nestorov_weight must be in the range [0, 1] inclusive,'
                             f' but its value is {opt_kwargs["nestorov_weight"]}!')

        previous_loss = -1
        previous_gradient = np.zeros_like(coefficients)
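        # Main loop: the gradient is evaluated at the look-ahead point x - p,
        # scaled by the step size and accumulated into the momentum term, which is
        # then applied to the coefficients and decayed by the Nesterov weight.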
        for i in self._tqdm(opt_kwargs['maxiter']):
            d = self._loss_function.get_loss(coefficients - previous_gradient, get_gradient=True)
            relative_change = (previous_loss - d['loss']) / d['loss']
            if opt_kwargs['display_loss']:
                logger.info(f'Iteration: {i} Loss function: {d["loss"]:.2e}'
                            f' Relative change: {relative_change:.2e}')
            d['gradient'] *= opt_kwargs['step_size']
            previous_gradient += d['gradient']
            coefficients -= previous_gradient
            previous_loss = d['loss']
            previous_gradient *= opt_kwargs['nestorov_weight']
            if opt_kwargs['enforce_non_negativity']:
                np.clip(coefficients, 0, None, out=coefficients)
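            # Terminate early once the relative change in the loss drops below ftol
            # (only checked after the first few iterations).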
            if i > 5 and relative_change < ftol:
                logger.info(f'Relative change ({relative_change}) is less than ftol ({ftol})!'
                            ' Optimization finished.')
                break

        result = dict(x=coefficients, loss=d['loss'], nit=i+1)
        return dict(result)

    def __hash__(self) -> int:
        to_hash = [self._options, hash(self._loss_function)]
        return int(list_to_hash(to_hash), 16)