Coverage for local_installation_linux/mumott/optimization/optimizers/gradient_descent.py: 93%

50 statements  

coverage.py v7.3.2, created at 2024-08-11 23:08 +0000

import logging

from typing import Dict, Any

import numpy as np

from mumott.core.hashing import list_to_hash
from mumott.optimization.loss_functions.base_loss_function import LossFunction
from .base_optimizer import Optimizer


logger = logging.getLogger(__name__)


class GradientDescent(Optimizer):
    r"""This Optimizer is a gradient descent (sometimes called steepest descent)
    solver, which can be set to terminate based on the loss function and/or the maximum
    number of iterations.

    It also supports the use of Nesterov accelerated momentum, which features a look-ahead
    momentum term based on the gradients of previous iterations.

    The update sequence may be written as

    .. math::
        x &\leftarrow x - (p + \alpha(\nabla(x - p) + \Lambda)) \\
        p &\leftarrow \beta(p + \alpha(\nabla(x - p) + \Lambda))

    where :math:`x` are the optimization coefficients, :math:`p` is the momentum,
    and :math:`\Lambda` is the regularization term. :math:`\alpha` is the step size
    and :math:`\beta` is the Nesterov momentum weight.
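
    For illustration, a single iteration of this update can be sketched in plain NumPy
    as shown below. This is only a schematic with a toy least-squares loss and no
    regularization (:math:`\Lambda = 0`); it is not the implementation used by
    :meth:`optimize`, and the names ``gradient_of_loss``, ``x``, and ``p`` exist only
    in this example.

    .. code-block:: python

        import numpy as np

        A = np.array([[3.0, 1.0], [1.0, 2.0]])
        b = np.array([1.0, -1.0])

        def gradient_of_loss(y):
            # gradient of the toy loss 0.5 * ||A @ y - b||^2,
            # standing in for the gradient returned by the loss function
            return A.T @ (A @ y - b)

        x = np.zeros(2)                         # optimization coefficients
        p = np.zeros(2)                         # momentum term
        step_size, nestorov_weight = 0.05, 0.8  # alpha and beta in the expressions above

        for _ in range(100):
            gradient = gradient_of_loss(x - p)  # gradient at the look-ahead point x - p
            p = p + step_size * gradient        # p + alpha * (grad(x - p) + Lambda)
            x = x - p                           # x <- x - (p + alpha * (...))
            p = nestorov_weight * p             # p <- beta * (p + alpha * (...))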

    Parameters
    ----------
    loss_function : LossFunction
        The :ref:`loss function <loss_functions>` to be minimized using this algorithm.
    kwargs : Dict[str, Any]
        Miscellaneous options. See notes for valid entries.

    Notes
    -----
    Valid entries in :attr:`kwargs` are
    x0
        Initial guess for the solution vector. Must be the same size as
        :attr:`residual_calculator.coefficients`. Defaults to :attr:`loss_function.initial_values`.
    step_size : float
        Step size for the gradient, labelled :math:`\alpha` above. Default value is ``1``.
        Must be strictly positive.
    nestorov_weight : float
        The size of the look-ahead term in each iteration, labelled :math:`\beta` above.
        Must be in the range ``[0, 1]``, including the endpoints.
        The default value is ``0``, which implies that the momentum term is not active.
    maxiter : int
        Maximum number of iterations. Default value is ``5``.
    ftol : float
        The tolerance for the relative change in the loss function before
        termination. Termination can only be induced after at least 5 iterations.
        If ``None``, termination only occurs once :attr:`maxiter` iterations
        have been performed. Default value is ``None``.
    display_loss : bool
        If ``True``, the loss and its relative change are logged at every iteration.
        Default is ``False``.
    enforce_non_negativity : bool
        Enforces non-negativity on all the coefficients by clipping negative values
        to zero. Should only be used with local or scalar representations.
        Default value is ``False``.
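
    Example
    -------
    A minimal usage sketch, assuming that ``loss_function`` is an already configured
    :ref:`loss function <loss_functions>` instance; the keyword arguments are the
    options documented above, with illustrative values.

    .. code-block:: python

        optimizer = GradientDescent(loss_function,
                                    maxiter=20,
                                    step_size=0.1,
                                    nestorov_weight=0.5,
                                    ftol=1e-4)
        result = optimizer.optimize()
        coefficients = result['x']      # optimized coefficients
        final_loss = result['loss']     # loss at the last iteration
        iterations = result['nit']      # number of iterations carried out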

    """

    def __init__(self,
                 loss_function: LossFunction,
                 **kwargs: Dict[str, Any]):
        super().__init__(loss_function, **kwargs)

    def optimize(self) -> Dict:
        """ Executes the optimization using the options stored in this class
        instance. The optimization will continue until convergence,
        or until the maximum number of iterations (:attr:`maxiter`) is exceeded.

        Returns
        -------
        A ``dict`` of optimization results. See `scipy.optimize.OptimizeResult
        <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
        for details. The entry ``'x'``, which contains the result, will be reshaped using
        the shape of the gradient from :attr:`loss_function`.
        """

        # Default values for all recognized options.
        opt_kwargs = dict(x0=self._loss_function.initial_values,
                          ftol=None,
                          maxiter=5,
                          enforce_non_negativity=False,
                          display_loss=False,
                          step_size=1.,
                          nestorov_weight=0.)

        # Override the defaults with any options stored on this instance,
        # and warn about entries that are not recognized.
        for k in opt_kwargs:
            if k in dict(self):
                opt_kwargs[k] = self[k]

        for k in dict(self):
            if k not in opt_kwargs:
                logger.warning(f'Unknown option {k}, with value {self[k]}, has been ignored.')

        if opt_kwargs['x0'] is None:
            coefficients = self._loss_function.initial_values
        else:
            coefficients = opt_kwargs['x0']

        if opt_kwargs['ftol'] is None:
            # No tolerance given; never terminate early based on the loss.
            ftol = -np.inf
        else:
            ftol = opt_kwargs['ftol']

        if opt_kwargs['step_size'] <= 0:
            raise ValueError(f'step_size must be greater than 0, but its value is {opt_kwargs["step_size"]}!')

        if not 0 <= opt_kwargs['nestorov_weight'] <= 1:
            raise ValueError('nestorov_weight must be in the range [0, 1] inclusive,'
                             f' but its value is {opt_kwargs["nestorov_weight"]}!')

        previous_loss = -1
        # previous_gradient holds the momentum term p from the class docstring.
        previous_gradient = np.zeros_like(coefficients)
        for i in self._tqdm(opt_kwargs['maxiter']):
            # Evaluate the loss and gradient at the look-ahead point x - p.
            d = self._loss_function.get_loss(coefficients - previous_gradient, get_gradient=True)
            relative_change = (previous_loss - d['loss']) / d['loss']
            if opt_kwargs['display_loss']:  # coverage: this branch was never taken in the measured run
                logger.info(f'Iteration: {i} Loss function: {d["loss"]:.2e}'
                            f' Relative change: {relative_change:.2e}')
            # Nesterov-accelerated update, following the expressions in the class docstring.
            d['gradient'] *= opt_kwargs['step_size']            # alpha * (grad(x - p) + Lambda)
            previous_gradient += d['gradient']                  # p + alpha * (...)
            coefficients -= previous_gradient                   # x <- x - (p + alpha * (...))
            previous_loss = d['loss']
            previous_gradient *= opt_kwargs['nestorov_weight']  # p <- beta * (p + alpha * (...))
            if opt_kwargs['enforce_non_negativity']:
                np.clip(coefficients, 0, None, out=coefficients)
            # coverage: the early-termination branch below was never taken in the measured run.
            if i > 5 and relative_change < ftol:
                logger.info(f'Relative change ({relative_change}) is less than ftol ({ftol})!'
                            ' Optimization finished.')
                break

        result = dict(x=coefficients, loss=d['loss'], nit=i+1)
        return dict(result)

    def __hash__(self) -> int:
        to_hash = [self._options, hash(self._loss_function)]
        return int(list_to_hash(to_hash), 16)