Coverage for local_installation_linux/mumott/optimization/optimizers/gradient_descent.py: 93%

50 statements  

coverage.py v7.3.2, created at 2024-08-11 23:08 +0000

import logging

from typing import Dict, Any

import numpy as np

from mumott.core.hashing import list_to_hash
from mumott.optimization.loss_functions.base_loss_function import LossFunction
from .base_optimizer import Optimizer


logger = logging.getLogger(__name__)


class GradientDescent(Optimizer):
    r"""This Optimizer is a gradient descent (sometimes called steepest descent)
    solver, which can be set to terminate based on the loss function and/or the maximum
    number of iterations.

    It also supports the use of Nesterov accelerated momentum, which features a look-ahead
    momentum term based on the gradients of previous iterations.

    The update sequence may be written as

    .. math::
        x &\leftarrow x - (p + \alpha(\nabla(x - p) + \Lambda)) \\
        p &\leftarrow \beta(p + \alpha(\nabla(x - p) + \Lambda))

    where :math:`x` are the optimization coefficients, :math:`p` is the momentum,
    and :math:`\Lambda` is the regularization term. :math:`\alpha` is the step size
    and :math:`\beta` is the Nesterov momentum weight.
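
    For illustration, a single iteration of this update can be sketched in plain NumPy
    as shown below. This is only a schematic with a toy least-squares loss and no
    regularization (:math:`\Lambda = 0`); it is not the implementation used by
    :meth:`optimize`, and the names ``gradient_of_loss``, ``x``, and ``p`` exist only
    in this example.

    .. code-block:: python

        import numpy as np

        A = np.array([[3.0, 1.0], [1.0, 2.0]])
        b = np.array([1.0, -1.0])

        def gradient_of_loss(y):
            # gradient of the toy loss 0.5 * ||A @ y - b||^2,
            # standing in for the gradient returned by the loss function
            return A.T @ (A @ y - b)

        x = np.zeros(2)                         # optimization coefficients
        p = np.zeros(2)                         # momentum term
        step_size, nestorov_weight = 0.05, 0.8  # alpha and beta in the expressions above

        for _ in range(100):
            gradient = gradient_of_loss(x - p)  # gradient at the look-ahead point x - p
            p = p + step_size * gradient        # p + alpha * (grad(x - p) + Lambda)
            x = x - p                           # x <- x - (p + alpha * (...))
            p = nestorov_weight * p             # p <- beta * (p + alpha * (...))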

    Parameters
    ----------
    loss_function : LossFunction
        The :ref:`loss function <loss_functions>` to be minimized using this algorithm.
    kwargs : Dict[str, Any]
        Miscellaneous options. See notes for valid entries.

    Notes
    -----
    Valid entries in :attr:`kwargs` are
    x0
        Initial guess for the solution vector. Must be the same size as
        :attr:`residual_calculator.coefficients`. Defaults to :attr:`loss_function.initial_values`.
    step_size : float
        Step size for the gradient, labelled :math:`\alpha` above. Default value is ``1``.
        Must be strictly positive.
    nestorov_weight : float
        The size of the look-ahead term in each iteration, labelled :math:`\beta` above.
        Must be in the range ``[0, 1]``, including the endpoints.
        The default value is ``0``, which implies that the momentum term is not active.
    maxiter : int
        Maximum number of iterations. Default value is ``5``.
    ftol : float
        The tolerance for the relative change in the loss function before
        termination. Termination can only be induced after at least 5 iterations.
        If ``None``, termination only occurs once :attr:`maxiter` iterations
        have been performed. Default value is ``None``.
    display_loss : bool
        If ``True``, the loss and its relative change are logged at every iteration.
        Default is ``False``.
    enforce_non_negativity : bool
        Enforces non-negativity on all the coefficients by clipping negative values
        to zero. Should only be used with local or scalar representations.
        Default value is ``False``.
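
    Example
    -------
    A minimal usage sketch, assuming that ``loss_function`` is an already configured
    :ref:`loss function <loss_functions>` instance; the keyword arguments are the
    options documented above, with illustrative values.

    .. code-block:: python

        optimizer = GradientDescent(loss_function,
                                    maxiter=20,
                                    step_size=0.1,
                                    nestorov_weight=0.5,
                                    ftol=1e-4)
        result = optimizer.optimize()
        coefficients = result['x']      # optimized coefficients
        final_loss = result['loss']     # loss at the last iteration
        iterations = result['nit']      # number of iterations carried out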

    """

    def __init__(self,
                 loss_function: LossFunction,
                 **kwargs: Dict[str, Any]):
        super().__init__(loss_function, **kwargs)

    def optimize(self) -> Dict:
        """ Executes the optimization using the options stored in this class
        instance. The optimization will continue until convergence,
        or until the maximum number of iterations (:attr:`maxiter`) is exceeded.

        Returns
        -------
        A ``dict`` of optimization results. See `scipy.optimize.OptimizeResult
        <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html>`_
        for details. The entry ``'x'``, which contains the result, will be reshaped using
        the shape of the gradient from :attr:`loss_function`.
        """

        # Default values for all recognized options.
        opt_kwargs = dict(x0=self._loss_function.initial_values,
                          ftol=None,
                          maxiter=5,
                          enforce_non_negativity=False,
                          display_loss=False,
                          step_size=1.,
                          nestorov_weight=0.)

        # Override the defaults with any options stored on this instance,
        # and warn about entries that are not recognized.
        for k in opt_kwargs:
            if k in dict(self):
                opt_kwargs[k] = self[k]

        for k in dict(self):
            if k not in opt_kwargs:
                logger.warning(f'Unknown option {k}, with value {self[k]}, has been ignored.')

        if opt_kwargs['x0'] is None:
            coefficients = self._loss_function.initial_values
        else:
            coefficients = opt_kwargs['x0']

        if opt_kwargs['ftol'] is None:
            # No tolerance given; never terminate early based on the loss.
            ftol = -np.inf
        else:
            ftol = opt_kwargs['ftol']

        if opt_kwargs['step_size'] <= 0:
            raise ValueError(f'step_size must be greater than 0, but its value is {opt_kwargs["step_size"]}!')

        if not 0 <= opt_kwargs['nestorov_weight'] <= 1:
            raise ValueError('nestorov_weight must be in the range [0, 1] inclusive,'
                             f' but its value is {opt_kwargs["nestorov_weight"]}!')

        previous_loss = -1
        # previous_gradient holds the momentum term p from the class docstring.
        previous_gradient = np.zeros_like(coefficients)
        for i in self._tqdm(opt_kwargs['maxiter']):
            # Evaluate the loss and gradient at the look-ahead point x - p.
            d = self._loss_function.get_loss(coefficients - previous_gradient, get_gradient=True)
            relative_change = (previous_loss - d['loss']) / d['loss']
            if opt_kwargs['display_loss']:  # coverage: this branch was never taken in the measured run
                logger.info(f'Iteration: {i} Loss function: {d["loss"]:.2e}'
                            f' Relative change: {relative_change:.2e}')
            # Nesterov-accelerated update, following the expressions in the class docstring.
            d['gradient'] *= opt_kwargs['step_size']            # alpha * (grad(x - p) + Lambda)
            previous_gradient += d['gradient']                  # p + alpha * (...)
            coefficients -= previous_gradient                   # x <- x - (p + alpha * (...))
            previous_loss = d['loss']
            previous_gradient *= opt_kwargs['nestorov_weight']  # p <- beta * (p + alpha * (...))
            if opt_kwargs['enforce_non_negativity']:
                np.clip(coefficients, 0, None, out=coefficients)
            # coverage: the early-termination branch below was never taken in the measured run.
            if i > 5 and relative_change < ftol:
                logger.info(f'Relative change ({relative_change}) is less than ftol ({ftol})!'
                            ' Optimization finished.')
                break

        result = dict(x=coefficients, loss=d['loss'], nit=i+1)
        return dict(result)

    def __hash__(self) -> int:
        to_hash = [self._options, hash(self._loss_function)]
        return int(list_to_hash(to_hash), 16)