Coverage for local_installation_linux/mumott/pipelines/async_pipelines.py: 98%
182 statements
coverage.py v7.3.2, created at 2025-05-05 21:21 +0000
import sys
import numpy as np
import tqdm
from numba import cuda

from mumott.core.john_transform_cuda import (john_transform_cuda,
                                             john_transform_adjoint_cuda)
from mumott.core.cuda_kernels import (cuda_weighted_difference, cuda_scaled_difference,
                                      cuda_sum, cuda_rescale, cuda_difference,
                                      cuda_rescale_array, cuda_framewise_contraction,
                                      cuda_framewise_contraction_adjoint,
                                      cuda_l1_gradient, cuda_tv_gradient, cuda_weighted_sign,
                                      cuda_lower_bound)
from mumott.data_handling import DataContainer
from mumott.methods.basis_sets import GaussianKernels
from mumott.methods.basis_sets.base_basis_set import BasisSet
from mumott.methods.projectors import SAXSProjectorCUDA
from mumott.methods.utilities import get_tensor_sirt_weights, get_tensor_sirt_preconditioner
from mumott.optimization.regularizers import L1Norm, TotalVariation


def run_tensor_sirt(data_container: DataContainer,
                    maxiter: int = 10,
                    basis_set: BasisSet = None,
                    update_frequency: int = 5):
    """An asynchronous implementation of the tensor SIRT algorithm.
    This approach uses only asynchronous, in-place operations on the GPU,
    and is therefore much faster than standard pipelines, as well as more memory-efficient.

    Parameters
    ----------
    data_container
        The data.
    maxiter
        Maximum number of iterations.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    update_frequency
        Synchronization and norm reduction progress printing frequency. If set too small,
        the optimization will be slower due to frequent host-device synchronization. This effect
        can be seen by noting the iterations per second on the progress bar. The printed
        norm does not account for the total variation regularization.

    Returns
    -------
    Dictionary with ``reconstruction``, ``projector``, ``basis_set``, and ``projection`` entries.
    """
    # Create projector for simple fetching of parameters
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates)
    # Get weights, etc.
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    weights = cuda.to_device(weights.astype(np.float32))
    preconditioner = cuda.to_device(get_tensor_sirt_preconditioner(
        projector=projector, basis_set=basis_set).astype(np.float32))
    matrix = basis_set.projection_matrix.astype(np.float32)
    # Allocate matrices, compile kernels
    data = cuda.to_device(data_container.data.astype(np.float32))
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    projections = cuda.to_device(
        np.zeros(data_container.data.shape[:-1] + (len(basis_set),),
                 dtype=np.float32))
    data_approximation = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_cuda(reconstruction, projections, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_cuda(reconstruction, projections, *projector.john_transform_parameters)
    contraction = cuda_framewise_contraction(data_container.data.shape[:-1], *matrix.shape[1:])
    contraction_adjoint = cuda_framewise_contraction_adjoint(
        data_container.data.shape[:-1],
        *matrix.shape[1:])
    weighted_difference = cuda_weighted_difference(data_approximation.shape)
    scaled_difference = cuda_scaled_difference(reconstruction.shape)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    matrix = cuda.to_device(matrix)
    # create necessary host-side objects
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.update(0)
    # Run asynchronous reconstruction
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, data_approximation, weights)
        # compute gradient with respect to tensor representation
        contraction_adjoint(data_approximation, matrix, projections)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # update reconstruction
        scaled_difference(gradient, reconstruction, preconditioner)
        # forward john transform
        forward(reconstruction, projections)
        # forward tensor representation
        contraction(projections, matrix, data_approximation)
        # update user on progress
        if (i + 1) % update_frequency == 0:
            cuda.synchronize()
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(data_approximation))
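
# --- Editor's illustrative sketch (not part of the original module) ---
# A minimal example of how run_tensor_sirt might be invoked. The file name is
# hypothetical, and loading via ``DataContainer('<path>.h5')`` assumes data in the
# standard mumott format. Wrapped in a function so it is not executed on import.
def _example_tensor_sirt_usage():
    data_container = DataContainer('trs_saxs_data.h5')  # hypothetical data file
    result = run_tensor_sirt(data_container, maxiter=20, update_frequency=5)
    # ``reconstruction`` holds one coefficient per basis function for each voxel.
    coefficients = result['reconstruction']
    print(coefficients.shape, type(result['basis_set']).__name__)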


def run_motr(data_container: DataContainer,
             maxiter: int = 10,
             momentum: float = 0.9,
             l1_weight: float = 1e-3,
             tv_weight: float = 1e-3,
             basis_set: BasisSet = None,
             lower_bound: float = None,
             step_size: float = 1.,
             update_frequency: int = 5):
    """ MOTR (MOmentum Total variation Reconstruction) pipeline
    that uses asynchronous GPU (device) computations only
    during the reconstruction. This leads to a large speedup compared to standard pipelines,
    which synchronize with the CPU several times per iteration. However, this implementation
    is less modular than standard pipelines.

    This pipeline uses Nesterov momentum in combination with total variation and L1 regularization,
    as well as the tensor SIRT preconditioner-weight pair, which normalizes the gradient based on
    the projection geometry and basis set.

    This is also a relatively efficient implementation with respect to device memory, as all arithmetic
    operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    momentum
        Momentum for the Nesterov gradient.
    l1_weight
        Weight for L1 regularization.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to
        e.g. enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``, which is the default setting.
    step_size
        Step size for each iteration in the reconstruction. The default value of ``1.``
        should be suitable in the vast majority of cases, due to the normalizing
        effect of the weight-preconditioner pair used.
    update_frequency
        Synchronization and norm reduction progress printing frequency. If set too small,
        the optimization will be slower due to frequent host-device synchronization. This effect
        can be seen by noting the iterations per second on the progress bar. The printed
        norm does not account for the total variation regularization.

    Returns
    -------
    Dictionary with ``reconstruction``, ``projector``, ``basis_set``, ``projection``,
    and ``loss_curve`` entries. ``loss_curve`` is an array with
    ``maxiter // update_frequency`` rows, where each row contains
    ``iteration, loss, residual_norm, l1_norm, tv_norm``.
    """
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(projector=projector,
                                                   basis_set=basis_set).astype(np.float32))
    matrix = basis_set.projection_matrix.astype(np.float32)
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels:
    data = cuda.to_device(data_container.data.astype(np.float32))
    projections = cuda.to_device(
        np.zeros(data_container.data.shape[:-1] + (len(basis_set),),
                 dtype=np.float32))
    data_approximation = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_cuda(reconstruction, projections, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_cuda(reconstruction, projections, *projector.john_transform_parameters)
    contraction = cuda_framewise_contraction(data_container.data.shape[:-1],
                                             *matrix.shape[1:])
    contraction_adjoint = cuda_framewise_contraction_adjoint(data_container.data.shape[:-1],
                                                             *matrix.shape[1:])
    weighted_difference = cuda_weighted_difference(data_approximation.shape)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    l1_gradient = cuda_l1_gradient(reconstruction.shape, l1_weight)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    if lower_bound is not None:
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    # Allocate remaining CUDA arrays
    matrix = cuda.to_device(matrix)
    data = cuda.to_device(data_container.data.astype(np.float32))
    host_data = data_container.data.astype(np.float32)
    host_projections = np.array(data_approximation)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    # create necessary host-side objects
    diff = host_projections - host_data
    lf = (host_weights * diff * diff).sum()
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.set_description(f'Loss: {lf:.2e}')
    pbar.update(0)
    loss_curve = []
    host_l1 = L1Norm()
    host_tv = TotalVariation(None)
    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, data_approximation, weights)
        # compute gradient with respect to tensor representation
        contraction_adjoint(data_approximation, matrix, projections)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply regularization
        l1_gradient(reconstruction, gradient)
        tv_gradient(reconstruction, gradient)
        # apply gradient (correction term)
        difference(reconstruction, gradient)
        # add to accumulated gradient
        sum_kernel(total_gradient, gradient)
        # scale by momentum coefficient
        rescale(total_gradient)
        # apply gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold
        if lower_bound is not None:
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # forward tensor representation
        contraction(projections, matrix, data_approximation)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(data_approximation)
            host_recon = np.array(reconstruction)
            rg1 = host_l1.get_regularization_norm(host_recon)['regularization_norm']
            rg2 = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg1 * l1_weight + rg2 * tv_weight + rn
            loss_curve.append((i, lf, rn, rg1, rg2))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} L1 norm: {rg1:.2e} TV norm: {rg2:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(data_approximation), loss_curve=np.array(loss_curve))
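
# --- Editor's illustrative sketch (not part of the original module) ---
# A minimal example of how run_motr might be called, assuming data readable by
# ``DataContainer``. The file name and regularization weights below are hypothetical
# and would need tuning for real data. Wrapped in a function so it is not run on import.
def _example_motr_usage():
    data_container = DataContainer('saxs_tensor_data.h5')  # hypothetical data file
    result = run_motr(data_container, maxiter=100, l1_weight=1e-4, tv_weight=1e-4,
                      lower_bound=0., update_frequency=10)
    # Each loss_curve row holds (iteration, loss, residual_norm, l1_norm, tv_norm).
    for iteration, loss, rn, rg1, rg2 in result['loss_curve']:
        print(f'{int(iteration)}: loss={loss:.3e} residual={rn:.3e}')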


def run_radtt(data_container: DataContainer,
              maxiter: int = 10,
              momentum: float = 0.9,
              step_size: float = 1.,
              delta: float = 1.,
              tv_weight: float = 1e-3,
              basis_set: BasisSet = None,
              lower_bound: float = 0.,
              update_frequency: int = 5):
    """ RADTT (Robust And Denoised Tensor Tomography) pipeline
    that uses asynchronous GPU (device) computations only
    during the reconstruction. This leads to a large speedup compared to standard pipelines,
    which synchronize with the CPU several times per iteration. However, this implementation
    is less modular than standard pipelines.

    This pipeline uses Nesterov accelerated gradient descent, with a Huber loss function
    and total variation regularization. This reconstruction approach requires a large number
    of iterations, but is robust to outliers in the data.

    This is also a relatively efficient implementation with respect to device memory, as all arithmetic
    operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    momentum
        Momentum for the Nesterov gradient.
    step_size
        Step size for L1 optimization. The step size for the
        L2 part of the optimization is ``step_size / (2 * delta)``. A good choice
        for the step size is typically around the same order of magnitude
        as each coefficient of the reconstruction.
    delta
        Threshold for the transition to L2 optimization. Should be small relative to the data.
        Does not affect the total variation.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to
        e.g. enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``. The default is ``0.``.
    update_frequency
        Synchronization and norm reduction progress printing frequency. If set too small,
        the optimization will be slower due to frequent host-device synchronization. This effect
        can be seen by noting the iterations per second on the progress bar. The printed
        norm does not account for the total variation regularization.

    Returns
    -------
    Dictionary with ``reconstruction``, ``projector``, ``basis_set``, ``projection``,
    and ``loss_curve`` entries.
    """
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(weights.astype(np.float32))
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    matrix = basis_set.projection_matrix.astype(np.float32)
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels
    data = cuda.to_device(data_container.data.astype(np.float32))
    projections = cuda.to_device(
        np.zeros(data_container.data.shape[:-1] + (len(basis_set),), dtype=np.float32))
    data_approximation = cuda.to_device(np.zeros(data_container.data.shape, dtype=np.float32))
    forward = john_transform_cuda(reconstruction, projections, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_cuda(reconstruction, projections, *projector.john_transform_parameters)
    contraction = cuda_framewise_contraction(data.shape[:-1], *matrix.shape[1:])
    contraction_adjoint = cuda_framewise_contraction_adjoint(data.shape[:-1], *matrix.shape[1:])
    weighted_sign = cuda_weighted_sign(data_approximation.shape, delta)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    # Allocate remaining CUDA arrays
    matrix = cuda.to_device(matrix)
    host_data = data_container.data.astype(np.float32)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    host_projections = np.array(data_approximation)
    lf = (host_weights * abs(host_projections - host_data)).sum()
    rg = 0.
    pbar.set_description(f'Loss: {lf:.2e} Res.norm: {lf:.2e} TV norm: {rg:.2e}')
    pbar.update(0)
    if lower_bound is not None:
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    loss_curve = []
    host_tv = TotalVariation(None)
    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # residual norm gradient
        weighted_sign(data, data_approximation, weights)
        # tensor representation gradient
        contraction_adjoint(data_approximation, matrix, projections)
        # john transform gradient
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply total variation regularization
        tv_gradient(reconstruction, gradient)
        # update reconstruction by gradient (correction term)
        difference(reconstruction, gradient)
        # compute updated momentum term
        sum_kernel(total_gradient, gradient)
        # apply momentum decay
        rescale(total_gradient)
        # update reconstruction by gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold by lower bound
        if lower_bound is not None:
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # forward tensor projection
        contraction(projections, matrix, data_approximation)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(data_approximation)
            host_recon = np.array(reconstruction)
            rg = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg * tv_weight + rn
            loss_curve.append((i, lf, rn, rg))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} TV norm: {rg:.2e}')
            pbar.update(update_frequency)
    # Synchronizes due to np.array(reconstruction)
    return dict(reconstruction=np.array(reconstruction), projector=projector,
                basis_set=basis_set, projection=np.array(data_approximation), loss_curve=np.array(loss_curve))
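
# --- Editor's illustrative sketch (not part of the original module) ---
# A minimal example of how run_radtt might be called. The file name is hypothetical;
# the step size is, per the docstring above, chosen on the order of a typical
# reconstruction coefficient, and a larger iteration count is used since the
# Huber-loss approach converges slowly. Wrapped in a function so it is not run on import.
def _example_radtt_usage():
    data_container = DataContainer('saxs_tensor_data.h5')  # hypothetical data file
    result = run_radtt(data_container, maxiter=500, step_size=1e-2, delta=1e-2,
                       tv_weight=1e-4, update_frequency=25)
    # Each loss_curve row holds (iteration, loss, residual_norm, tv_norm).
    reconstruction = result['reconstruction']
    loss_curve = result['loss_curve']
    print(reconstruction.shape, loss_curve[-1])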