Coverage for local_installation_linux/mumott/pipelines/sparse_pipelines.py: 95%
162 statements
coverage.py v7.3.2, created at 2025-05-05 21:21 +0000
import sys
import numpy as np
import tqdm
from numba import cuda

from mumott.core.john_transform_sparse_cuda import (john_transform_sparse_cuda,
                                                    john_transform_adjoint_sparse_cuda)
from mumott.core.cuda_kernels import (cuda_weighted_difference, cuda_scaled_difference,
                                      cuda_sum, cuda_rescale, cuda_difference,
                                      cuda_rescale_array,
                                      cuda_l1_gradient, cuda_tv_gradient, cuda_weighted_sign,
                                      cuda_lower_bound)
from mumott.data_handling import DataContainer
from mumott.methods.basis_sets import GaussianKernels
from mumott.methods.basis_sets.base_basis_set import BasisSet
from mumott.methods.projectors import SAXSProjectorCUDA
from mumott.methods.utilities import get_tensor_sirt_weights, get_tensor_sirt_preconditioner
from mumott.optimization.regularizers import L1Norm, TotalVariation


def run_stsirt(data_container: DataContainer,
               maxiter: int = 10,
               sparsity_count: int = 3,
               basis_set: BasisSet = None,
               update_frequency: int = 5):
    """A sparse-basis implementation of the tensor SIRT algorithm (Sparse Tensor SIRT).
    This approach uses only asynchronous, in-place operations on the GPU, and is
    therefore much faster and more memory-efficient than the standard pipelines.
    In exchange, it is less modular and requires a relatively sparse basis set.

    Parameters
    ----------
    data_container
        The data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment,
        if using the default ``BasisSet``. Not used if a custom ``BasisSet`` is provided.
    basis_set
        A custom ``BasisSet`` instance. By default, a ``GaussianKernels`` with
        ``enforce_sparsity=True`` and ``sparsity_count=sparsity_count`` is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress printing.
        If set too small, the optimization will be slower due to frequent host-device
        synchronization. This effect can be seen by noting the iterations per second
        on the progress bar.

    Returns
    -------
    Dictionary with ``reconstruction``, ``projector``, ``basis_set``, and
    ``projection`` entries.
    """
    # Create projector for simple fetching of parameters
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    # Get weights and preconditioner
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    weights = cuda.to_device(weights.astype(np.float32))
    preconditioner = cuda.to_device(get_tensor_sirt_preconditioner(
        projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    # Allocate matrices, compile kernels
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_difference = cuda_weighted_difference(projections.shape)
    scaled_difference = cuda_scaled_difference(reconstruction.shape)
    data = cuda.to_device(data_container.data.astype(np.float32))
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    # Run asynchronous reconstruction
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.update(0)
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, projections, weights)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # update reconstruction
        scaled_difference(gradient, reconstruction, preconditioner)
        # forward john transform
        forward(reconstruction, projections)
        # update user on progress
        if (i + 1) % update_frequency == 0:
            cuda.synchronize()
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(projections))
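
# A minimal usage sketch for ``run_stsirt``; the file name is a hypothetical
# placeholder for any data set that loads into a ``DataContainer``:
#
#     from mumott.data_handling import DataContainer
#     from mumott.pipelines.sparse_pipelines import run_stsirt
#
#     data_container = DataContainer('my_dataset.h5')
#     result = run_stsirt(data_container, maxiter=20, sparsity_count=3)
#     coefficients = result['reconstruction']  # shape: volume_shape + (len(basis_set),)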


def run_smotr(data_container: DataContainer,
              maxiter: int = 10,
              sparsity_count: int = 3,
              momentum: float = 0.9,
              l1_weight: float = 1e-3,
              tv_weight: float = 1e-3,
              basis_set: BasisSet = None,
              update_frequency: int = 5,
              lower_bound: float = None,
              step_size: float = 1.):
    """SMOTR (Sparse MOmentum Total variation Reconstruction) pipeline
    that uses only asynchronous GPU (device) computations during the
    reconstruction. This leads to a large speedup compared to standard pipelines,
    which synchronize with the CPU several times per iteration. However, this
    implementation is less modular than the standard pipelines.

    This pipeline uses Nesterov momentum in combination with total variation and
    L1 regularization, as well as the tensor SIRT preconditioner-weight pair.

    This is also a highly efficient implementation with respect to device memory,
    as all arithmetic operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment.
    momentum
        Momentum for the Nesterov gradient.
    l1_weight
        Weight for L1 regularization.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress printing.
        If set too small, the optimization will be slower due to frequent host-device
        synchronization. This effect can be seen by noting the iterations per second
        on the progress bar. The printed residual norm does not include the
        regularization terms.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to,
        e.g., enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``, which is the default setting.
    step_size
        Step size for each iteration in the reconstruction. The default value of ``1.``
        should be suitable in the vast majority of cases, due to the normalizing
        effect of the weight-preconditioner pair used.

    Returns
    -------
    Dictionary with ``reconstruction``, ``projector``, ``basis_set``, ``projection``,
    and ``loss_curve`` entries. ``loss_curve`` is an array with
    ``maxiter // update_frequency`` rows, where the columns are
    ``iteration, loss, residual_norm, l1_norm, tv_norm``.
    """
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_difference = cuda_weighted_difference(projections.shape)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    l1_gradient = cuda_l1_gradient(reconstruction.shape, l1_weight)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    # Allocate remaining CUDA arrays
    data = cuda.to_device(data_container.data.astype(np.float32))
    host_data = data_container.data.astype(np.float32)
    host_projections = np.array(projections)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    diff = host_projections - host_data
    lf = (host_weights * diff * diff).sum()
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.set_description(f'Loss: {lf:.2e}')
    pbar.update(0)
    loss_curve = []
    host_l1 = L1Norm()
    host_tv = TotalVariation(None)
    if lower_bound is not None:
        lower_kernel = cuda_lower_bound(reconstruction.shape, lower_bound)
    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, projections, weights)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply regularization
        l1_gradient(reconstruction, gradient)
        tv_gradient(reconstruction, gradient)
        # apply gradient (correction term)
        difference(reconstruction, gradient)
        # add to accumulated gradient
        sum_kernel(total_gradient, gradient)
        # scale by momentum coefficient
        rescale(total_gradient)
        # apply gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold
        if lower_bound is not None:
            lower_kernel(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(projections)
            host_recon = np.array(reconstruction)
            rg1 = host_l1.get_regularization_norm(host_recon)['regularization_norm']
            rg2 = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg1 * l1_weight + rg2 * tv_weight + rn
            loss_curve.append((i, lf, rn, rg1, rg2))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} L1 norm: {rg1:.2e} TV norm: {rg2:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(projections), loss_curve=np.array(loss_curve))
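
# A minimal usage sketch for ``run_smotr``; the file name and regularization
# weights are illustrative placeholders that should be tuned to the data:
#
#     data_container = DataContainer('my_dataset.h5')
#     result = run_smotr(data_container, maxiter=100,
#                        l1_weight=1e-4, tv_weight=1e-4, lower_bound=0.)
#     loss_curve = result['loss_curve']  # columns: iteration, loss, residual, L1, TV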


def run_spradtt(data_container: DataContainer,
                maxiter: int = 10,
                sparsity_count: int = 3,
                momentum: float = 0.9,
                step_size: float = 1.,
                delta: float = 1.,
                tv_weight: float = 1e-3,
                basis_set: BasisSet = None,
                lower_bound: float = 0.,
                update_frequency: int = 5):
    """SPRADTT (SParse Robust And Denoised Tensor Tomography) pipeline
    that uses only asynchronous GPU (device) computations during the
    reconstruction. This leads to a large speedup compared to standard pipelines,
    which synchronize with the CPU several times per iteration. However, this
    approach is only suitable for sparse representations, and the implementation
    is therefore less modular than the standard pipelines.

    This pipeline uses Nesterov accelerated gradient descent, with a Huber loss
    function and total variation regularization. This reconstruction approach
    requires a large number of iterations, but is robust to outliers in the data.

    This is also a highly efficient implementation with respect to device memory,
    as all arithmetic operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment.
    momentum
        Momentum for the Nesterov gradient. Should be in the range ``[0., 1.]``.
    step_size
        Step size for the L1 part of the optimization. The step size for the
        L2 part is ``step_size / (2 * delta)``. A good choice for the step size
        is typically around the same order of magnitude as each coefficient
        of the reconstruction.
    delta
        Threshold for the transition to L2 optimization. Should be small relative
        to the data. Does not affect the total variation.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to,
        e.g., enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``. Defaults to ``0.``.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress printing.
        If set too small, the optimization will be slower due to frequent host-device
        synchronization. This effect can be seen by noting the iterations per second
        on the progress bar. The printed residual norm does not include the
        regularization terms.

    Returns
    -------
    Dictionary with ``reconstruction``, ``projector``, ``basis_set``, ``projection``,
    and ``loss_curve`` entries. ``loss_curve`` is an array with
    ``maxiter // update_frequency`` rows, where the columns are
    ``iteration, loss, residual_norm, tv_norm``.
    """
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_sign = cuda_weighted_sign(projections.shape, delta)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    if lower_bound is not None:
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    # Allocate remaining CUDA arrays
    host_data = data_container.data.astype(np.float32)
    data = cuda.to_device(data_container.data.astype(np.float32))
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    # Create necessary host-side objects for output
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    host_projections = np.array(projections)
    lf = (host_weights * abs(host_projections - host_data)).sum()
    rg = 0.
    pbar.set_description(f'Loss: {lf:.2e} Res.norm: {lf:.2e} TV norm: {rg:.2e}')
    pbar.update(0)
    loss_curve = []
    host_tv = TotalVariation(None)
    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # residual norm gradient
        weighted_sign(data, projections, weights)
        # john transform gradient
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply total variation regularization
        tv_gradient(reconstruction, gradient)
        # update reconstruction by gradient (correction term)
        difference(reconstruction, gradient)
        # compute updated momentum term
        sum_kernel(total_gradient, gradient)
        # apply momentum decay
        rescale(total_gradient)
        # update reconstruction by gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold by lower bound
        if lower_bound is not None:
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(projections)
            host_recon = np.array(reconstruction)
            rg = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg * tv_weight + rn
            loss_curve.append((i, lf, rn, rg))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} TV norm: {rg:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector,
                basis_set=basis_set, projection=np.array(projections), loss_curve=np.array(loss_curve))
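
# A minimal usage sketch for ``run_spradtt``; the file name and parameter
# values are illustrative placeholders. As noted in the docstring, a good
# ``step_size`` is typically of the same order of magnitude as the
# reconstruction coefficients:
#
#     data_container = DataContainer('my_dataset.h5')
#     result = run_spradtt(data_container, maxiter=500, step_size=0.1,
#                          tv_weight=1e-3, lower_bound=0.)
#     reconstruction = result['reconstruction']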