Coverage for local_installation_linux/mumott/pipelines/sparse_pipelines.py: 95%

162 statements  

coverage.py v7.3.2, created at 2024-08-11 23:08 +0000

import sys
import numpy as np
import tqdm
from numba import cuda

from mumott.core.john_transform_sparse_cuda import (john_transform_sparse_cuda,
                                                    john_transform_adjoint_sparse_cuda)
from mumott.core.cuda_kernels import (cuda_weighted_difference, cuda_scaled_difference,
                                      cuda_sum, cuda_rescale, cuda_difference,
                                      cuda_rescale_array,
                                      cuda_l1_gradient, cuda_tv_gradient, cuda_weighted_sign,
                                      cuda_lower_bound)
from mumott.data_handling import DataContainer
from mumott.methods.basis_sets import GaussianKernels
from mumott.methods.basis_sets.base_basis_set import BasisSet
from mumott.methods.projectors import SAXSProjectorCUDA
from mumott.methods.utilities import get_tensor_sirt_weights, get_tensor_sirt_preconditioner
from mumott.optimization.regularizers import L1Norm, TotalVariation


def run_stsirt(data_container: DataContainer,
               maxiter: int = 10,
               sparsity_count: int = 3,
               basis_set: BasisSet = None,
               update_frequency: int = 5):
    """A sparse-basis implementation of the tensor SIRT algorithm (Sparse Tensor SIRT).
    This approach uses only asynchronous, in-place operations on the GPU, and is
    therefore much faster than standard pipelines, as well as more memory-efficient.
    In exchange, it is less modular and requires a relatively sparse basis set.

    Parameters
    ----------
    data_container
        The data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment,
        if using the default ``BasisSet``. Not used if a custom ``BasisSet`` is provided.
    basis_set
        A custom ``BasisSet`` instance. By default, a ``GaussianKernels`` with
        ``enforce_sparsity=True`` and ``sparsity_count=sparsity_count`` is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress-bar
        updates. If set too small, the optimization will be slower due to frequent
        host-device synchronization. This effect can be seen by noting the
        iterations per second on the progress bar.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        and ``projection`` entries.
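
    Example
    -------
    A minimal usage sketch; the file name is a placeholder, and the loading
    step may differ for your data:

    >>> dc = DataContainer('path/to/data.h5')
    >>> result = run_stsirt(dc, maxiter=20)
    >>> coefficients = result['reconstruction']  # per-voxel basis-set coefficients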

53 """ 

    # Create projector for simple fetching of parameters
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    # Get weights, etc.
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    weights = cuda.to_device(weights.astype(np.float32))
    preconditioner = cuda.to_device(get_tensor_sirt_preconditioner(
        projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    # Allocate matrices, compile kernels
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_difference = cuda_weighted_difference(projections.shape)
    scaled_difference = cuda_scaled_difference(reconstruction.shape)
    data = cuda.to_device(data_container.data.astype(np.float32))
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
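    # Each loop iteration below corresponds, in matrix notation, to the
    # preconditioned SIRT update
    #     x <- x + P @ A.T @ W @ (b - A @ x),
    # where A is the sparse-basis John transform, b is the data, and P and W
    # are the tensor SIRT preconditioner and weights. All operations execute
    # asynchronously and in-place on the device.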

    # Run asynchronous reconstruction
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.update(0)
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, projections, weights)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # update reconstruction
        scaled_difference(gradient, reconstruction, preconditioner)
        # forward john transform
        forward(reconstruction, projections)
        # update user on progress
        if (i + 1) % update_frequency == 0:
            cuda.synchronize()
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(projections))


def run_smotr(data_container: DataContainer,
              maxiter: int = 10,
              sparsity_count: int = 3,
              momentum: float = 0.9,
              l1_weight: float = 1e-3,
              tv_weight: float = 1e-3,
              basis_set: BasisSet = None,
              update_frequency: int = 5,
              lower_bound: float = None,
              step_size: float = 1.):
    """ SMOTR (Sparse MOmentum Total variation Reconstruction) pipeline
    that uses only asynchronous GPU (device) computations
    during the reconstruction. This leads to a large speedup compared to standard
    pipelines, which synchronize with the CPU several times per iteration. However,
    this implementation is less modular than standard pipelines.

    This pipeline uses Nesterov momentum in combination with total variation and L1
    regularization, as well as the tensor SIRT preconditioner-weight pair.

    The implementation is also highly efficient with respect to device memory, as all
    arithmetic operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment.
    momentum
        Momentum for the Nesterov gradient.
    l1_weight
        Weight for L1 regularization.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization, at which the loss
        and norms are computed and printed. If set too small, the optimization will
        be slower due to frequent host-device synchronization. This effect can be
        seen by noting the iterations per second on the progress bar.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to,
        e.g., enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``, which is the default setting.
    step_size
        Step size for each iteration in the reconstruction. The default value of ``1.``
        should be suitable in the vast majority of cases, due to the normalizing
        effect of the weight-preconditioner pair used.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        ``projection``, and ``loss_curve`` entries.
        ``loss_curve`` is an array with ``maxiter // update_frequency`` rows, where
        the columns of each row are ``iteration, loss, residual_norm, l1_norm, tv_norm``.
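
    Example
    -------
    A minimal usage sketch; the file name is a placeholder and the
    regularization weights are illustrative only:

    >>> dc = DataContainer('path/to/data.h5')
    >>> result = run_smotr(dc, maxiter=100, l1_weight=1e-3, tv_weight=1e-3)
    >>> loss = result['loss_curve']  # columns: iteration, loss, residual_norm, l1_norm, tv_norm
    >>> coefficients = result['reconstruction']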

160 """ 

    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels:
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_difference = cuda_weighted_difference(projections.shape)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    l1_gradient = cuda_l1_gradient(reconstruction.shape, l1_weight)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    # Allocate remaining CUDA arrays
    data = cuda.to_device(data_container.data.astype(np.float32))
    host_data = data_container.data.astype(np.float32)
    host_projections = np.array(projections)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    diff = host_projections - host_data
    lf = (host_weights * diff * diff).sum()
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.set_description(f'Loss: {lf:.2e}')
    pbar.update(0)
    loss_curve = []
    host_l1 = L1Norm()
    host_tv = TotalVariation(None)

    if lower_bound is not None:
        lower_kernel = cuda_lower_bound(reconstruction.shape, lower_bound)
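    # In pseudocode, each iteration below performs the momentum update
    #     x <- x - g                 (preconditioned, regularized gradient step)
    #     v <- momentum * (v + g)    (accumulate and decay the momentum term)
    #     x <- x - v                 (apply the momentum term)
    # i.e., a Nesterov-style accelerated gradient descent, carried out entirely
    # in-place on the device.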

    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, projections, weights)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply regularization
        l1_gradient(reconstruction, gradient)
        tv_gradient(reconstruction, gradient)
        # apply gradient (correction term)
        difference(reconstruction, gradient)
        # add to accumulated gradient
        sum_kernel(total_gradient, gradient)
        # scale by momentum coefficient
        rescale(total_gradient)
        # apply gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold
        if lower_bound is not None:
            lower_kernel(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(projections)
            host_recon = np.array(reconstruction)
            rg1 = host_l1.get_regularization_norm(host_recon)['regularization_norm']
            rg2 = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg1 * l1_weight + rg2 * tv_weight + rn
            loss_curve.append((i, lf, rn, rg1, rg2))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} L1 norm: {rg1:.2e} TV norm: {rg2:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(projections), loss_curve=np.array(loss_curve))


def run_spradtt(data_container: DataContainer,
                maxiter: int = 10,
                sparsity_count: int = 3,
                momentum: float = 0.9,
                step_size: float = 1.,
                delta: float = 1.,
                tv_weight: float = 1e-3,
                basis_set: BasisSet = None,
                lower_bound: float = 0.,
                update_frequency: int = 5):
    """ SPRADTT (SParse Robust And Denoised Tensor Tomography) pipeline
    that uses only asynchronous GPU (device) computations
    during the reconstruction. This leads to a large speedup compared to standard
    pipelines, which synchronize with the CPU several times per iteration. However,
    this approach is only suitable for sparse representations, and the implementation
    is therefore less modular than standard pipelines.

    This pipeline uses Nesterov accelerated gradient descent, with a Huber loss function
    and total variation regularization. This reconstruction approach requires a large number
    of iterations, but is robust to outliers in the data.

    The implementation is also highly efficient with respect to device memory, as all
    arithmetic operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment.
    momentum
        Momentum for the Nesterov gradient. Should be in the range ``[0., 1.]``.
    step_size
        Step size for the L1 part of the optimization. The step size for the
        L2 part of the optimization is ``step_size / (2 * delta)``. A good choice
        for the step size is typically around the same order of magnitude
        as each coefficient of the reconstruction.
    delta
        Threshold for the transition to L2 optimization. Should be small relative
        to the data. Does not affect the total variation term.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to,
        e.g., enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``. The default value is ``0.``.
    update_frequency
        Frequency (in iterations) of host-device synchronization, at which the loss
        and norms are computed and printed. If set too small, the optimization will
        be slower due to frequent host-device synchronization. This effect can be
        seen by noting the iterations per second on the progress bar.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        ``projection``, and ``loss_curve`` entries.
        ``loss_curve`` is an array with ``maxiter // update_frequency`` rows, where
        the columns of each row are ``iteration, loss, residual_norm, tv_norm``.
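
    Example
    -------
    A minimal usage sketch; the file name is a placeholder and the parameter
    values are illustrative only (see the parameter descriptions above for
    how to choose ``step_size`` and ``delta``):

    >>> dc = DataContainer('path/to/data.h5')
    >>> result = run_spradtt(dc, maxiter=500, step_size=0.1, delta=0.05)
    >>> loss = result['loss_curve']  # columns: iteration, loss, residual_norm, tv_norm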

310 """ 

    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_sign = cuda_weighted_sign(projections.shape, delta)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    if lower_bound is not None:
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    # Allocate remaining CUDA arrays
    host_data = data_container.data.astype(np.float32)
    data = cuda.to_device(data_container.data.astype(np.float32))
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    # Create necessary host-side objects for output
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    host_projections = np.array(projections)
    lf = (host_weights * abs(host_projections - host_data)).sum()
    rg = 0.
    pbar.set_description(f'Loss: {lf:.2e} Res.norm: {lf:.2e} TV norm: {rg:.2e}')
    pbar.update(0)
    loss_curve = []
    host_tv = TotalVariation(None)
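    # In pseudocode, each iteration below performs the momentum update
    #     g <- preconditioned Huber-type gradient of the weighted residual,
    #          plus the total variation gradient
    #     x <- x - g                 (gradient step)
    #     v <- momentum * (v + g)    (accumulate and decay the momentum term)
    #     x <- max(x - v, lower_bound)
    # i.e., a Nesterov-style accelerated descent with a lower-bound projection,
    # carried out entirely in-place on the device.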

    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # residual norm gradient
        weighted_sign(data, projections, weights)
        # john transform gradient
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply total variation regularization
        tv_gradient(reconstruction, gradient)
        # update reconstruction by gradient (correction term)
        difference(reconstruction, gradient)
        # compute updated momentum term
        sum_kernel(total_gradient, gradient)
        # apply momentum decay
        rescale(total_gradient)
        # update reconstruction by gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold by lower bound
        if lower_bound is not None:
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(projections)
            host_recon = np.array(reconstruction)
            rg = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg * tv_weight + rn
            loss_curve.append((i, lf, rn, rg))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} TV norm: {rg:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector,
                basis_set=basis_set, projection=np.array(projections), loss_curve=np.array(loss_curve))