Coverage for local_installation_linux/mumott/pipelines/sparse_pipelines.py: 95%

162 statements  

coverage.py v7.3.2, created at 2024-08-11 23:08 +0000

import sys
import numpy as np
import tqdm
from numba import cuda

from mumott.core.john_transform_sparse_cuda import (john_transform_sparse_cuda,
                                                    john_transform_adjoint_sparse_cuda)
from mumott.core.cuda_kernels import (cuda_weighted_difference, cuda_scaled_difference,
                                      cuda_sum, cuda_rescale, cuda_difference,
                                      cuda_rescale_array,
                                      cuda_l1_gradient, cuda_tv_gradient, cuda_weighted_sign,
                                      cuda_lower_bound)
from mumott.data_handling import DataContainer
from mumott.methods.basis_sets import GaussianKernels
from mumott.methods.basis_sets.base_basis_set import BasisSet
from mumott.methods.projectors import SAXSProjectorCUDA
from mumott.methods.utilities import get_tensor_sirt_weights, get_tensor_sirt_preconditioner
from mumott.optimization.regularizers import L1Norm, TotalVariation


def run_stsirt(data_container: DataContainer,
               maxiter: int = 10,
               sparsity_count: int = 3,
               basis_set: BasisSet = None,
               update_frequency: int = 5):
    """A sparse-basis implementation of the tensor SIRT algorithm (Sparse Tensor SIRT).
    This approach uses only asynchronous, in-place operations on the GPU, and is
    therefore much faster than standard pipelines, as well as more memory-efficient.
    In exchange, it is less modular and requires a relatively sparse basis set.

    Parameters
    ----------
    data_container
        The data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment,
        if using the default ``BasisSet``. Not used if a custom ``BasisSet`` is provided.
    basis_set
        A custom ``BasisSet`` instance. By default, a ``GaussianKernels`` with
        ``enforce_sparsity=True`` and ``sparsity_count=sparsity_count`` is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress-bar
        updates. If set too small, the optimization will be slower due to frequent
        host-device synchronization. This effect can be seen by noting the
        iterations per second on the progress bar.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        and ``projection`` entries.
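
    Example
    -------
    A minimal usage sketch; the file name is a placeholder, and the loading
    step may differ for your data:

    >>> dc = DataContainer('path/to/data.h5')
    >>> result = run_stsirt(dc, maxiter=20)
    >>> coefficients = result['reconstruction']  # per-voxel basis-set coefficients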

53 """ 

    # Create projector for simple fetching of parameters
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    # Get weights, etc.
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    weights = cuda.to_device(weights.astype(np.float32))
    preconditioner = cuda.to_device(get_tensor_sirt_preconditioner(
        projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    # Allocate matrices, compile kernels
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_difference = cuda_weighted_difference(projections.shape)
    scaled_difference = cuda_scaled_difference(reconstruction.shape)
    data = cuda.to_device(data_container.data.astype(np.float32))
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
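    # Each loop iteration below corresponds, in matrix notation, to the
    # preconditioned SIRT update
    #     x <- x + P @ A.T @ W @ (b - A @ x),
    # where A is the sparse-basis John transform, b is the data, and P and W
    # are the tensor SIRT preconditioner and weights. All operations execute
    # asynchronously and in-place on the device.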

    # Run asynchronous reconstruction
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.update(0)
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, projections, weights)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # update reconstruction
        scaled_difference(gradient, reconstruction, preconditioner)
        # forward john transform
        forward(reconstruction, projections)
        # update user on progress
        if (i + 1) % update_frequency == 0:
            cuda.synchronize()
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(projections))


def run_smotr(data_container: DataContainer,
              maxiter: int = 10,
              sparsity_count: int = 3,
              momentum: float = 0.9,
              l1_weight: float = 1e-3,
              tv_weight: float = 1e-3,
              basis_set: BasisSet = None,
              update_frequency: int = 5,
              lower_bound: float = None,
              step_size: float = 1.):
    """ SMOTR (Sparse MOmentum Total variation Reconstruction) pipeline
    that uses only asynchronous GPU (device) computations
    during the reconstruction. This leads to a large speedup compared to standard
    pipelines, which synchronize with the CPU several times per iteration. However,
    this implementation is less modular than standard pipelines.

    This pipeline uses Nesterov momentum in combination with total variation and L1
    regularization, as well as the tensor SIRT preconditioner-weight pair.

    The implementation is also highly efficient with respect to device memory, as all
    arithmetic operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment.
    momentum
        Momentum for the Nesterov gradient.
    l1_weight
        Weight for L1 regularization.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization, at which the loss
        and norms are computed and printed. If set too small, the optimization will
        be slower due to frequent host-device synchronization. This effect can be
        seen by noting the iterations per second on the progress bar.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to,
        e.g., enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``, which is the default setting.
    step_size
        Step size for each iteration in the reconstruction. The default value of ``1.``
        should be suitable in the vast majority of cases, due to the normalizing
        effect of the weight-preconditioner pair used.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        ``projection``, and ``loss_curve`` entries.
        ``loss_curve`` is an array with ``maxiter // update_frequency`` rows, where
        the columns of each row are ``iteration, loss, residual_norm, l1_norm, tv_norm``.
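
    Example
    -------
    A minimal usage sketch; the file name is a placeholder and the
    regularization weights are illustrative only:

    >>> dc = DataContainer('path/to/data.h5')
    >>> result = run_smotr(dc, maxiter=100, l1_weight=1e-3, tv_weight=1e-3)
    >>> loss = result['loss_curve']  # columns: iteration, loss, residual_norm, l1_norm, tv_norm
    >>> coefficients = result['reconstruction']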

160 """ 

    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels:
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_difference = cuda_weighted_difference(projections.shape)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    l1_gradient = cuda_l1_gradient(reconstruction.shape, l1_weight)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    # Allocate remaining CUDA arrays
    data = cuda.to_device(data_container.data.astype(np.float32))
    host_data = data_container.data.astype(np.float32)
    host_projections = np.array(projections)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    diff = host_projections - host_data
    lf = (host_weights * diff * diff).sum()
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.set_description(f'Loss: {lf:.2e}')
    pbar.update(0)
    loss_curve = []
    host_l1 = L1Norm()
    host_tv = TotalVariation(None)

    if lower_bound is not None:
        lower_kernel = cuda_lower_bound(reconstruction.shape, lower_bound)
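    # In pseudocode, each iteration below performs the momentum update
    #     x <- x - g                 (preconditioned, regularized gradient step)
    #     v <- momentum * (v + g)    (accumulate and decay the momentum term)
    #     x <- x - v                 (apply the momentum term)
    # i.e., a Nesterov-style accelerated gradient descent, carried out entirely
    # in-place on the device.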

    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, projections, weights)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply regularization
        l1_gradient(reconstruction, gradient)
        tv_gradient(reconstruction, gradient)
        # apply gradient (correction term)
        difference(reconstruction, gradient)
        # add to accumulated gradient
        sum_kernel(total_gradient, gradient)
        # scale by momentum coefficient
        rescale(total_gradient)
        # apply gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold
        if lower_bound is not None:
            lower_kernel(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(projections)
            host_recon = np.array(reconstruction)
            rg1 = host_l1.get_regularization_norm(host_recon)['regularization_norm']
            rg2 = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg1 * l1_weight + rg2 * tv_weight + rn
            loss_curve.append((i, lf, rn, rg1, rg2))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} L1 norm: {rg1:.2e} TV norm: {rg2:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(projections), loss_curve=np.array(loss_curve))


def run_spradtt(data_container: DataContainer,
                maxiter: int = 10,
                sparsity_count: int = 3,
                momentum: float = 0.9,
                step_size: float = 1.,
                delta: float = 1.,
                tv_weight: float = 1e-3,
                basis_set: BasisSet = None,
                lower_bound: float = 0.,
                update_frequency: int = 5):
    """ SPRADTT (SParse Robust And Denoised Tensor Tomography) pipeline
    that uses only asynchronous GPU (device) computations
    during the reconstruction. This leads to a large speedup compared to standard
    pipelines, which synchronize with the CPU several times per iteration. However,
    this approach is only suitable for sparse representations, and the implementation
    is therefore less modular than standard pipelines.

    This pipeline uses Nesterov accelerated gradient descent, with a Huber loss function
    and total variation regularization. This reconstruction approach requires a large number
    of iterations, but is robust to outliers in the data.

    The implementation is also highly efficient with respect to device memory, as all
    arithmetic operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    sparsity_count
        The maximum number of non-zero matrix elements per detector segment.
    momentum
        Momentum for the Nesterov gradient. Should be in the range ``[0., 1.]``.
    step_size
        Step size for the L1 part of the optimization. The step size for the
        L2 part of the optimization is ``step_size / (2 * delta)``. A good choice
        for the step size is typically around the same order of magnitude
        as each coefficient of the reconstruction.
    delta
        Threshold for the transition to L2 optimization. Should be small relative
        to the data. Does not affect the total variation term.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to,
        e.g., enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``. The default value is ``0.``.
    update_frequency
        Frequency (in iterations) of host-device synchronization, at which the loss
        and norms are computed and printed. If set too small, the optimization will
        be slower due to frequent host-device synchronization. This effect can be
        seen by noting the iterations per second on the progress bar.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        ``projection``, and ``loss_curve`` entries.
        ``loss_curve`` is an array with ``maxiter // update_frequency`` rows, where
        the columns of each row are ``iteration, loss, residual_norm, tv_norm``.
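
    Example
    -------
    A minimal usage sketch; the file name is a placeholder and the parameter
    values are illustrative only (see the parameter descriptions above for
    how to choose ``step_size`` and ``delta``):

    >>> dc = DataContainer('path/to/data.h5')
    >>> result = run_spradtt(dc, maxiter=500, step_size=0.1, delta=0.05)
    >>> loss = result['loss_curve']  # columns: iteration, loss, residual_norm, tv_norm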

310 """ 

    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates,
                                    enforce_sparsity=True,
                                    sparsity_count=sparsity_count)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    sparse_matrix = basis_set.csr_representation
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels
    projections = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_sparse_cuda(reconstruction, projections,
                                         sparse_matrix, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_sparse_cuda(reconstruction, projections,
                                                 sparse_matrix, *projector.john_transform_parameters)
    weighted_sign = cuda_weighted_sign(projections.shape, delta)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    if lower_bound is not None:
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    # Allocate remaining CUDA arrays
    host_data = data_container.data.astype(np.float32)
    data = cuda.to_device(data_container.data.astype(np.float32))
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    # Create necessary host-side objects for output
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    host_projections = np.array(projections)
    lf = (host_weights * abs(host_projections - host_data)).sum()
    rg = 0.
    pbar.set_description(f'Loss: {lf:.2e} Res.norm: {lf:.2e} TV norm: {rg:.2e}')
    pbar.update(0)
    loss_curve = []
    host_tv = TotalVariation(None)
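    # In pseudocode, each iteration below performs the momentum update
    #     g <- preconditioned Huber-type gradient of the weighted residual,
    #          plus the total variation gradient
    #     x <- x - g                 (gradient step)
    #     v <- momentum * (v + g)    (accumulate and decay the momentum term)
    #     x <- max(x - v, lower_bound)
    # i.e., a Nesterov-style accelerated descent with a lower-bound projection,
    # carried out entirely in-place on the device.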

    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # residual norm gradient
        weighted_sign(data, projections, weights)
        # john transform gradient
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply total variation regularization
        tv_gradient(reconstruction, gradient)
        # update reconstruction by gradient (correction term)
        difference(reconstruction, gradient)
        # compute updated momentum term
        sum_kernel(total_gradient, gradient)
        # apply momentum decay
        rescale(total_gradient)
        # update reconstruction by gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold by lower bound
        if lower_bound is not None:
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(projections)
            host_recon = np.array(reconstruction)
            rg = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg * tv_weight + rn
            loss_curve.append((i, lf, rn, rg))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} TV norm: {rg:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector,
                basis_set=basis_set, projection=np.array(projections), loss_curve=np.array(loss_curve))