Coverage for local_installation_linux/mumott/pipelines/async_pipelines.py: 98%

182 statements  

coverage.py v7.3.2, created at 2024-08-11 23:08 +0000

import sys
import numpy as np
import tqdm
from numba import cuda

from mumott.core.john_transform_cuda import (john_transform_cuda,
                                             john_transform_adjoint_cuda)
from mumott.core.cuda_kernels import (cuda_weighted_difference, cuda_scaled_difference,
                                      cuda_sum, cuda_rescale, cuda_difference,
                                      cuda_rescale_array, cuda_framewise_contraction,
                                      cuda_framewise_contraction_adjoint,
                                      cuda_l1_gradient, cuda_tv_gradient, cuda_weighted_sign,
                                      cuda_lower_bound)
from mumott.data_handling import DataContainer
from mumott.methods.basis_sets import GaussianKernels
from mumott.methods.basis_sets.base_basis_set import BasisSet
from mumott.methods.projectors import SAXSProjectorCUDA
from mumott.methods.utilities import get_tensor_sirt_weights, get_tensor_sirt_preconditioner
from mumott.optimization.regularizers import L1Norm, TotalVariation


def run_tensor_sirt(data_container: DataContainer,
                    maxiter: int = 10,
                    basis_set: BasisSet = None,
                    update_frequency: int = 5):
    """An asynchronous implementation of the tensor SIRT algorithm.
    This approach uses only asynchronous, in-place operations on the GPU,
    and is therefore much faster than standard pipelines, as well as more
    memory-efficient.

    Parameters
    ----------
    data_container
        The container holding the data and geometry.
    maxiter
        Maximum number of iterations.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress
        printing. If set too small, the optimization will be slower due to
        frequent host-device synchronization. This effect can be seen by
        noting the iterations per second on the progress bar.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``,
        and ``projection`` entries.
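
    Examples
    --------
    Minimal illustrative usage; the file name is a placeholder for a
    dataset on disk:

    >>> from mumott.data_handling import DataContainer
    >>> dc = DataContainer('my_data.h5')  # doctest: +SKIP
    >>> result = run_tensor_sirt(dc, maxiter=20)  # doctest: +SKIP
    >>> coefficients = result['reconstruction']  # doctest: +SKIP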

    """
    # Create projector for simple fetching of parameters
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates)
    # Get weights, etc.
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    weights = cuda.to_device(weights.astype(np.float32))
    preconditioner = cuda.to_device(get_tensor_sirt_preconditioner(
        projector=projector, basis_set=basis_set).astype(np.float32))
    matrix = basis_set.projection_matrix.astype(np.float32)
    # Allocate matrices, compile kernels
    data = cuda.to_device(data_container.data.astype(np.float32))
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    projections = cuda.to_device(
        np.zeros(data_container.data.shape[:-1] + (len(basis_set),),
                 dtype=np.float32))
    data_approximation = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_cuda(reconstruction, projections, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_cuda(reconstruction, projections, *projector.john_transform_parameters)
    contraction = cuda_framewise_contraction(data_container.data.shape[:-1], *matrix.shape[1:])
    contraction_adjoint = cuda_framewise_contraction_adjoint(
        data_container.data.shape[:-1],
        *matrix.shape[1:])
    weighted_difference = cuda_weighted_difference(data_approximation.shape)
    scaled_difference = cuda_scaled_difference(reconstruction.shape)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    matrix = cuda.to_device(matrix)
    # create necessary host-side objects
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.update(0)
    # Run asynchronous reconstruction
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, data_approximation, weights)
        # compute gradient with respect to tensor representation
        contraction_adjoint(data_approximation, matrix, projections)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # update reconstruction
        scaled_difference(gradient, reconstruction, preconditioner)
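        # (net effect of the steps above, up to the kernels' sign conventions:
        #  x <- x - P * A^T W (A x - b), with A the combined basis contraction
        #  and John transform, W the SIRT weights and P the preconditioner)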

        # forward john transform
        forward(reconstruction, projections)
        # forward tensor representation
        contraction(projections, matrix, data_approximation)
        # update user on progress
        if (i + 1) % update_frequency == 0:
            cuda.synchronize()
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(data_approximation))


def run_motr(data_container: DataContainer,
             maxiter: int = 10,
             momentum: float = 0.9,
             l1_weight: float = 1e-3,
             tv_weight: float = 1e-3,
             basis_set: BasisSet = None,
             lower_bound: float = None,
             step_size: float = 1.,
             update_frequency: int = 5):
    """MOTR (MOmentum Total variation Reconstruction) pipeline
    that uses only asynchronous GPU (device) computations
    during the reconstruction. This leads to a large speedup compared to standard pipelines,
    which synchronize with the CPU several times per iteration. However, this implementation
    is less modular than standard pipelines.

    This pipeline uses Nesterov momentum in combination with total variation and L1 regularization,
    as well as the tensor SIRT preconditioner-weight pair, which normalizes the gradient based on
    the projection geometry and basis set.

    This is also a relatively efficient implementation with respect to device memory, as all arithmetic
    operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    momentum
        Momentum for the Nesterov gradient.
    l1_weight
        Weight for L1 regularization.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to
        e.g. enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``, which is the default setting.
    step_size
        Step size for each iteration in the reconstruction. The default value of ``1.``
        should be suitable in the vast majority of cases, due to the normalizing
        effect of the weight-preconditioner pair used.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress
        printing. If set too small, the optimization will be slower due to
        frequent host-device synchronization. This effect can be seen by
        noting the iterations per second on the progress bar. The printed
        residual norm does not include the regularization terms.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``, ``projection``,
        and ``loss_curve`` entries. ``loss_curve`` is an array with
        ``maxiter // update_frequency`` rows, where the entries in each row are
        ``iteration, loss, residual_norm, l1_norm, tv_norm``.
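
    Examples
    --------
    Minimal illustrative usage; the file name is a placeholder, and the
    regularization weights typically need tuning to the data at hand:

    >>> from mumott.data_handling import DataContainer
    >>> dc = DataContainer('my_data.h5')  # doctest: +SKIP
    >>> result = run_motr(dc, maxiter=50, l1_weight=1e-4, tv_weight=1e-4)  # doctest: +SKIP
    >>> result['loss_curve'].shape  # doctest: +SKIP
    (10, 5)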

    """
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(projector=projector,
                                                   basis_set=basis_set).astype(np.float32))
    matrix = basis_set.projection_matrix.astype(np.float32)
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels:
    data = cuda.to_device(data_container.data.astype(np.float32))
    projections = cuda.to_device(
        np.zeros(data_container.data.shape[:-1] + (len(basis_set),),
                 dtype=np.float32))
    data_approximation = cuda.to_device(np.zeros_like(data_container.data, dtype=np.float32))
    forward = john_transform_cuda(reconstruction, projections, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_cuda(reconstruction, projections, *projector.john_transform_parameters)
    contraction = cuda_framewise_contraction(data_container.data.shape[:-1],
                                             *matrix.shape[1:])
    contraction_adjoint = cuda_framewise_contraction_adjoint(data_container.data.shape[:-1],
                                                             *matrix.shape[1:])
    weighted_difference = cuda_weighted_difference(data_approximation.shape)
    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    l1_gradient = cuda_l1_gradient(reconstruction.shape, l1_weight)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    if lower_bound is not None:  # coverage: condition was never false during measurement
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    # Allocate remaining CUDA arrays
    matrix = cuda.to_device(matrix)
    host_data = data_container.data.astype(np.float32)
    host_projections = np.array(data_approximation)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    # create necessary host-side objects
    diff = host_projections - host_data
    lf = (host_weights * diff * diff).sum()
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    pbar.set_description(f'Loss: {lf:.2e}')
    pbar.update(0)
    loss_curve = []
    host_l1 = L1Norm()
    host_tv = TotalVariation(None)
    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # compute residual gradient
        weighted_difference(data, data_approximation, weights)
        # compute gradient with respect to tensor representation
        contraction_adjoint(data_approximation, matrix, projections)
        # compute gradient with respect to john transform
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply regularization
        l1_gradient(reconstruction, gradient)
        tv_gradient(reconstruction, gradient)
        # apply gradient (correction term)
        difference(reconstruction, gradient)
        # add to accumulated gradient
        sum_kernel(total_gradient, gradient)
        # scale by momentum coefficient
        rescale(total_gradient)
        # apply gradient (momentum term)
        difference(reconstruction, total_gradient)
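        # (net effect of the four in-place steps above, writing g for the
        #  preconditioned, regularized gradient and v for the accumulated
        #  gradient: v <- momentum * (v + g) and x <- x - g - v, i.e. a
        #  Nesterov-style momentum update)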

        # threshold
        if lower_bound is not None:  # coverage: condition was never false during measurement
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # forward tensor representation
        contraction(projections, matrix, data_approximation)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(data_approximation)
            host_recon = np.array(reconstruction)
            rg1 = host_l1.get_regularization_norm(host_recon)['regularization_norm']
            rg2 = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg1 * l1_weight + rg2 * tv_weight + rn
            loss_curve.append((i, lf, rn, rg1, rg2))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} L1 norm: {rg1:.2e} TV norm: {rg2:.2e}')
            pbar.update(update_frequency)
    return dict(reconstruction=np.array(reconstruction), projector=projector, basis_set=basis_set,
                projection=np.array(data_approximation), loss_curve=np.array(loss_curve))


def run_radtt(data_container: DataContainer,
              maxiter: int = 10,
              momentum: float = 0.9,
              step_size: float = 1.,
              delta: float = 1.,
              tv_weight: float = 1e-3,
              basis_set: BasisSet = None,
              lower_bound: float = 0.,
              update_frequency: int = 5):
    """RADTT (Robust And Denoised Tensor Tomography) pipeline
    that uses only asynchronous GPU (device) computations
    during the reconstruction. This leads to a large speedup compared to standard pipelines,
    which synchronize with the CPU several times per iteration. However, this implementation
    is less modular than standard pipelines.

    This pipeline uses Nesterov accelerated gradient descent with a Huber loss function
    and total variation regularization. This reconstruction approach requires a large number
    of iterations, but is robust to outliers in the data.

    This is also a relatively efficient implementation with respect to device memory, as all arithmetic
    operations are done in-place.

    Parameters
    ----------
    data_container
        The container for the data.
    maxiter
        Maximum number of iterations.
    momentum
        Momentum for the Nesterov gradient.
    step_size
        Step size for L1 optimization. The step size for the
        L2 part of the optimization is ``step_size / (2 * delta)``. A good choice
        for the step size is typically around the same order of magnitude
        as each coefficient of the reconstruction.
    delta
        Threshold for the transition to L2 optimization. Should be small relative to the data.
        Does not affect the total variation term.
    tv_weight
        Weight for total variation regularization.
    basis_set
        User-provided basis set to be used, if desired.
        By default, a ``GaussianKernels`` basis set is used.
    lower_bound
        Lower bound to threshold coefficients at in each iteration. Can be used to
        e.g. enforce non-negativity for local basis sets such as ``GaussianKernels``.
        Not used if set to ``None``; the default is ``0.``.
    update_frequency
        Frequency (in iterations) of host-device synchronization and progress
        printing. If set too small, the optimization will be slower due to
        frequent host-device synchronization. This effect can be seen by
        noting the iterations per second on the progress bar. The printed
        residual norm does not include the regularization term.

    Returns
    -------
        Dictionary with ``reconstruction``, ``projector``, ``basis_set``, ``projection``,
        and ``loss_curve`` entries. The entries in each row of ``loss_curve`` are
        ``iteration, loss, residual_norm, tv_norm``.
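
    Examples
    --------
    Minimal illustrative usage; the file name is a placeholder, and
    ``step_size`` and ``delta`` typically need tuning to the scale of the data:

    >>> from mumott.data_handling import DataContainer
    >>> dc = DataContainer('my_data.h5')  # doctest: +SKIP
    >>> result = run_radtt(dc, maxiter=100, step_size=0.1, delta=0.5)  # doctest: +SKIP
    >>> loss_curve = result['loss_curve']  # doctest: +SKIP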

    """
    projector = SAXSProjectorCUDA(data_container.geometry)
    if basis_set is None:  # coverage: condition was never false during measurement
        grid_scale = data_container.data.shape[-1] // 2 + 1
        basis_set = GaussianKernels(grid_scale=grid_scale,
                                    probed_coordinates=data_container.geometry.probed_coordinates)
    weights = get_tensor_sirt_weights(projector=projector,
                                      basis_set=basis_set)
    weights[data_container.projections.weights <= 0.] = 0.
    host_weights = weights.astype(np.float32)
    weights = cuda.to_device(host_weights)
    step_size = np.float32(step_size)
    preconditioner = cuda.to_device(
        step_size * get_tensor_sirt_preconditioner(
            projector=projector, basis_set=basis_set).astype(np.float32))
    matrix = basis_set.projection_matrix.astype(np.float32)
    reconstruction = cuda.to_device(
        np.zeros(tuple(data_container.geometry.volume_shape) + (len(basis_set),), dtype=np.float32))
    # Compile all CUDA kernels
    data = cuda.to_device(data_container.data.astype(np.float32))
    projections = cuda.to_device(
        np.zeros(data_container.data.shape[:-1] + (len(basis_set),), dtype=np.float32))
    data_approximation = cuda.to_device(np.zeros(data_container.data.shape, dtype=np.float32))
    forward = john_transform_cuda(reconstruction, projections, *projector.john_transform_parameters)
    adjoint = john_transform_adjoint_cuda(reconstruction, projections, *projector.john_transform_parameters)
    contraction = cuda_framewise_contraction(data.shape[:-1], *matrix.shape[1:])
    contraction_adjoint = cuda_framewise_contraction_adjoint(data.shape[:-1], *matrix.shape[1:])
    weighted_sign = cuda_weighted_sign(data_approximation.shape, delta)
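    # (by its name and the ``delta`` parameter, cuda_weighted_sign evaluates
    #  the weighted gradient of the Huber-type loss described in the docstring:
    #  sign-like for residuals larger than delta, linear below it)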

    difference = cuda_difference(reconstruction.shape)
    sum_kernel = cuda_sum(reconstruction.shape)
    tv_gradient = cuda_tv_gradient(reconstruction.shape, tv_weight)
    scale_array = cuda_rescale_array(reconstruction.shape)
    rescale = cuda_rescale(reconstruction.shape, momentum)
    # Allocate remaining CUDA arrays
    matrix = cuda.to_device(matrix)
    host_data = data_container.data.astype(np.float32)
    gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    total_gradient = cuda.to_device(np.zeros(reconstruction.shape, dtype=np.float32))
    pbar = tqdm.trange(maxiter, file=sys.stdout)
    host_projections = np.array(data_approximation)
    lf = (host_weights * abs(host_projections - host_data)).sum()
    rg = 0.
    pbar.set_description(f'Loss: {lf:.2e} Res.norm: {lf:.2e} TV norm: {rg:.2e}')
    pbar.update(0)
    if lower_bound is not None:  # coverage: condition was never false during measurement
        threshold_lower = cuda_lower_bound(reconstruction.shape, lower_bound)
    loss_curve = []
    host_tv = TotalVariation(None)
    # Actual reconstruction. This part executes asynchronously.
    for i in range(maxiter):
        # residual norm gradient
        weighted_sign(data, data_approximation, weights)
        # tensor representation gradient
        contraction_adjoint(data_approximation, matrix, projections)
        # john transform gradient
        adjoint(gradient, projections)
        # apply preconditioner
        scale_array(gradient, preconditioner)
        # apply total variation regularization
        tv_gradient(reconstruction, gradient)
        # update reconstruction by gradient (correction term)
        difference(reconstruction, gradient)
        # compute updated momentum term
        sum_kernel(total_gradient, gradient)
        # apply momentum decay
        rescale(total_gradient)
        # update reconstruction by gradient (momentum term)
        difference(reconstruction, total_gradient)
        # threshold by lower bound
        if lower_bound is not None:  # coverage: condition was never false during measurement
            threshold_lower(reconstruction)
        # forward john transform
        forward(reconstruction, projections)
        # forward tensor projection
        contraction(projections, matrix, data_approximation)
        # compute host-side quantities to update user on progress
        if (i + 1) % update_frequency == 0:
            # forces synchronization
            host_projections = np.array(data_approximation)
            host_recon = np.array(reconstruction)
            rg = host_tv.get_regularization_norm(host_recon)['regularization_norm']
            diff = host_projections - host_data
            rn = (host_weights * diff * diff).sum()
            lf = rg * tv_weight + rn
            loss_curve.append((i, lf, rn, rg))
            pbar.set_description(f'Loss: {lf:.2e} Res.norm: {rn:.2e} TV norm: {rg:.2e}')
            pbar.update(update_frequency)
    # Synchronizes due to np.array(reconstruction)
    return dict(reconstruction=np.array(reconstruction), projector=projector,
                basis_set=basis_set, projection=np.array(data_approximation), loss_curve=np.array(loss_curve))