WarpX
Loading...
Searching...
No Matches
JacobiPC.H
Go to the documentation of this file.
/* Copyright 2024 Debojyoti Ghosh
 *
 * This file is part of WarpX.
 *
 * License: BSD-3-Clause-LBNL
 */
#ifndef JACOBI_PC_H_
#define JACOBI_PC_H_

#include "Fields.H"
#include "Utils/WarpXConst.H"
#include "Preconditioner.H"

#include <AMReX.H>
#include <AMReX_Array.H>
#include <AMReX_IntVect.H>
#include <AMReX_MultiFab.H>
#include <AMReX_ParallelReduce.H>
#include <AMReX_ParmParse.H>
#include <AMReX_Reduce.H>
#include <AMReX_Vector.H>

56
57template <class T, class Ops>
58class JacobiPC : public Preconditioner<T,Ops>
59{
60 public:
61
62 using RT = typename T::value_type;
63
64 JacobiPC () = default;
65
66 ~JacobiPC () override = default;
67
68 JacobiPC(const JacobiPC&) = delete;
69 JacobiPC& operator=(const JacobiPC&) = delete;
70 JacobiPC(JacobiPC&&) noexcept = delete;
71 JacobiPC& operator=(JacobiPC&&) noexcept = delete;
72
73 void Define (const T&, Ops*) override;
74
75 void Update (const T& a_U) override;
76
80 void Apply (T&, const T&) override;
81
82 void printParameters() const override;
83
84 [[nodiscard]] inline bool IsDefined () const override { return m_is_defined; }
85
93 RT computeResidualNorm (const T& a_x, const T& a_b) const;
94
95 protected:
96
98
99 bool m_is_defined = false;
100
101 bool m_verbose = false;
102 int m_max_iter = 200;
103 RT m_omega = RT(1.0);
104 bool m_adaptive_omega = true;
105
106 RT m_atol = 1.0e-16;
107 RT m_rtol = 1.0e-4;
108
109 Ops* m_ops = nullptr;
110
112
114
115 bool m_has_offdiag = false;
116 bool m_work_defined = false;
117
120
122
123 void readParameters();
124
125 void copyToGhostAndFill (
127
128 private:
129
130};
131
132template <class T, class Ops>
134{
135 using namespace amrex;
137 Print() << pc_name << " verbose: " << (m_verbose?"true":"false") << "\n";
138 Print() << pc_name << " max iter: " << m_max_iter << "\n";
139 Print() << pc_name << " omega: " << m_omega << "\n";
140 Print() << pc_name << " adaptive omega: " << (m_adaptive_omega?"true":"false") << "\n";
141 Print() << pc_name << " absolute tolerance: " << m_atol << "\n";
142 Print() << pc_name << " relative tolerance: " << m_rtol << "\n";
143}
144
145template <class T, class Ops>
147{
149 pp.query("verbose", m_verbose);
150 pp.query("max_iter", m_max_iter);
151 if (pp.query("omega", m_omega)) {
152 m_adaptive_omega = false;
153 }
154 pp.query("adaptive_omega", m_adaptive_omega);
155 pp.query("absolute_tolerance", m_atol);
156 pp.query("relative_tolerance", m_rtol);
157}
158
159template <class T, class Ops>
162{
163 for (int n = 0; n < m_num_amr_levels; n++) {
164 const auto& geom = m_ops->GetGeometry(n);
165 for (int dim = 0; dim < 3; dim++) {
166 m_x_ghost[n][dim].setVal(RT(0.0));
167 amrex::MultiFab::Copy(m_x_ghost[n][dim], *(a_x_mfarrvec[n][dim]),
169 m_x_ghost[n][dim].FillBoundary(geom.periodicity());
170 }
171 }
172}
173
174template <class T, class Ops>
175auto JacobiPC<T,Ops>::computeResidualNorm (const T& a_x, const T& a_b) const -> RT
176{
177 using namespace amrex;
178 auto& b_mfarrvec = a_b.getArrayVec();
179 auto& x_mfarrvec = a_x.getArrayVec();
180
181 RT res_sq = RT(0.0);
182 for (int n = 0; n < m_num_amr_levels; n++) {
183 for (int dim = 0; dim < 3; dim++) {
184 const auto& b_mf = *(b_mfarrvec[n][dim]);
185 const auto& a_mf = *((*m_bcoefs)[n][dim]);
186 const int ncomp = a_mf.nComp();
187
188 GpuArray<int,3> nc = {1, 1, 1};
189 GpuArray<int,3> w = {0, 0, 0};
190 if (ncomp > 1) {
191 for (int d = 0; d < AMREX_SPACEDIM; d++) {
192 nc[d] = m_ops->GetMassMatricesPCnComp(dim, d);
193 w[d] = (nc[d] - 1) / 2;
194 }
195 }
196
197 ReduceOps<ReduceOpSum> reduce_op;
198 ReduceData<RT> reduce_data(reduce_op);
199 using ReduceTuple = typename decltype(reduce_data)::Type;
200
201 // Use the ghost-padded copy for stencil access when available
202 const auto& xg_mf = (ncomp > 1 && m_work_defined)
203 ? m_x_ghost[n][dim] : *(x_mfarrvec[n][dim]);
204
205 for (MFIter mfi(b_mf); mfi.isValid(); ++mfi) {
206 const Box bx = mfi.validbox();
207 auto xg_arr = xg_mf.const_array(mfi);
208 auto b_arr = b_mf.const_array(mfi);
209 auto a_arr = a_mf.const_array(mfi);
210
211 if (ncomp == 1) {
212 auto x_arr = x_mfarrvec[n][dim]->const_array(mfi);
213 reduce_op.eval(bx, reduce_data,
214 [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
215 {
216 const RT Ax = x_arr(i,j,k) * (RT(1.0) + a_arr(i,j,k,0));
217 const RT r = b_arr(i,j,k) - Ax;
218 return {r * r};
219 });
220 } else {
221 reduce_op.eval(bx, reduce_data,
222 [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
223 {
224 RT mx = RT(0.0);
225 int comp = 0;
226 for (int c2 = 0; c2 < nc[2]; c2++) {
227 const int kk = k + c2 - w[2];
228 for (int c1 = 0; c1 < nc[1]; c1++) {
229 const int jj = j + c1 - w[1];
230 for (int c0 = 0; c0 < nc[0]; c0++) {
231 const int ii = i + c0 - w[0];
232 mx += a_arr(i,j,k,comp) * xg_arr(ii,jj,kk);
233 comp++;
234 }
235 }
236 }
237 const RT Ax = xg_arr(i,j,k) + mx;
238 const RT r = b_arr(i,j,k) - Ax;
239 return {r * r};
240 });
241 }
242 }
243 RT dim_res_sq = amrex::get<0>(reduce_data.value(reduce_op));
245 res_sq += dim_res_sq;
246 }
247 }
248 return std::sqrt(res_sq);
249}
250
251template <class T, class Ops>
252void JacobiPC<T,Ops>::Define ( const T& a_U,
253 Ops* const a_ops )
254{
255 BL_PROFILE("JacobiPC::Define()");
256 using namespace amrex;
257
259 !IsDefined(),
260 "JacobiPC::Define() called on defined object" );
262 (a_ops != nullptr),
263 "JacobiPC::Define(): a_ops is nullptr" );
265 a_U.getArrayVecType()==warpx::fields::FieldType::Efield_fp,
266 "JacobiPC::Define() must be called with Efield_fp type");
267
268 m_ops = a_ops;
269 m_num_amr_levels = m_ops->numAMRLevels();
270 m_bcoefs = m_ops->GetMassMatricesCoeff();
271
273
274 m_is_defined = true;
275}
276
277template <class T, class Ops>
278void JacobiPC<T,Ops>::Update (const T& a_U)
279{
280 BL_PROFILE("JacobiPC::Update()");
281 using namespace amrex;
282
284 IsDefined(),
285 "JacobiPC::Update() called on undefined object" );
286
287 if (m_bcoefs != nullptr && !m_work_defined) {
288 auto& u_mfarrvec = a_U.getArrayVec();
289 m_work.resize(m_num_amr_levels);
291
292 // Determine max stencil width needed across all dims/levels
293 m_stencil_width = 0;
294 for (int n = 0; n < m_num_amr_levels; n++) {
295 for (int dim = 0; dim < 3; dim++) {
296 if ((*m_bcoefs)[n][dim]->nComp() > 1) {
297 m_has_offdiag = true;
298 for (int d = 0; d < AMREX_SPACEDIM; d++) {
299 const int nc_d = m_ops->GetMassMatricesPCnComp(dim, d);
300 const int w_d = (nc_d - 1) / 2;
301 m_stencil_width = std::max(m_stencil_width, w_d);
302 }
303 }
304 }
305 }
306
307 const amrex::IntVect nghost(m_stencil_width);
308 for (int n = 0; n < m_num_amr_levels; n++) {
309 for (int dim = 0; dim < 3; dim++) {
310 m_work[n][dim].define(
311 u_mfarrvec[n][dim]->boxArray(),
312 u_mfarrvec[n][dim]->DistributionMap(),
313 1, 0);
314 if (m_has_offdiag) {
315 m_x_ghost[n][dim].define(
316 u_mfarrvec[n][dim]->boxArray(),
317 u_mfarrvec[n][dim]->DistributionMap(),
318 1, nghost);
319 }
320 }
321 }
322 m_work_defined = true;
323 }
324
325 if (m_verbose) {
327 << ": theta*dt = " << this->m_dt << "\n";
328 }
329}
330
331template <class T, class Ops>
332void JacobiPC<T,Ops>::Apply (T& a_x, const T& a_b)
333{
334 BL_PROFILE("JacobiPC::Apply()");
335 using namespace amrex;
336
338 IsDefined(),
339 "JacobiPC::Apply() called on undefined object" );
341 a_x.getArrayVecType()==warpx::fields::FieldType::Efield_fp,
342 "JacobiPC::Apply() - a_x must be Efield_fp type");
344 a_b.getArrayVecType()==warpx::fields::FieldType::Efield_fp,
345 "JacobiPC::Apply() - a_b must be Efield_fp type");
346
347 if (m_bcoefs == nullptr) {
348 a_x.Copy(a_b);
349 return;
350 }
351
352 auto& b_mfarrvec = a_b.getArrayVec();
353 auto& x_mfarrvec = a_x.getArrayVec();
355 ((b_mfarrvec.size() == m_num_amr_levels) && (x_mfarrvec.size() == m_num_amr_levels)),
356 "Error in JacobiPC::Apply() - mismatch in number of levels." );
357
358 const int max_omega_reductions = m_adaptive_omega ? 10 : 0;
359 RT omega = m_omega;
360
361 for (int adapt = 0; adapt <= max_omega_reductions; adapt++) {
362
363 // Initial guess: x = b / (1 + M_diag)
364 for (int n = 0; n < m_num_amr_levels; n++) {
365 for (int dim = 0; dim < 3; dim++) {
366 const auto& b_mf = *(b_mfarrvec[n][dim]);
367 const auto& a_mf = *((*m_bcoefs)[n][dim]);
368 auto& x_mf = *(x_mfarrvec[n][dim]);
369 const int diag_comp = (a_mf.nComp() - 1) / 2;
370
371 for (MFIter mfi(x_mf,TilingIfNotGPU()); mfi.isValid(); ++mfi) {
372 const Box bx = mfi.tilebox();
373 auto x_arr = x_mf.array(mfi);
374 auto b_arr = b_mf.const_array(mfi);
375 auto a_arr = a_mf.const_array(mfi);
376
377 ParallelFor(bx,
378 [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
379 {
380 x_arr(i,j,k) = b_arr(i,j,k)
381 / (RT(1.0) + a_arr(i,j,k,diag_comp));
382 });
383 }
384 }
385 }
386
387 if (!m_has_offdiag) {
388 if (m_verbose) {
389 const RT bnorm = a_b.norm2();
390 const RT res = computeResidualNorm(a_x, a_b);
391 amrex::Print() << " pc_jacobi (diag): residual = " << res
392 << ", " << res / std::max(bnorm, RT(1.e-30))
393 << " (rel.)\n";
394 }
395 return;
396 }
397
398 copyToGhostAndFill(x_mfarrvec);
399
400 const RT bnorm = a_b.norm2();
401 const RT res0 = computeResidualNorm(a_x, a_b);
402 RT res_prev = res0;
403
404 if (m_verbose) {
405 amrex::Print() << " pc_jacobi: iter = 0, residual = " << res0
406 << ", " << res0 / std::max(bnorm, RT(1.e-30))
407 << " (rel.), omega = " << omega << "\n";
408 }
409
410 bool diverged = false;
411
412 for (int iter = 0; iter < m_max_iter; iter++) {
413
414 copyToGhostAndFill(x_mfarrvec);
415
416 for (int n = 0; n < m_num_amr_levels; n++) {
417 for (int dim = 0; dim < 3; dim++) {
418 const auto& b_mf = *(b_mfarrvec[n][dim]);
419 const auto& a_mf = *((*m_bcoefs)[n][dim]);
420 auto& x_mf = *(x_mfarrvec[n][dim]);
421 auto& w_mf = m_work[n][dim];
422 const auto& xg_mf = m_x_ghost[n][dim];
423
424 const int ncomp = a_mf.nComp();
425 if (ncomp <= 1) { continue; }
426
427 const int diag_comp = (ncomp - 1) / 2;
428
429 amrex::GpuArray<int,3> nc = {1, 1, 1};
430 amrex::GpuArray<int,3> w = {0, 0, 0};
431 for (int d = 0; d < AMREX_SPACEDIM; d++) {
432 nc[d] = m_ops->GetMassMatricesPCnComp(dim, d);
433 w[d] = (nc[d] - 1) / 2;
434 }
435
436 const RT om = omega;
437
438 for (MFIter mfi(x_mf,TilingIfNotGPU()); mfi.isValid(); ++mfi)
439 {
440 const Box bx = mfi.tilebox();
441 auto w_arr = w_mf.array(mfi);
442 auto xg_arr = xg_mf.const_array(mfi);
443 auto a_arr = a_mf.const_array(mfi);
444
445 ParallelFor(bx,
446 [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
447 {
448 RT mx = RT(0.0);
449 int comp = 0;
450 for (int c2 = 0; c2 < nc[2]; c2++) {
451 const int kk = k + c2 - w[2];
452 for (int c1 = 0; c1 < nc[1]; c1++) {
453 const int jj = j + c1 - w[1];
454 for (int c0 = 0; c0 < nc[0]; c0++) {
455 const int ii = i + c0 - w[0];
456 mx += a_arr(i,j,k,comp)
457 * xg_arr(ii,jj,kk);
458 comp++;
459 }
460 }
461 }
462 w_arr(i,j,k) = xg_arr(i,j,k) + mx;
463 });
464 }
465
466 for (MFIter mfi(x_mf,TilingIfNotGPU()); mfi.isValid(); ++mfi)
467 {
468 const Box bx = mfi.tilebox();
469 auto x_arr = x_mf.array(mfi);
470 auto b_arr = b_mf.const_array(mfi);
471 auto w_arr = w_mf.const_array(mfi);
472 auto a_arr = a_mf.const_array(mfi);
473
474 ParallelFor(bx,
475 [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
476 {
477 const RT r = b_arr(i,j,k) - w_arr(i,j,k);
478 x_arr(i,j,k) += om * r
479 / (RT(1.0) + a_arr(i,j,k,diag_comp));
480 });
481 }
482 }
483 }
484
485 copyToGhostAndFill(x_mfarrvec);
486
487 const RT res_norm = computeResidualNorm(a_x, a_b);
488 if (m_verbose) {
489 amrex::Print() << " pc_jacobi: iter = " << (iter+1)
490 << ", residual = " << res_norm
491 << ", " << res_norm / std::max(bnorm, RT(1.e-30))
492 << " (rel.)\n";
493 }
494
495 if (m_adaptive_omega && res_norm > res_prev) {
496 omega *= RT(0.9);
497 if (m_verbose) {
498 amrex::Print() << " pc_jacobi: residual increased, "
499 << "reducing omega to " << omega << "\n";
500 }
501 diverged = true;
502 break;
503 }
504
505 if (res_norm < m_atol || res_norm < m_rtol * res0) {
506 if (m_verbose) {
507 amrex::Print() << " pc_jacobi: converged at iter "
508 << (iter+1) << "\n";
509 }
510 m_omega = omega;
511 return;
512 }
513 res_prev = res_norm;
514 }
515
516 if (!diverged) { break; }
517 }
518
519 m_omega = omega;
520}
521
522#endif
#define BL_PROFILE(a)
#define AMREX_GPU_DEVICE
amrex::ParmParse pp
@ pc_jacobi
Definition PreconditionerLibrary.H:14
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition TextMsg.H:13
bool m_is_defined
Definition JacobiPC.H:99
JacobiPC(JacobiPC &&) noexcept=delete
void Apply(T &, const T &) override
Solve (I + M) x = b via damped Jacobi iteration.
Definition JacobiPC.H:332
void printParameters() const override
Print parameters.
Definition JacobiPC.H:133
int m_stencil_width
Definition JacobiPC.H:121
void copyToGhostAndFill(const amrex::Vector< amrex::Array< amrex::MultiFab *, 3 > > &a_x_mfarrvec)
Definition JacobiPC.H:160
~JacobiPC() override=default
RT m_rtol
Definition JacobiPC.H:107
amrex::Array< amrex::MultiFab, 3 > MFArr
Definition JacobiPC.H:97
RT m_omega
Definition JacobiPC.H:103
const amrex::Vector< amrex::Array< amrex::MultiFab *, 3 > > * m_bcoefs
Definition JacobiPC.H:113
bool m_has_offdiag
Definition JacobiPC.H:115
Ops * m_ops
Definition JacobiPC.H:109
JacobiPC(const JacobiPC &)=delete
typename T::value_type RT
Definition JacobiPC.H:62
JacobiPC()=default
bool m_adaptive_omega
Definition JacobiPC.H:104
bool m_work_defined
Definition JacobiPC.H:116
amrex::Vector< amrex::Array< amrex::MultiFab, 3 > > m_work
Definition JacobiPC.H:118
RT computeResidualNorm(const T &a_x, const T &a_b) const
Compute ||b - (I+M)*x|| for residual monitoring. Uses the ghost-padded m_x_ghost (which must already ...
Definition JacobiPC.H:175
void Update(const T &a_U) override
Update the preconditioner.
Definition JacobiPC.H:278
amrex::Vector< amrex::Array< amrex::MultiFab, 3 > > m_x_ghost
Definition JacobiPC.H:119
int m_num_amr_levels
Definition JacobiPC.H:111
void readParameters()
Definition JacobiPC.H:146
int m_max_iter
Definition JacobiPC.H:102
RT m_atol
Definition JacobiPC.H:106
JacobiPC & operator=(const JacobiPC &)=delete
void Define(const T &, Ops *) override
Define the preconditioner.
Definition JacobiPC.H:252
bool m_verbose
Definition JacobiPC.H:101
bool IsDefined() const override
Check if the nonlinear solver has been defined.
Definition JacobiPC.H:84
RT m_dt
Definition Preconditioner.H:117
Preconditioner()=default
Default constructor.
__host__ static __device__ constexpr IntVectND< dim > TheZeroVector() noexcept
static void Copy(MultiFab &dst, const MultiFab &src, int srccomp, int dstcomp, int numcomp, int nghost)
std::enable_if_t< IsFabArray< MF >::value > eval(MF const &mf, IntVect const &nghost, D &reduce_data, F &&f)
std::array< T, N > Array
void Sum(T &v, MPI_Comm comm)
MPI_Comm CommunicatorSub() noexcept
int nComp(FabArrayBase const &fa)
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
DistributionMapping const & DistributionMap(FabArrayBase const &fa)
std::string getEnumNameString(T const &v)
BoxND< 3 > Box
IntVectND< 3 > IntVect
bool TilingIfNotGPU() noexcept
BoxArray const & boxArray(FabArrayBase const &fa)
__host__ __device__ constexpr int get(IntVectND< dim > const &iv) noexcept
@ Efield_fp
Definition Fields.H:97