_static/doxyhtml/_matrix_p_c_8_h_source.html

/* Copyright 2024 Debojyoti Ghosh

 *

 * This file is part of WarpX.

 *

 * License: BSD-3-Clause-LBNL

 */

#ifndef MATRIX_PC_H_

#define MATRIX_PC_H_


#include "Fields.H"

#include "Utils/WarpXConst.H"

#include "Utils/TextMsg.H"

#include "Preconditioner.H"


#include <ablastr/fields/MultiFabRegister.H>


#include <AMReX.H>

#include <AMReX_ParmParse.H>

#include <AMReX_Array.H>

#include <AMReX_Vector.H>

#include <AMReX_MultiFab.H>


namespace MatrixPCUtils
{
    /**
     * \brief Insert a value into one row of a fixed-width sparse matrix, or
     *        add it to an entry that already exists for the same column.
     *
     * The row is held in two parallel arrays, a_cidxs (column indices) and
     * a_aij (values), each with room for a_nnz entries.
     *
     * \param[in]     a_cidx   column index; a negative index is skipped
     * \param[in]     a_val    value to insert or to add
     * \param[in,out] a_cidxs  column indices stored for this row
     * \param[in,out] a_aij    values stored for this row
     * \param[in]     a_nnz    capacity of a_cidxs and a_aij
     * \param[in,out] a_ncol   entry counter of the row; it is incremented for
     *                         every column that is not found, even when the
     *                         row is already full, so it exceeds a_nnz on failure
     *
     * \return false if a new entry was required but did not fit, true otherwise
     */
    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    bool insertOrAdd( const int a_cidx,
                      const amrex::Real a_val,
                      int* const a_cidxs,
                      amrex::Real* const a_aij,
                      const int a_nnz,
                      int& a_ncol )
    {
        // negative column index: outside domain, nothing to do
        if (a_cidx < 0) { return true; }

        // only entries that were actually stored may be searched
        const int n_stored = (a_nnz < a_ncol) ? a_nnz : a_ncol;
        for (int idx = 0; idx < n_stored; ++idx) {
            if (a_cidxs[idx] == a_cidx) {
                // column index already exists; add to value
                a_aij[idx] += a_val;
                return true;
            }
        }

        // column index not found; count it even if it cannot be stored
        const int slot = a_ncol;
        a_ncol = slot + 1;
        if (a_ncol > a_nnz) { return false; }

        a_cidxs[slot] = a_cidx;
        a_aij[slot] = a_val;
        return true;
    }

}


/**
 * \brief Preconditioner that keeps a sparse matrix on the device.
 *
 * The matrix is stored row by row with a fixed number of slots per row
 * (m_pc_mat_nnz): global row indices, number of entries used in each row,
 * global column indices, and values.
 *
 * \tparam T   vector type; must provide value_type
 * \tparam Ops operator type providing geometry and coefficient access
 */
template <class T, class Ops>
class MatrixPC : public Preconditioner<T,Ops>
{
    public:

        //! Scalar type of the vector type T
        using RT = typename T::value_type;

        MatrixPC () = default;
        ~MatrixPC () override = default;

        // Objects of this class can be neither copied nor moved
        MatrixPC(const MatrixPC&) = delete;
        MatrixPC(MatrixPC&&) noexcept = delete;
        MatrixPC& operator=(const MatrixPC&) = delete;
        MatrixPC& operator=(MatrixPC&&) noexcept = delete;

        //! Set up the preconditioner; may be called only once per object
        void Define (const T&, Ops*) override;

        //! Re-assemble the matrix, enlarging the per-row storage if required
        void Update (const T& a_U) override;

        //! Fill the matrix arrays; a nonzero return makes Update() grow
        //! m_pc_mat_nnz by that amount and assemble again
        int Assemble (const T& a_U);

        void Apply (T&, const T&) override;

        /**
         * \brief Copy the stored matrix into caller-provided device vectors.
         *
         * Every output vector is resized to the size of the corresponding
         * member before the device-to-device copy; the stream is synchronized
         * before returning.
         *
         * \param[out] a_r_indices_g copy of m_r_indices_g
         * \param[out] a_num_nz      copy of m_num_nz
         * \param[out] a_c_indices_g copy of m_c_indices_g
         * \param[out] a_a_ij        copy of m_a_ij
         * \param[out] a_n           set to m_ndofs_l
         * \param[out] a_ncols_max   set to m_pc_mat_nnz
         */
        void getPCMatrix (amrex::Gpu::DeviceVector<int>& a_r_indices_g,
                          amrex::Gpu::DeviceVector<int>& a_num_nz,
                          amrex::Gpu::DeviceVector<int>& a_c_indices_g,
                          amrex::Gpu::DeviceVector<RT>& a_a_ij,
                          int& a_n, int& a_ncols_max ) override
        {
            // resize the destination, then queue a device-to-device copy into it
            const auto copy_to = [] (auto& a_src, auto& a_dst)
            {
                a_dst.resize(a_src.size());
                amrex::Gpu::copyAsync( amrex::Gpu::deviceToDevice,
                                       a_src.begin(),
                                       a_src.end(),
                                       a_dst.begin() );
            };

            a_n = m_ndofs_l;
            a_ncols_max = m_pc_mat_nnz;

            copy_to(m_r_indices_g, a_r_indices_g);
            copy_to(m_num_nz,      a_num_nz);
            copy_to(m_c_indices_g, a_c_indices_g);
            copy_to(m_a_ij,        a_a_ij);

            // wait for the queued copies
            amrex::Gpu::streamSynchronize();
        }

        void printParameters() const override;

        //! True once Define() has completed
        [[nodiscard]] bool IsDefined () const override { return m_is_defined; }

        //! Set the name used as input-parameter prefix and in printed output
        void setName (const std::string& a_name) override { m_name = a_name; }

    protected:

        bool m_is_defined = false;  //!< set at the end of Define()
        bool m_verbose = true;      //!< read from input parameter "verbose"

        Ops* m_ops = nullptr;       //!< operator object handed to Define()

        int m_num_amr_levels = 0;   //!< obtained from m_ops->numAMRLevels()
        amrex::Vector<amrex::Geometry> m_geom;  //!< one geometry per AMR level

        int m_ndofs_l = 0;          //!< from a_U.nDOF_local()
        int m_ndofs_g = 0;          //!< from a_U.nDOF_global()
        bool m_pc_diag_only = false;  //!< read from input parameter "pc_diagonal_only"
        int m_pc_mat_nnz = 1;       //!< storage slots per matrix row; grown by Update()
        bool m_include_mass_matrices = false;  //!< set when m_bcoefs is non-null

        std::string m_name = "noname";

        // Matrix storage: the first two arrays have one entry per row,
        // the last two have m_pc_mat_nnz entries per row
        amrex::Gpu::DeviceVector<int> m_r_indices_g;
        amrex::Gpu::DeviceVector<int> m_num_nz;
        amrex::Gpu::DeviceVector<int> m_c_indices_g;
        amrex::Gpu::DeviceVector<amrex::Real> m_a_ij;

        //! Pointer returned by m_ops->GetMassMatricesCoeff(); may be null
        const amrex::Vector<amrex::Array<amrex::MultiFab*,3>>* m_bcoefs = nullptr;

        int m_num_realloc = 0;      //!< number of times Update() enlarged m_pc_mat_nnz

        //! Read the input parameters under the prefix m_name
        void readParameters();

};


/**
 * \brief Print the boolean settings of the preconditioner, one per line,
 *        each prefixed with the preconditioner's name.
 */
template <class T, class Ops>
void MatrixPC<T,Ops>::printParameters() const
{
    // textual form of a boolean setting
    const auto as_text = [] (const bool a_flag) { return a_flag ? "true" : "false"; };

    amrex::Print() << m_name << " verbose:                " << as_text(m_verbose) << "\n";
    amrex::Print() << m_name << " pc_diagonal_only:       " << as_text(m_pc_diag_only) << "\n";
    amrex::Print() << m_name << " include_mass_matrices:  " << as_text(m_include_mass_matrices) << "\n";
}


/**
 * \brief Query the input parameters of the preconditioner.
 *
 * The parameters are looked up under the prefix m_name:
 *  - "verbose"          -> m_verbose
 *  - "pc_diagonal_only" -> m_pc_diag_only
 */
template <class T, class Ops>
void MatrixPC<T,Ops>::readParameters()
{
    const amrex::ParmParse pc_params(m_name);

    pc_params.query("verbose", m_verbose);
    pc_params.query("pc_diagonal_only", m_pc_diag_only);
}


/**
 * \brief Set up the preconditioner.
 *
 * Checks the arguments, reads the input parameters, records the geometry of
 * the (single) AMR level and the number of degrees of freedom, sizes the
 * matrix arrays, and fetches the mass-matrix coefficients from the operator.
 *
 * \param[in] a_U    vector of type Efield_fp; supplies the local and global
 *                   number of degrees of freedom
 * \param[in] a_ops  operator object; must not be null
 */
template <class T, class Ops>
void MatrixPC<T,Ops>::Define ( const T& a_U,
                              Ops* const a_ops )
{
    BL_PROFILE("MatrixPC::Define()");
    using namespace amrex;

    // Argument and state checks
    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
        !IsDefined(),
        "MatrixPC::Define() called on defined object" );
    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
        (a_ops != nullptr),
        "MatrixPC::Define(): a_ops is nullptr" );
    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
        a_U.getArrayVecType()==warpx::fields::FieldType::Efield_fp,
        "MatrixPC::Define() must be called with Efield_fp type");

    // Keep the operator and pick up the runtime options
    m_ops = a_ops;
    readParameters();

    // More than one AMR level is not supported
    m_num_amr_levels = m_ops->numAMRLevels();
    if (m_num_amr_levels > 1) {
        WARPX_ABORT_WITH_MESSAGE("MatrixPC::Define(): m_num_amr_levels > 1");
    }

    // Geometry of every level
    m_geom.resize(m_num_amr_levels);
    for (int lev = 0; lev < m_num_amr_levels; ++lev) {
        m_geom[lev] = m_ops->GetGeometry(lev);
    }

    // Local and global number of degrees of freedom
    m_ndofs_l = a_U.nDOF_local();
    m_ndofs_g = a_U.nDOF_global();

    // One row per local degree of freedom, m_pc_mat_nnz slots per row
    const auto num_rows = static_cast<size_t>(m_ndofs_l);
    const auto num_slots = static_cast<size_t>(m_pc_mat_nnz) * static_cast<size_t>(m_ndofs_l);

    m_r_indices_g.resize(num_rows);
    m_num_nz.resize(num_rows);
    m_c_indices_g.resize(num_slots);
    m_a_ij.resize(num_slots);

    // Mass matrices are included only if the operator provides coefficients
    m_bcoefs = m_ops->GetMassMatricesCoeff();
    if (m_bcoefs != nullptr) { m_include_mass_matrices = true; }

    m_is_defined = true;
}


/**
 * \brief Re-assemble the preconditioner matrix.
 *
 * Assemble() is called repeatedly: each nonzero return value is added to
 * m_pc_mat_nnz and counted in m_num_realloc before the next attempt; the
 * loop ends with the first call that returns zero. A warning is recorded
 * when m_num_realloc exceeds one.
 *
 * \param[in] a_U vector passed on to Assemble()
 */
template <class T, class Ops>
void MatrixPC<T,Ops>::Update (const T& a_U)
{
    BL_PROFILE("MatrixPC::Update()");
    using namespace amrex;

    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
        IsDefined(),
        "MatrixPC::Update() called on undefined object" );

    // Assemble until the per-row storage is large enough
    int nnz_diff = 0;
    do {
        nnz_diff = Assemble(a_U);
        AMREX_ALWAYS_ASSERT(nnz_diff >= 0);
        if (nnz_diff != 0) {
            m_pc_mat_nnz += nnz_diff;
            ++m_num_realloc;
        }
    } while (nnz_diff != 0);

    if (m_num_realloc <= 1) { return; }

    std::stringstream warning_message;
    warning_message << "Number of times arrays were reallocated due to new nonzero elements "
                    << "is greater than 1 (" << m_num_realloc <<"). This is unexpected.\n";
    ablastr::warn_manager::WMRecordWarning("MatrixPC", warning_message.str());
}


template <class T, class Ops>


int MatrixPC<T,Ops>::Assemble (const T& a_U)

{

    // Assemble the sparse matrix representation of the preconditioner

    //      A = curl (alpha * curl []) + M

    // where M is the mass matrix. The following data is set in this function:

    // - m_r_indices_g: integer array of size n with the global row indices

    // - m_num_nz:      integer array of size n with the number of non-zero elements

    //                  in each row

    // - m_c_indices_g: integer array of size n*ncmax with the global column indices

    //                  of non-zero elements in each row (row-major)

    // - m_a_ij:        real-type array of size n*ncmax with the matrix element values

    //                  (row-major format)

    // where n is the local number of rows, and ncmax is the maximum number of non-zero

    // elements per row.


    BL_PROFILE("MatrixPC::Assemble()");

    using namespace amrex;


    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(

        IsDefined(),

        "MatrixPC::Assemble() called on undefined object" );


    // set the alpha coefficient for the curl-curl op

    const RT thetaDt = m_ops->GetThetaForPC()*this->m_dt;

    const RT alpha = (thetaDt*PhysConst::c) * (thetaDt*PhysConst::c);

    if (m_verbose) {

        amrex::Print() << "Updating " << m_name

                       << ": theta*dt = " << thetaDt << ", "

                       << " coefficients: "

                       << "alpha = " << alpha << "\n";

    }


    // Get DOF object from a_U

    const auto& dofs_obj = a_U.getDOFsObject();

    const auto& dofs_mfarrvec = dofs_obj->m_array;

    AMREX_ALWAYS_ASSERT(m_ndofs_l == dofs_obj->m_nDoFs_l);

    AMREX_ALWAYS_ASSERT(m_ndofs_g == dofs_obj->m_nDoFs_g);


    m_r_indices_g.clear();

    m_num_nz.clear();

    m_c_indices_g.clear();

    m_a_ij.clear();


    auto n_rows = size_t(m_ndofs_l);

    auto n_cols = size_t(m_pc_mat_nnz) * size_t(m_ndofs_l);


    m_r_indices_g.resize(n_rows);

    m_num_nz.resize(n_rows);

    m_c_indices_g.resize(n_cols);

    m_a_ij.resize(n_cols);


    auto* r_indices_g_ptr = m_r_indices_g.data();

    auto* num_nz_ptr = m_num_nz.data();

    auto* c_indices_g_ptr = m_c_indices_g.data();

    auto* a_ij_ptr = m_a_ij.data();


    const auto nnz_max = m_pc_mat_nnz;

    auto nnz_actual = nnz_max;


    for (int lev = 0; lev < m_num_amr_levels; lev++) {


        auto ncomp = dofs_mfarrvec[lev][0]->nComp();

        AMREX_ALWAYS_ASSERT(ncomp == 2); // local, global


        const auto& geom = m_geom[lev];

        const auto dxi = geom.InvCellSizeArray();


        Gpu::Buffer<int> nnz_actual_d({nnz_max});

        auto* nnz_actual_ptr = nnz_actual_d.data();


        for (int dir = 0; dir < 3; dir++) {


            for (amrex::MFIter mfi(*dofs_mfarrvec[lev][dir]); mfi.isValid(); ++mfi) {


                const amrex::Box bx = mfi.tilebox();

                const amrex::Box full_bx = mfi.fabbox();


                auto dof_arr = dofs_mfarrvec[lev][dir]->const_array(mfi);


                // Set row indices and identity diagonal (unconditional)

                ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k)

                {

                    const int ridx_l = dof_arr(i,j,k,0);

                    if (ridx_l < 0) { return; }


                    int icol = 0;

                    const int ridx_g = dof_arr(i,j,k,1);


                    r_indices_g_ptr[ridx_l] = ridx_g;


                    {

                        const int cidx_g_lhs = dof_arr(i,j,k,1);

                        const amrex::Real val = 1.0_rt;

                        auto flag = MatrixPCUtils::insertOrAdd( cidx_g_lhs, val,

                                                                &c_indices_g_ptr[ridx_l*nnz_max],

                                                                &a_ij_ptr[ridx_l*nnz_max],

                                                                nnz_max, icol );

                        if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                    }


                    num_nz_ptr[ridx_l] = icol;

                });


                // Add the curl-curl stencil entries (only when alpha > 0)

                if (thetaDt > 0.0) {


#if defined(WARPX_DIM_RSPHERE)

                    // 1D spherical geometry is electrostatic

                    amrex::ignore_unused(dxi);

#else

                    const amrex::MultiFab* BC_mask_Edir = m_ops->GetCurl2BCmask(lev,dir);

                    AMREX_ALWAYS_ASSERT(BC_mask_Edir != nullptr);

                    const auto BC_mask_Edir_arr = BC_mask_Edir->const_array(mfi);

#endif


#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)

                    int tdir = -1;

                    if  (dir == 0) { tdir = 2; }

                    else if (dir == 2) { tdir = 0; }

                    else { tdir = 1; }

                    auto dof_tdir_arr = dofs_mfarrvec[lev][tdir]->const_array(mfi);

#elif defined(WARPX_DIM_3D)

                    const int tdir1 = (dir + 1) % 3;

                    const int tdir2 = (dir + 2) % 3;

                    GpuArray<Array4<const int>, AMREX_SPACEDIM>

                        const dof_arrays {{ AMREX_D_DECL( dofs_mfarrvec[lev][dir]->const_array(mfi),

                                                          dofs_mfarrvec[lev][tdir1]->const_array(mfi),

                                                          dofs_mfarrvec[lev][tdir2]->const_array(mfi) ) }};

#endif


                    ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k)

                    {

                        const int ridx_l = dof_arr(i,j,k,0);

                        if (ridx_l < 0) { return; }


                        int icol = num_nz_ptr[ridx_l]; //NOLINT (misc-const-correctness)


#if defined(WARPX_DIM_1D_Z)

                        // dir = 0: xhat\cdot[\nabla\times\nabla E] = -[d2/dx2]Ex

                        // dir = 1: yhat\cdot[\nabla\times\nabla E] = -[d2/dx2]Ey

                        // dir = 2: zhat\cdot[\nabla\times\nabla E] = 0

                        if (dir != 2) {

                            {

                                // diagonal component (i) of 2nd derivative operator

                                const int cidx_g_rhs = dof_arr(i,j,k,1);

                                const amrex::Real val = 2.0_rt*alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,0);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                // left component (i-1) of 2nd derivative operator

                                const int cidx_g_rhs = dof_arr(i-1,j,k,1);

                                const amrex::Real val = -alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,1);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                // right component (i+1) of 2nd derivative operator

                                const int cidx_g_rhs = dof_arr(i+1,j,k,1);

                                const amrex::Real val = -alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,1);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                        }

#elif defined(WARPX_DIM_RCYLINDER)

                        // dir = 0: rhat\cdot[\nabla\times\nabla E] = 0

                        // dir = 1: that\cdot[\nabla\times\nabla E] = -d/dr[1/r*d/dr(r*Et)]

                        // dir = 2: zhat\cdot[\nabla\times\nabla E] = -1/r*d/dr[r*dEz/dr]

                        if (dir != 0) {

                            const auto i_real = static_cast<amrex::Real>(i);

                            {

                                // diagonal component (i) of 2nd derivative operator

                                int cidx_g_rhs = dof_arr(i,j,k,1);

                                amrex::Real geom_factor = 1.0_rt;

                                if (dir == 1) {

                                    // r_{i} / r_{i-1/2} + r_{i} / r_{i+1/2}

                                    geom_factor = i_real / (i_real - 0.5_rt) + i_real / (i_real + 0.5_rt);

                                }

                                else if (dir == 2) {

                                    geom_factor = 2.0_rt; // (r_{i+1/2} + r_{i-1/2}) / r_{i}

                                }

                                amrex::Real val = geom_factor * alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,0);

                                auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                // left component (i-1) of 2nd derivative operator

                                int cidx_g_rhs = dof_arr(i-1,j,k,1);

                                amrex::Real geom_factor = 1.0_rt;

                                if (dir == 1) {

                                    geom_factor = (i_real - 1.0_rt) / (i_real - 0.5_rt); // r_{i-1} / r_{i-1/2}

                                }

                                else if (dir == 2 && i != 0) {

                                    geom_factor = 1.0_rt - 0.5_rt / i_real; // r_{i-1/2} / r_{i}

                                }

                                amrex::Real val = -geom_factor * alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,1);

                                auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                // right component (i+1) of 2nd derivative operator

                                int cidx_g_rhs = dof_arr(i+1,j,k,1);

                                amrex::Real geom_factor = 1.0_rt;

                                if (dir == 1) {

                                    geom_factor = (i_real + 1.0_rt) / (i_real + 0.5_rt); // r_{i+1} / r_{i+1/2}

                                }

                                else if (dir == 2 && i != 0) {

                                    geom_factor = 1.0_rt + 0.5_rt / i_real; // r_{i+1/2} / r_{i}

                                }

                                amrex::Real val = -geom_factor * alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,1);

                                auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                        }

#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)

                        // DIM_XZ:

                        // dir = 0: xhat\cdot[\nabla\times\nabla E] = d/dx[dEz/dz] - [d2/dz2]Ex

                        // dir = 1: yhat\cdot[\nabla\times\nabla E] = -[d2/dx2 + d2/dz2]Ey

                        // dir = 2: zhat\cdot[\nabla\times\nabla E] = d/dz[dEx/dx] - [d2/dx2]Ez

                        // DIM_RZ:

                        // dir = 0: rhat\cdot[\nabla\times\nabla E] = d/dr[dEz/dz] - [d2/dz2]Er

                        // dir = 1: that\cdot[\nabla\times\nabla E] = -[d2/dz2]Et - d/dr[1/r*d/dr(r*Et)]

                        // dir = 2: zhat\cdot[\nabla\times\nabla E] = 1/r*d/dr[r*dEr/dz] - 1/r*d/dr[r*dEz/dr]

#if defined(WARPX_DIM_RZ)

                        const auto i_real = static_cast<amrex::Real>(i);

#endif

                        {

                            // diagonal component (i,j) of second derivative operator

                            const int cidx_g_rhs = dof_arr(i,j,k,1);

                            amrex::Real val = 0.0_rt;

                            if (dir == 0) {

                                val = 2.0_rt * alpha * dxi[1]*dxi[1] * BC_mask_Edir_arr(i,j,k,0);

                            } else if (dir == 2) {

                                val = 2.0_rt * alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,0);

                            } else if (dir == 1) {

#if defined(WARPX_DIM_RZ)

                                const amrex::Real geom_factor = i_real / (i_real - 0.5_rt) + i_real / (i_real + 0.5_rt);

#else

                                const amrex::Real geom_factor = 2.0_rt;

#endif

                                val = geom_factor * alpha * dxi[0]*dxi[0] * BC_mask_Edir_arr(i,j,k,0)

                                    +      2.0_rt * alpha * dxi[1]*dxi[1] * BC_mask_Edir_arr(i,j,k,2);

                            }

                            auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                    &c_indices_g_ptr[ridx_l*nnz_max],

                                                                    &a_ij_ptr[ridx_l*nnz_max],

                                                                    nnz_max, icol );

                            if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                        }

                        if ((dir == 0) || (dir == 2)) {

                            {

                                // left component (i-1,j) or (i,j-1) of second derivative operator

#if defined(WARPX_DIM_RZ)

                                const amrex::Real geom_factor = (dir == 2 && i != 0 ? 1.0_rt - 0.5_rt / i_real : 1.0_rt);

#else

                                const amrex::Real geom_factor = 1.0_rt;

#endif

                                const int cidx_g_rhs = (dir == 0 ? dof_arr(i,j-1,k,1) : dof_arr(i-1,j,k,1));

                                const amrex::Real val = -geom_factor * alpha * dxi[dir==0?1:0] * dxi[dir==0?1:0] * BC_mask_Edir_arr(i,j,k,1);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                              &c_indices_g_ptr[ridx_l*nnz_max],

                                                                              &a_ij_ptr[ridx_l*nnz_max],

                                                                              nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                // right component (i+1,j) or (i,j+1) of second derivative operator

#if defined(WARPX_DIM_RZ)

                                const amrex::Real geom_factor = (dir == 2 && i != 0 ? 1.0_rt + 0.5_rt / i_real : 1.0_rt);

#else

                                const amrex::Real geom_factor = 1.0_rt;

#endif

                                const int cidx_g_rhs = (dir == 0 ? dof_arr(i,j+1,k,1) : dof_arr(i+1,j,k,1));

                                const amrex::Real val = -geom_factor * alpha * dxi[dir==0?1:0] * dxi[dir==0?1:0] * BC_mask_Edir_arr(i,j,k,1);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                              &c_indices_g_ptr[ridx_l*nnz_max],

                                                                              &a_ij_ptr[ridx_l*nnz_max],

                                                                              nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            // The following four blocks are for the mixed derivative terms of the curl curl operator

                            // DIM_XZ:

                            // dir = 0: d/dx(dEz/dz) at Ex(i,j) with Ex centered in x and nodal in z

                            // dir = 2: d/dz(dEx/dx) at Ez(i,j) with Ez centered in z and nodal in x

                            // DIM_RZ:

                            // dir = 0: d/dr(dEz/dz) at Er(i,j) with Er centered in r and nodal in z

                            // dir = 2: 1/r*d/dr[r*dEr/dz] at Ez(i,j) with Ez centered in z and nodal in r

#if defined(WARPX_DIM_RZ)

                            const amrex::Real geom_m = (dir == 2 && i != 0 ? 1.0_rt - 0.5_rt / i_real : 1.0_rt);

                            const amrex::Real geom_p = (dir == 2 && i != 0 ? 1.0_rt + 0.5_rt / i_real : 1.0_rt);

#else

                            const amrex::Real geom_m = 1.0_rt;

                            const amrex::Real geom_p = 1.0_rt;

#endif

                            {

                                const int cidx_g_rhs = (dir == 0 ? dof_tdir_arr(i,j-1,k,1) : dof_tdir_arr(i-1,j,k,1));

                                const amrex::Real val = geom_m * alpha * dxi[0] * dxi[1] * BC_mask_Edir_arr(i,j,k,2);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                              &c_indices_g_ptr[ridx_l*nnz_max],

                                                                              &a_ij_ptr[ridx_l*nnz_max],

                                                                              nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                const int cidx_g_rhs = dof_tdir_arr(i,j,k,1);

                                const amrex::Real val = -geom_p * alpha * dxi[0] * dxi[1] * BC_mask_Edir_arr(i,j,k,2);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                              &c_indices_g_ptr[ridx_l*nnz_max],

                                                                              &a_ij_ptr[ridx_l*nnz_max],

                                                                              nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                const int cidx_g_rhs = (dir == 0 ? dof_tdir_arr(i+1,j-1,k,1) : dof_tdir_arr(i-1,j+1,k,1));

                                const amrex::Real val = -geom_m * alpha * dxi[0] * dxi[1] * BC_mask_Edir_arr(i,j,k,2);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                              &c_indices_g_ptr[ridx_l*nnz_max],

                                                                              &a_ij_ptr[ridx_l*nnz_max],

                                                                              nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                            {

                                const int cidx_g_rhs = (dir == 0 ? dof_tdir_arr(i+1,j,k,1) : dof_tdir_arr(i,j+1,k,1));

                                const amrex::Real val = geom_p * alpha * dxi[0] * dxi[1] * BC_mask_Edir_arr(i,j,k,2);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                              &c_indices_g_ptr[ridx_l*nnz_max],

                                                                              &a_ij_ptr[ridx_l*nnz_max],

                                                                              nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                        } else if (dir==1) {

                            for (int jdir = 0; jdir <= 2; jdir+=2) {

                                {

#if defined(WARPX_DIM_RZ)

                                    const amrex::Real geom_factor = (jdir == 0 ? (i_real - 1.0_rt) / (i_real - 0.5_rt) : 1.0_rt);

#else

                                    const amrex::Real geom_factor = 1.0_rt;

#endif

                                    const int cidx_g_rhs = (jdir == 0 ? dof_arr(i-1,j,k,1) : dof_arr(i,j-1,k,1));

                                    const amrex::Real val = -geom_factor * alpha * dxi[jdir==0?0:1] * dxi[jdir==0?0:1] * BC_mask_Edir_arr(i,j,k,jdir+1);

                                    const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                                  &c_indices_g_ptr[ridx_l*nnz_max],

                                                                                  &a_ij_ptr[ridx_l*nnz_max],

                                                                                  nnz_max, icol );

                                    if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                                }

                                {

#if defined(WARPX_DIM_RZ)

                                    const amrex::Real geom_factor = (jdir == 0 ? (i_real + 1.0_rt) / (i_real + 0.5_rt) : 1.0_rt);

#else

                                    const amrex::Real geom_factor = 1.0_rt;

#endif

                                    const int cidx_g_rhs = (jdir == 0 ? dof_arr(i+1,j,k,1) : dof_arr(i,j+1,k,1));

                                    const amrex::Real val = -geom_factor * alpha * dxi[jdir==0?0:1] * dxi[jdir==0?0:1] * BC_mask_Edir_arr(i,j,k,jdir+1);

                                    const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                                  &c_indices_g_ptr[ridx_l*nnz_max],

                                                                                  &a_ij_ptr[ridx_l*nnz_max],

                                                                                  nnz_max, icol );

                                    if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                                }

                            }

                        }

#elif defined(WARPX_DIM_3D)

                        // xhat\cdot[\nabla\times\nabla\times E] = d/dx[dEy/dy + dEz/dz] - [d2/dy2 + d2/dz2]Ex

                        // yhat\cdot[\nabla\times\nabla\times E] = d/dy[dEx/dx + dEz/dz] - [d2/dx2 + d2/dz2]Ey

                        // zhat\cdot[\nabla\times\nabla\times E] = d/dz[dEx/dx + dEy/dy] - [d2/dx2 + d2/dy2]Ez

                        amrex::IntVect dvec(AMREX_D_DECL(dir,tdir1,tdir2));

                        const amrex::IntVect ic(AMREX_D_DECL(i,j,k));

                        {

                            // diagonal component (i,j,k) of second derivative operator

                            const int cidx_g_rhs = dof_arrays[0](ic,1);

                            const amrex::Real val = 2.0_rt * alpha * ( dxi[dvec[1]]*dxi[dvec[1]] * BC_mask_Edir_arr(i,j,k,0)

                                                               + dxi[dvec[2]]*dxi[dvec[2]] * BC_mask_Edir_arr(i,j,k,3) );

                            const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                    &c_indices_g_ptr[ridx_l*nnz_max],

                                                                    &a_ij_ptr[ridx_l*nnz_max],

                                                                    nnz_max, icol );

                            if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                        }

                        // left and right components (i+/-1,j,k), (i,j+/-1,k), or (i,j,k+/-1) of second derivative operator

                        for (int ctr = -1; ctr <= 1; ctr += 2) {

                            for (int tdir = 1; tdir <= 2; tdir++) {

                                auto iv = ic; iv[dvec[tdir]] += ctr;

                                const int cidx_g_rhs = dof_arrays[0](iv,1);

                                const int comp_shift = (dvec[tdir] == tdir1) ? 0:3;

                                const amrex::Real val = -alpha * dxi[dvec[tdir]]*dxi[dvec[tdir]] * BC_mask_Edir_arr(i,j,k,comp_shift+1);

                                const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                        &c_indices_g_ptr[ridx_l*nnz_max],

                                                                        &a_ij_ptr[ridx_l*nnz_max],

                                                                        nnz_max, icol );

                                if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                            }

                        }

                        // mixed derivative terms of the curl curl operator

                        for (int ctr_dir = -1; ctr_dir <= 0; ctr_dir++) {

                            for (int ctr_tdir = -1; ctr_tdir <= 0; ctr_tdir++) {

                                for (int tdir = 1; tdir <= 2; tdir++) {

                                    auto iv = ic; iv[dvec[0]] += (ctr_dir+1); iv[dvec[tdir]] += ctr_tdir;

                                    const auto sign = std::copysign(1,ctr_dir) * std::copysign(1,ctr_tdir);

                                    const int cidx_g_rhs = dof_arrays[tdir](iv,1);

                                    const int comp_shift = (dvec[tdir] == tdir1) ? 0:3;

                                    const amrex::Real val = amrex::Real(sign) * alpha * dxi[dvec[0]]*dxi[dvec[tdir]] * BC_mask_Edir_arr(i,j,k,comp_shift+2);

                                    const auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                            &c_indices_g_ptr[ridx_l*nnz_max],

                                                                            &a_ij_ptr[ridx_l*nnz_max],

                                                                            nnz_max, icol );

                                    if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                                }

                            }

                        }

#endif

                        num_nz_ptr[ridx_l] = icol;

                    });

                }


                // Add the mass matrix piece

                // See Figure B.9 of JCP 491, 112383 (2023) for an illustrative diagram

                // of the mass matrices (https://doi.org/10.1016/j.jcp.2023.112383).

                //

                // The coupling of Jx(i,j,k) to Ex(i+i0,j+j0,k+k0), where i0 ranges from

                // -MM_width[0] to +MM_width[0], j0 ranges from -MM_width[1] to +MM_width[1], and k0 ranges

                // from -MM_width[2] to +MM_width[2], is stored as components of m_bcoefs[dir=0] (mass matrices).

                // Similarly for Jy/Ey (m_bcoefs[dir=1]) and Jz/Ez (m_bcoefs[dir=2]).

                // The mapping to the components is given by:

                // mm_comp = i0 + MM_width[0] + MM_ncomp[0]*(j0 + MM_width[1]) + MM_ncomp[0]*MM_ncomp[1]*(k0 + MM_width[2])

                if (m_include_mass_matrices) {


                    auto sigma_ii_arr = (*m_bcoefs)[lev][dir]->const_array(mfi);

                    amrex::GpuArray<int,3> MM_ncomp = {1,1,1};

                    amrex::GpuArray<int,3> MM_width = {0,0,0};

                    for (int space_dir=0; space_dir<AMREX_SPACEDIM; space_dir++) {

                        MM_ncomp[space_dir] = m_ops->GetMassMatricesPCnComp(dir,space_dir);

                        MM_width[space_dir] = (MM_ncomp[space_dir] - 1)/2;

                    }


                    ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k)

                    {

                        const int ridx_l = dof_arr(i,j,k,0);

                        if (ridx_l < 0) { return; }


                        const amrex::IntVect iv_base = IntVect(AMREX_D_DECL(i,j,k));

                        int icol = num_nz_ptr[ridx_l];


                        int mm_comp = 0;

                        for (int comp2 = 0; comp2 < MM_ncomp[2]; comp2++) {

                            [[maybe_unused]] const int kk0 = comp2 - MM_width[2];

                            for (int comp1 = 0; comp1 < MM_ncomp[1]; comp1++) {

                                [[maybe_unused]] const int jj0 = comp1 - MM_width[1];

                                for (int comp0 = 0; comp0 < MM_ncomp[0]; comp0++) {

                                    const int ii0 = comp0 - MM_width[0]; // ..., -2, -1, 0, 1, 2, ...

                                    const amrex::IntVect iv_shift = IntVect(AMREX_D_DECL(ii0, jj0, kk0));

                                    if (full_bx.contains(iv_base + iv_shift)) {

                                        const int cidx_g_rhs = dof_arr(iv_base + iv_shift,1);

                                        const amrex::Real val = sigma_ii_arr(iv_base,mm_comp);

                                        auto flag = MatrixPCUtils::insertOrAdd( cidx_g_rhs, val,

                                                                                &c_indices_g_ptr[ridx_l*nnz_max],

                                                                                &a_ij_ptr[ridx_l*nnz_max],

                                                                                nnz_max, icol );

                                        if (!flag) { Gpu::Atomic::Max(nnz_actual_ptr, icol); }

                                    }

                                    ++mm_comp;

                                }

                            }

                        }


                        num_nz_ptr[ridx_l] = icol;

                    });

                }

                Gpu::synchronize();

            }

        }


        nnz_actual = std::max(nnz_actual, *(nnz_actual_d.copyToHost()));

    }


    amrex::ParallelDescriptor::ReduceIntMax(&nnz_actual, 1);

    return (nnz_actual - nnz_max);

}


// Apply (solve) the preconditioner for a given right-hand side.
//
// Current behavior: after checking its preconditions, this function
// unconditionally invokes WARPX_ABORT_WITH_MESSAGE; no native solve is
// implemented here, and a_x is never written by this function.
//
//   a_x : the unknown x in A x = b; its getArrayVecType() must be Efield_fp
//   a_b : the right-hand side b;    its getArrayVecType() must be Efield_fp
template <class T, class Ops>


void MatrixPC<T,Ops>::Apply (T& a_x, const T& a_b)

{

    //  Given a right-hand-side b, solve:

    //      A x = b

    //  where A is the linear operator, in this case, the curl-curl

    //  operator:

    //      A x = curl (alpha * curl (x) ) + beta * x


    BL_PROFILE("MatrixPC::Apply()");

    using namespace amrex;


    // Precondition 1: the object must report itself as defined
    // (IsDefined()) before it can be applied.
    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(

        IsDefined(),

        "MatrixPC::Apply() called on undefined object" );

    // Preconditions 2 and 3: both the solution and the right-hand side
    // must be tagged as the Efield_fp field type.
    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(

        a_x.getArrayVecType()==warpx::fields::FieldType::Efield_fp,

        "MatrixPC::Apply() - a_x must be Efield_fp type");

    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(

        a_b.getArrayVecType()==warpx::fields::FieldType::Efield_fp,

        "MatrixPC::Apply() - a_b must be Efield_fp type");


    // No solve is performed in this class: reaching this point is always
    // reported as an error. Per the message, the matrix is meant to be
    // used with an external library (e.g., PETSc).
    WARPX_ABORT_WITH_MESSAGE("MatrixPC<T,Ops>::Apply() - native matrix solvers not implemented. Use with external library, eg, PETSc.");


}


#endif

AMReX.H

AMReX_Array.H

BL_PROFILE
#define BL_PROFILE(a)

AMREX_ALWAYS_ASSERT
#define AMREX_ALWAYS_ASSERT(EX)

AMREX_FORCE_INLINE
#define AMREX_FORCE_INLINE

AMREX_GPU_DEVICE
#define AMREX_GPU_DEVICE

AMREX_GPU_HOST_DEVICE
#define AMREX_GPU_HOST_DEVICE

pp
amrex::ParmParse pp

AMReX_MultiFab.H

AMReX_ParmParse.H

AMREX_D_DECL
#define AMREX_D_DECL(a, b, c)

AMReX_Vector.H

Fields.H

MultiFabRegister.H

Preconditioner.H

TextMsg.H

WARPX_ABORT_WITH_MESSAGE
#define WARPX_ABORT_WITH_MESSAGE(MSG)
Definition TextMsg.H:15

WARPX_ALWAYS_ASSERT_WITH_MESSAGE
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition TextMsg.H:13

WarpXConst.H

MatrixPC::IsDefined
bool IsDefined() const override
Check if the preconditioner has been defined.
Definition MatrixPC.H:171

MatrixPC::Apply
void Apply(T &, const T &) override
Apply (solve) the preconditioner given a RHS.
Definition MatrixPC.H:815

MatrixPC::Define
void Define(const T &, Ops *) override
Define the preconditioner.
Definition MatrixPC.H:229

MatrixPC::m_ops
Ops * m_ops
Definition MatrixPC.H:180

MatrixPC::m_verbose
bool m_verbose
Definition MatrixPC.H:178

MatrixPC::m_r_indices_g
amrex::Gpu::DeviceVector< int > m_r_indices_g
Definition MatrixPC.H:193

MatrixPC::MatrixPC
MatrixPC(const MatrixPC &)=delete

MatrixPC::m_c_indices_g
amrex::Gpu::DeviceVector< int > m_c_indices_g
Definition MatrixPC.H:195

MatrixPC::printParameters
void printParameters() const override
Print parameters.
Definition MatrixPC.H:212

MatrixPC::Update
void Update(const T &a_U) override
Update the preconditioner.
Definition MatrixPC.H:284

MatrixPC::m_num_nz
amrex::Gpu::DeviceVector< int > m_num_nz
Definition MatrixPC.H:194

MatrixPC::setName
void setName(const std::string &a_name) override
Set the name for screen output and parsing inputs.
Definition MatrixPC.H:173

MatrixPC::getPCMatrix
void getPCMatrix(amrex::Gpu::DeviceVector< int > &a_r_indices_g, amrex::Gpu::DeviceVector< int > &a_num_nz, amrex::Gpu::DeviceVector< int > &a_c_indices_g, amrex::Gpu::DeviceVector< RT > &a_a_ij, int &a_n, int &a_ncols_max) override
Get the sparse matrix form of the preconditioner.
Definition MatrixPC.H:128

MatrixPC::m_pc_diag_only
bool m_pc_diag_only
Definition MatrixPC.H:187

MatrixPC::m_bcoefs
const amrex::Vector< amrex::Array< amrex::MultiFab *, 3 > > * m_bcoefs
Definition MatrixPC.H:198

MatrixPC::m_ndofs_l
int m_ndofs_l
Definition MatrixPC.H:185

MatrixPC::m_num_realloc
int m_num_realloc
Definition MatrixPC.H:200

MatrixPC::MatrixPC
MatrixPC()=default
Default constructor.

MatrixPC::m_geom
amrex::Vector< amrex::Geometry > m_geom
Definition MatrixPC.H:183

MatrixPC::m_num_amr_levels
int m_num_amr_levels
Definition MatrixPC.H:182

MatrixPC::RT
typename T::value_type RT
Definition MatrixPC.H:80

MatrixPC::readParameters
void readParameters()
Read parameters.
Definition MatrixPC.H:221

MatrixPC::operator=
MatrixPC & operator=(const MatrixPC &)=delete

MatrixPC::m_ndofs_g
int m_ndofs_g
Definition MatrixPC.H:186

MatrixPC::m_include_mass_matrices
bool m_include_mass_matrices
Definition MatrixPC.H:189

MatrixPC::Assemble
int Assemble(const T &a_U)
Assemble the matrix.
Definition MatrixPC.H:317

MatrixPC::m_pc_mat_nnz
int m_pc_mat_nnz
Definition MatrixPC.H:188

MatrixPC::m_name
std::string m_name
Definition MatrixPC.H:191

MatrixPC::~MatrixPC
~MatrixPC() override=default
Default destructor.

MatrixPC::MatrixPC
MatrixPC(MatrixPC &&) noexcept=delete

MatrixPC::m_is_defined
bool m_is_defined
Definition MatrixPC.H:177

MatrixPC::m_a_ij
amrex::Gpu::DeviceVector< amrex::Real > m_a_ij
Definition MatrixPC.H:196

Preconditioner::m_dt
RT m_dt
Definition Preconditioner.H:117

Preconditioner::Preconditioner
Preconditioner()=default
Default constructor.

amrex::BoxND::contains
__host__ __device__ bool contains(const IntVectND< dim > &p) const noexcept

amrex::FabArray::const_array
Array4< typename FabArray< FAB >::value_type const > const_array(const MFIter &mfi) const noexcept

amrex::Gpu::Buffer

amrex::Gpu::Buffer::data
T const * data() const noexcept

amrex::MultiFab

amrex::ParmParse

amrex::Print

amrex::Vector

amrex::Real
amrex_real Real

amrex::Gpu::DeviceVector
PODVector< T, ArenaAllocator< T > > DeviceVector

amrex::ParallelDescriptor::ReduceIntMax
void ReduceIntMax(int &)

MatrixPCUtils
Definition MatrixPC.H:24

MatrixPCUtils::insertOrAdd
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool insertOrAdd(const int a_cidx, const amrex::Real a_val, int *const a_cidxs, amrex::Real *const a_aij, const int a_nnz, int &a_ncol)
Definition MatrixPC.H:26

ablastr::constant::SI::c
constexpr auto c
vacuum speed of light [m/s]
Definition constant.H:153

ablastr::warn_manager::WMRecordWarning
void WMRecordWarning(const std::string &topic, const std::string &text, const WarnPriority &priority=WarnPriority::medium)
Helper function to abbreviate the call to WarnManager::GetInstance().RecordWarning (recording a warni...
Definition WarnManager.cpp:320

amrex::Gpu::Atomic::Max
__host__ __device__ AMREX_FORCE_INLINE T Max(T *const m, T const value) noexcept

amrex::Gpu::synchronize
void synchronize() noexcept

amrex::Gpu::copyAsync
void copyAsync(HostToDevice, InIter begin, InIter end, OutIter result) noexcept

amrex::Gpu::deviceToDevice
static constexpr DeviceToDevice deviceToDevice

amrex::Gpu::streamSynchronize
void streamSynchronize() noexcept

amrex

amrex::ignore_unused
__host__ __device__ void ignore_unused(const Ts &...)

amrex::ParallelFor
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)

amrex::Box
BoxND< 3 > Box

amrex::IntVect
IntVectND< 3 > IntVect

warpx::fields::FieldType::Efield_fp
@ Efield_fp
Definition Fields.H:97

amrex::GpuArray

amrex::MFIter