4.9/dgmassinv__kernels_8hpp_source.html

// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced

// at the Lawrence Livermore National Laboratory. All Rights reserved. See files

// LICENSE and NOTICE for details. LLNL-CODE-806117.

//

// This file is part of the MFEM library. For more information and source code

// availability visit https://mfem.org.

//

// MFEM is free software; you can redistribute it and/or modify it under the

// terms of the BSD-3 license. We welcome feedback and contributions, see file

// CONTRIBUTING.md for details.


#ifndef MFEM_DGMASSINV_KERNELS_HPP

#define MFEM_DGMASSINV_KERNELS_HPP


#include "../linalg/kernels.hpp"

#include "kernels.hpp"

#include "integ/bilininteg_mass_kernels.hpp"


namespace mfem

{


namespace internal

{


template <int DIM, int D1D, int Q1D>

MFEM_HOST_DEVICE inline

void DGMassApply(const int e,

                 const int NE,

                 const real_t *B,

                 const real_t *Bt,

                 const real_t *pa_data,

                 const real_t *x,

                 real_t *y,

                 const int d1d = 0,

                 const int q1d = 0)

{

   constexpr bool use_smem = (D1D > 0 && Q1D > 0);

   constexpr bool ACCUM = false;

   constexpr int NBZ = 1;


   if (DIM == 1)

   {

      PAMassApply1D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);

      return;

   }


   if (use_smem)

   {

      // cannot specialize functions below with D1D or Q1D equal to zero

      // (this branch only runs with D1D and Q1D are both positive)

      constexpr int TD1D = D1D ? D1D : 1;

      constexpr int TQ1D = Q1D ? Q1D : 1;

      if (DIM == 2)

      {

         SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);

      }

      else if (DIM == 3)

      {

         SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);

      }

      else

      {

         MFEM_ABORT_KERNEL("Unsupported dimension.");

      }

   }

   else

   {

      if (DIM == 2)

      {

         PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);

      }

      else if (DIM == 3)

      {

         PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);

      }

      else

      {

         MFEM_ABORT_KERNEL("Unsupported dimension.");

      }

   }

}


MFEM_HOST_DEVICE inline

void DGMassPreconditioner(const int e,

                          const int NE,

                          const int ND,

                          const real_t *dinv,

                          const real_t *x,

                          real_t *y)

{

   const auto X = ConstDeviceMatrix(x, ND, NE);

   const auto D = ConstDeviceMatrix(dinv, ND, NE);

   auto Y = DeviceMatrix(y, ND, NE);


   const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);

   const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);


   for (int i = tid; i < ND; i += bxy)

   {

      Y(i, e) = D(i, e)*X(i, e);

   }

   MFEM_SYNC_THREAD;

}


MFEM_HOST_DEVICE inline

void DGMassAxpy(const int e,

                const int NE,

                const int ND,

                const real_t a,

                const real_t *x,

                const real_t b,

                const real_t *y,

                real_t *z)

{

   const auto X = ConstDeviceMatrix(x, ND, NE);

   const auto Y = ConstDeviceMatrix(y, ND, NE);

   auto Z = DeviceMatrix(z, ND, NE);


   const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);

   const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);


   for (int i = tid; i < ND; i += bxy)

   {

      Z(i, e) = a*X(i, e) + b*Y(i, e);

   }

   MFEM_SYNC_THREAD;

}


template <int NB>

MFEM_HOST_DEVICE inline

real_t DGMassDot(const int e,

                 const int NE,

                 const int ND,

                 const real_t *x,

                 const real_t *y)

{

   const auto X = ConstDeviceMatrix(x, ND, NE);

   const auto Y = ConstDeviceMatrix(y, ND, NE);


   const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);

   const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);


   MFEM_SHARED real_t s_dot[NB*NB];

   s_dot[tid] = 0.0;


   for (int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }

   MFEM_SYNC_THREAD;


   if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }

   MFEM_SYNC_THREAD;


   if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }

   MFEM_SYNC_THREAD;


   if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }

   MFEM_SYNC_THREAD;


   if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }

   MFEM_SYNC_THREAD;


   if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }

   MFEM_SYNC_THREAD;


   if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }

   MFEM_SYNC_THREAD;


   if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }

   MFEM_SYNC_THREAD;


   if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }

   MFEM_SYNC_THREAD;


   if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }

   MFEM_SYNC_THREAD;


   if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }

   MFEM_SYNC_THREAD;


   return s_dot[0];

}


template<int T_D1D = 0>

MFEM_HOST_DEVICE inline

void DGMassBasis1D(const int e,

                   const int NE,

                   const real_t *b_,

                   const real_t *x_,

                   real_t *y_,

                   const int d1d = 0)

{

   const int D1D = T_D1D ? T_D1D : d1d;


   const auto b = Reshape(b_, D1D, D1D);

   const auto x = Reshape(x_, D1D, NE);

   auto y = Reshape(y_, D1D, NE);


   constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;

   real_t Y[MD1];


   MFEM_FOREACH_THREAD(i,x,D1D)

   {

      real_t val = 0.0;

      for (int j = 0; j < D1D; ++j)

      {

         val += b(i,j)*x(j,e);

      }

      Y[i] = val;

   }

   MFEM_SYNC_THREAD;

   if (MFEM_THREAD_ID(y) == 0)

   {

      MFEM_FOREACH_THREAD(i,x,D1D)

      {

         y(i,e) = Y[i];

      }

   }

}


template<int T_D1D = 0>

MFEM_HOST_DEVICE inline

void DGMassBasis2D(const int e,

                   const int NE,

                   const real_t *b_,

                   const real_t *x_,

                   real_t *y_,

                   const int d1d = 0)

{

   constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;

   const int D1D = T_D1D ? T_D1D : d1d;


   const auto b = Reshape(b_, D1D, D1D);

   const auto x = Reshape(x_, D1D, D1D, NE);

   auto y = Reshape(y_, D1D, D1D, NE);


   MFEM_SHARED real_t sB[MD1*MD1];

   MFEM_SHARED real_t sm0[MD1*MD1];

   MFEM_SHARED real_t sm1[MD1*MD1];


   kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);


   ConstDeviceMatrix B(sB, D1D,D1D);

   DeviceMatrix DD(sm0, MD1, MD1);

   DeviceMatrix DQ(sm1, MD1, MD1);

   DeviceMatrix QQ(sm0, MD1, MD1);


   kernels::internal::LoadX(e,D1D,x,DD);

   kernels::internal::EvalX(D1D,D1D,B,DD,DQ);

   kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);

   MFEM_SYNC_THREAD; // sync here to allow in-place evaluations

   MFEM_FOREACH_THREAD(qy,y,D1D)

   {

      MFEM_FOREACH_THREAD(qx,x,D1D)

      {

         y(qx,qy,e) = QQ(qx,qy);

      }

   }

   MFEM_SYNC_THREAD;

}


template<int T_D1D = 0>

MFEM_HOST_DEVICE inline

void DGMassBasis3D(const int e,

                   const int NE,

                   const real_t *b_,

                   const real_t *x_,

                   real_t *y_,

                   const int d1d = 0)

{

   const int D1D = T_D1D ? T_D1D : d1d;


   const auto b = Reshape(b_, D1D, D1D);

   const auto x = Reshape(x_, D1D, D1D, D1D, NE);

   auto y = Reshape(y_, D1D, D1D, D1D, NE);


   constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;


   MFEM_SHARED real_t sB[MD1*MD1];

   MFEM_SHARED real_t sm0[MD1*MD1*MD1];

   MFEM_SHARED real_t sm1[MD1*MD1*MD1];


   kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);


   ConstDeviceMatrix B(sB, D1D,D1D);

   DeviceCube DDD(sm0, MD1,MD1,MD1);

   DeviceCube DDQ(sm1, MD1,MD1,MD1);

   DeviceCube DQQ(sm0, MD1,MD1,MD1);

   DeviceCube QQQ(sm1, MD1,MD1,MD1);


   kernels::internal::LoadX(e,D1D,x,DDD);

   kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);

   kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);

   kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);

   MFEM_SYNC_THREAD; // sync here to allow in-place evaluation

   MFEM_FOREACH_THREAD(qz,z,D1D)

   {

      MFEM_FOREACH_THREAD(qy,y,D1D)

      {

         for (int qx = 0; qx < D1D; ++qx)

         {

            y(qx,qy,qz,e) = QQQ(qz,qy,qx);

         }

      }

   }

   MFEM_SYNC_THREAD;

}


template<int DIM, int T_D1D = 0>

MFEM_HOST_DEVICE inline

void DGMassBasis(const int e,

                 const int NE,

                 const real_t *b_,

                 const real_t *x_,

                 real_t *y_,

                 const int d1d = 0)

{

   if (DIM == 1)

   {

      DGMassBasis1D<T_D1D>(e, NE, b_, x_, y_, d1d);

   }

   else if (DIM == 2)

   {

      DGMassBasis2D<T_D1D>(e, NE, b_, x_, y_, d1d);

   }

   else if (DIM == 3)

   {

      DGMassBasis3D<T_D1D>(e, NE, b_, x_, y_, d1d);

   }

   else

   {

      MFEM_ABORT_KERNEL("Dimension not supported.");

   }

}


} // namespace internal


} // namespace mfem


#endif

bilininteg_mass_kernels.hpp

kernels.hpp

kernels.hpp

b
real_t b
Definition lissajous.cpp:42

a
real_t a
Definition lissajous.cpp:41

DIM
constexpr int DIM
Definition minimal-surface.cpp:71

real_t
mfem::real_t real_t
Definition mtop_solvers.hpp:17

mfem
Definition CodeDocumentation.dox:1

mfem::DeviceCube
DeviceTensor< 3, real_t > DeviceCube
Definition dtensor.hpp:153

mfem::Reshape
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138

mfem::ConstDeviceMatrix
DeviceTensor< 2, const real_t > ConstDeviceMatrix
Definition dtensor.hpp:151

mfem::DeviceMatrix
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:150