12#ifndef MFEM_FES_KERNELS_HPP
13#define MFEM_FES_KERNELS_HPP
41template <Ordering::Type Order,
class Base,
bool Diag = true>
42struct DerefineMatrixOpFunctorBase;
45struct DerefineMatrixOpFunctorBase<Ordering::
byNODES, Base, true>
52 int MFEM_HOST_DEVICE BlockWidth(
int k)
const
54 return bcptr[k + 1] - bcptr[k];
57 void MFEM_HOST_DEVICE Col(
int j,
int k,
int &col,
int &sign)
const
59 col = cptr[bcptr[k] + j];
67 int MFEM_HOST_DEVICE IndexX(
int col,
int vdim,
int)
const
69 return col + vdim *
static_cast<const Base *
>(
this)->width;
71 int MFEM_HOST_DEVICE IndexY(
int row,
int vdim)
const
73 return row + vdim *
static_cast<const Base *
>(
this)->height;
78struct DerefineMatrixOpFunctorBase<Ordering::
byVDIM, Base, true>
85 int MFEM_HOST_DEVICE BlockWidth(
int k)
const
87 return bcptr[k + 1] - bcptr[k];
90 void MFEM_HOST_DEVICE Col(
int j,
int k,
int &col,
int &sign)
const
92 col = cptr[bcptr[k] + j];
100 int MFEM_HOST_DEVICE IndexX(
int col,
int vdim,
int)
const
102 return vdim + col *
static_cast<const Base *
>(
this)->vdims;
104 int MFEM_HOST_DEVICE IndexY(
int row,
int vdim)
const
106 return vdim + row *
static_cast<const Base *
>(
this)->vdims;
111struct DerefineMatrixOpFunctorBase<Ordering::
byNODES, Base, false>
122 int MFEM_HOST_DEVICE BlockWidth(
int k)
const {
return bwptr[k]; }
124 void MFEM_HOST_DEVICE Col(
int j,
int k,
int &col,
int &sign)
const
129 int MFEM_HOST_DEVICE IndexX(
int col,
int vdim,
int k)
const
132 int segwidth = segptr[tmp + 1] - segptr[tmp];
133 return segptr[tmp] *
static_cast<const Base *
>(
this)->vdims + col +
136 int MFEM_HOST_DEVICE IndexY(
int row,
int vdim)
const
138 return row + vdim *
static_cast<const Base *
>(
this)->height;
143struct DerefineMatrixOpFunctorBase<Ordering::
byVDIM, Base, false>
154 int MFEM_HOST_DEVICE BlockWidth(
int k)
const {
return bwptr[k]; }
156 void MFEM_HOST_DEVICE Col(
int j,
int k,
int &col,
int &sign)
const
161 int MFEM_HOST_DEVICE IndexX(
int col,
int vdim,
int k)
const
164 int segwidth = segptr[tmp + 1] - segptr[tmp];
165 return segptr[tmp] *
static_cast<const Base *
>(
this)->vdims + col +
168 int MFEM_HOST_DEVICE IndexY(
int row,
int vdim)
const
170 return vdim + row *
static_cast<const Base *
>(
this)->vdims;
176template <Ordering::Type Order,
bool Atomic,
bool Diag = true>
177struct DerefineMatrixOpMultFunctor
178 :
public DerefineMatrixOpFunctorBase<
179 Order, DerefineMatrixOpMultFunctor<Order, Atomic, Diag>, Diag>
200 void MFEM_HOST_DEVICE operator()(
int kidx)
const
202 int k = kidx % nblocks;
203 int vdim = kidx / nblocks;
205 int block_height = brptr[k + 1] - brptr[k];
206 int block_width = this->BlockWidth(k);
207 MFEM_FOREACH_THREAD(i, x, block_height)
209 int row = rptr[brptr[k] + i];
220 for (
int j = 0; j < block_width; ++j)
222 int col, sign = rsign;
223 this->Col(j, k, col, sign);
224 sum += sign * bsptr[boptr[k] + i + j * block_height] *
225 xptr[this->IndexX(col, vdim, k)];
227#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
230 atomicAdd(yptr + this->IndexY(row, vdim), sum);
235 yptr[this->IndexY(row, vdim)] += sum;
242 void Run(
int N)
const {
forall_2D(nblocks * vdims, N, 1, *
this); }
MFEM_DEVICE mfem::real_t atomicAdd(mfem::real_t *add, mfem::real_t val)
void forall_2D(int N, int X, int Y, lambda &&body)