MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
fes_kernels.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_FES_KERNELS_HPP
13#define MFEM_FES_KERNELS_HPP
14
15#include "../general/forall.hpp"
16
17#include <climits>
18
19namespace mfem
20{
21/// \cond DO_NOT_DOCUMENT
22namespace internal
23{
24
25///
26/// Implements matrix-vector multiply $y = A x$ for a sparse matrix composed of
27/// a sum of smaller dense blocks. There is additional permutation/sign
28/// information associated with each block. The base class only implements
29/// helper routines such as computing block widths, index into x, index into y,
30/// and column in A given sub-block information.
31/// @sa DerefineMatrixOpMultFunctor
32///
33/// @tparam Order vdim ordering for x and y. Note that for Diag = false this is
34/// ignored for x as x has a special interleaved order.
35/// @tparam Base used for the curious recurring template pattern (CRTP) so the
36/// base class can access child class fields without virtual functions
37/// @tparam Diag true if this corresponds to the diagonal block (coarse element
38/// and fine element are on our rank), false otherwise (coarse element is on our
39/// rank, fine element is on a different rank).
40///
41template <Ordering::Type Order, class Base, bool Diag = true>
42struct DerefineMatrixOpFunctorBase;
43
44template <class Base>
45struct DerefineMatrixOpFunctorBase<Ordering::byNODES, Base, true>
46{
47 /// block column indices offsets
48 const int *bcptr;
49 /// column indices
50 const int *cptr;
51
52 int MFEM_HOST_DEVICE BlockWidth(int k) const
53 {
54 return bcptr[k + 1] - bcptr[k];
55 }
56
57 void MFEM_HOST_DEVICE Col(int j, int k, int &col, int &sign) const
58 {
59 col = cptr[bcptr[k] + j];
60 if (col < 0)
61 {
62 col = -1 - col;
63 sign = -sign;
64 }
65 }
66
67 int MFEM_HOST_DEVICE IndexX(int col, int vdim, int) const
68 {
69 return col + vdim * static_cast<const Base *>(this)->width;
70 }
71 int MFEM_HOST_DEVICE IndexY(int row, int vdim) const
72 {
73 return row + vdim * static_cast<const Base *>(this)->height;
74 }
75};
76
77template <class Base>
78struct DerefineMatrixOpFunctorBase<Ordering::byVDIM, Base, true>
79{
80 /// block column indices offsets
81 const int *bcptr;
82 /// column indices
83 const int *cptr;
84
85 int MFEM_HOST_DEVICE BlockWidth(int k) const
86 {
87 return bcptr[k + 1] - bcptr[k];
88 }
89
90 void MFEM_HOST_DEVICE Col(int j, int k, int &col, int &sign) const
91 {
92 col = cptr[bcptr[k] + j];
93 if (col < 0)
94 {
95 col = -1 - col;
96 sign = -sign;
97 }
98 }
99
100 int MFEM_HOST_DEVICE IndexX(int col, int vdim, int) const
101 {
102 return vdim + col * static_cast<const Base *>(this)->vdims;
103 }
104 int MFEM_HOST_DEVICE IndexY(int row, int vdim) const
105 {
106 return vdim + row * static_cast<const Base *>(this)->vdims;
107 }
108};
109
110template <class Base>
111struct DerefineMatrixOpFunctorBase<Ordering::byNODES, Base, false>
112{
113 /// receive segment offsets
114 const int *segptr;
115 /// receive segment index
116 const int *rsptr;
117 /// off-diagonal block column offsets
118 const int *coptr;
119 /// off-diagonal block widths
120 const int *bwptr;
121
122 int MFEM_HOST_DEVICE BlockWidth(int k) const { return bwptr[k]; }
123
124 void MFEM_HOST_DEVICE Col(int j, int k, int &col, int &sign) const
125 {
126 col = coptr[k] + j;
127 }
128
129 int MFEM_HOST_DEVICE IndexX(int col, int vdim, int k) const
130 {
131 int tmp = rsptr[k];
132 int segwidth = segptr[tmp + 1] - segptr[tmp];
133 return segptr[tmp] * static_cast<const Base *>(this)->vdims + col +
134 vdim * segwidth;
135 }
136 int MFEM_HOST_DEVICE IndexY(int row, int vdim) const
137 {
138 return row + vdim * static_cast<const Base *>(this)->height;
139 }
140};
141
142template <class Base>
143struct DerefineMatrixOpFunctorBase<Ordering::byVDIM, Base, false>
144{
145 /// receive segment offsets
146 const int *segptr;
147 /// receive segment index
148 const int *rsptr;
149 /// off-diagonal block column offsets
150 const int *coptr;
151 /// off-diagonal block widths
152 const int *bwptr;
153
154 int MFEM_HOST_DEVICE BlockWidth(int k) const { return bwptr[k]; }
155
156 void MFEM_HOST_DEVICE Col(int j, int k, int &col, int &sign) const
157 {
158 col = coptr[k] + j;
159 }
160
161 int MFEM_HOST_DEVICE IndexX(int col, int vdim, int k) const
162 {
163 int tmp = rsptr[k];
164 int segwidth = segptr[tmp + 1] - segptr[tmp];
165 return segptr[tmp] * static_cast<const Base *>(this)->vdims + col +
166 vdim * segwidth;
167 }
168 int MFEM_HOST_DEVICE IndexY(int row, int vdim) const
169 {
170 return vdim + row * static_cast<const Base *>(this)->vdims;
171 }
172};
173
174/// internally used to implement the derefinement operator Mult diagonal
175/// block
176template <Ordering::Type Order, bool Atomic, bool Diag = true>
177struct DerefineMatrixOpMultFunctor
178 : public DerefineMatrixOpFunctorBase<
179 Order, DerefineMatrixOpMultFunctor<Order, Atomic, Diag>, Diag>
180{
181 const real_t *xptr;
182 real_t *yptr;
183 /// block storage
184 const real_t *bsptr;
185 /// block offsets
186 const int *boptr;
187 /// block row index offsets
188 const int *brptr;
189 /// row indices
190 const int *rptr;
191
192 // number of blocks
193 int nblocks;
194 // number of components
195 int vdims;
196 /// overall operator height (for vdim = 1)
197 int height;
198 /// overall operator width (for vdim = 1)
199 int width;
200 void MFEM_HOST_DEVICE operator()(int kidx) const
201 {
202 int k = kidx % nblocks;
203 int vdim = kidx / nblocks;
204
205 int block_height = brptr[k + 1] - brptr[k];
206 int block_width = this->BlockWidth(k);
207 MFEM_FOREACH_THREAD(i, x, block_height)
208 {
209 int row = rptr[brptr[k] + i];
210 int rsign = 1;
211 if (row < 0)
212 {
213 row = -1 - row;
214 rsign = -1;
215 }
216 if (row < INT_MAX)
217 {
218 // row not marked as unused
219 real_t sum = 0;
220 for (int j = 0; j < block_width; ++j)
221 {
222 int col, sign = rsign;
223 this->Col(j, k, col, sign);
224 sum += sign * bsptr[boptr[k] + i + j * block_height] *
225 xptr[this->IndexX(col, vdim, k)];
226 }
227#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
228 if (Atomic)
229 {
230 atomicAdd(yptr + this->IndexY(row, vdim), sum);
231 }
232 else
233#endif
234 {
235 yptr[this->IndexY(row, vdim)] += sum;
236 }
237 }
238 }
239 }
240
241 /// N is the max block row size (doesn't have to be a power of 2)
242 void Run(int N) const { forall_2D(nblocks * vdims, N, 1, *this); }
243};
244
245} // namespace internal
246/// \endcond DO_NOT_DOCUMENT
247} // namespace mfem
248
249#endif
MFEM_DEVICE mfem::real_t atomicAdd(mfem::real_t *add, mfem::real_t val)
Definition backends.hpp:70
mfem::real_t real_t
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:925