MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
dgmassinv_kernels.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_DGMASSINV_KERNELS_HPP
13#define MFEM_DGMASSINV_KERNELS_HPP
14
15#include "../linalg/kernels.hpp"
16#include "kernels.hpp"
18
19namespace mfem
20{
21
22namespace internal
23{
24
25template <int DIM, int D1D, int Q1D>
26MFEM_HOST_DEVICE inline
27void DGMassApply(const int e,
28 const int NE,
29 const real_t *B,
30 const real_t *Bt,
31 const real_t *pa_data,
32 const real_t *x,
33 real_t *y,
34 const int d1d = 0,
35 const int q1d = 0)
36{
37 constexpr bool use_smem = (D1D > 0 && Q1D > 0);
38 constexpr bool ACCUM = false;
39 constexpr int NBZ = 1;
40
41 if (DIM == 1)
42 {
43 PAMassApply1D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
44 return;
45 }
46
47 if (use_smem)
48 {
49 // cannot specialize functions below with D1D or Q1D equal to zero
50 // (this branch only runs with D1D and Q1D are both positive)
51 constexpr int TD1D = D1D ? D1D : 1;
52 constexpr int TQ1D = Q1D ? Q1D : 1;
53 if (DIM == 2)
54 {
55 SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
56 }
57 else if (DIM == 3)
58 {
59 SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
60 }
61 else
62 {
63 MFEM_ABORT_KERNEL("Unsupported dimension.");
64 }
65 }
66 else
67 {
68 if (DIM == 2)
69 {
70 PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
71 }
72 else if (DIM == 3)
73 {
74 PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
75 }
76 else
77 {
78 MFEM_ABORT_KERNEL("Unsupported dimension.");
79 }
80 }
81}
82
83MFEM_HOST_DEVICE inline
84void DGMassPreconditioner(const int e,
85 const int NE,
86 const int ND,
87 const real_t *dinv,
88 const real_t *x,
89 real_t *y)
90{
91 const auto X = ConstDeviceMatrix(x, ND, NE);
92 const auto D = ConstDeviceMatrix(dinv, ND, NE);
93 auto Y = DeviceMatrix(y, ND, NE);
94
95 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
96 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
97
98 for (int i = tid; i < ND; i += bxy)
99 {
100 Y(i, e) = D(i, e)*X(i, e);
101 }
102 MFEM_SYNC_THREAD;
103}
104
105MFEM_HOST_DEVICE inline
106void DGMassAxpy(const int e,
107 const int NE,
108 const int ND,
109 const real_t a,
110 const real_t *x,
111 const real_t b,
112 const real_t *y,
113 real_t *z)
114{
115 const auto X = ConstDeviceMatrix(x, ND, NE);
116 const auto Y = ConstDeviceMatrix(y, ND, NE);
117 auto Z = DeviceMatrix(z, ND, NE);
118
119 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
120 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
121
122 for (int i = tid; i < ND; i += bxy)
123 {
124 Z(i, e) = a*X(i, e) + b*Y(i, e);
125 }
126 MFEM_SYNC_THREAD;
127}
128
129template <int NB>
130MFEM_HOST_DEVICE inline
131real_t DGMassDot(const int e,
132 const int NE,
133 const int ND,
134 const real_t *x,
135 const real_t *y)
136{
137 const auto X = ConstDeviceMatrix(x, ND, NE);
138 const auto Y = ConstDeviceMatrix(y, ND, NE);
139
140 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
141 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
142
143 MFEM_SHARED real_t s_dot[NB*NB];
144 s_dot[tid] = 0.0;
145
146 for (int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
147 MFEM_SYNC_THREAD;
148
149 if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
150 MFEM_SYNC_THREAD;
151
152 if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
153 MFEM_SYNC_THREAD;
154
155 if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
156 MFEM_SYNC_THREAD;
157
158 if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
159 MFEM_SYNC_THREAD;
160
161 if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
162 MFEM_SYNC_THREAD;
163
164 if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
165 MFEM_SYNC_THREAD;
166
167 if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
168 MFEM_SYNC_THREAD;
169
170 if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
171 MFEM_SYNC_THREAD;
172
173 if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
174 MFEM_SYNC_THREAD;
175
176 if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
177 MFEM_SYNC_THREAD;
178
179 return s_dot[0];
180}
181
182template<int T_D1D = 0>
183MFEM_HOST_DEVICE inline
184void DGMassBasis1D(const int e,
185 const int NE,
186 const real_t *b_,
187 const real_t *x_,
188 real_t *y_,
189 const int d1d = 0)
190{
191 const int D1D = T_D1D ? T_D1D : d1d;
192
193 const auto b = Reshape(b_, D1D, D1D);
194 const auto x = Reshape(x_, D1D, NE);
195 auto y = Reshape(y_, D1D, NE);
196
197 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
198 real_t Y[MD1];
199
200 MFEM_FOREACH_THREAD(i,x,D1D)
201 {
202 real_t val = 0.0;
203 for (int j = 0; j < D1D; ++j)
204 {
205 val += b(i,j)*x(j,e);
206 }
207 Y[i] = val;
208 }
209 MFEM_SYNC_THREAD;
210 if (MFEM_THREAD_ID(y) == 0)
211 {
212 MFEM_FOREACH_THREAD(i,x,D1D)
213 {
214 y(i,e) = Y[i];
215 }
216 }
217}
218
219template<int T_D1D = 0>
220MFEM_HOST_DEVICE inline
221void DGMassBasis2D(const int e,
222 const int NE,
223 const real_t *b_,
224 const real_t *x_,
225 real_t *y_,
226 const int d1d = 0)
227{
228 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
229 const int D1D = T_D1D ? T_D1D : d1d;
230
231 const auto b = Reshape(b_, D1D, D1D);
232 const auto x = Reshape(x_, D1D, D1D, NE);
233 auto y = Reshape(y_, D1D, D1D, NE);
234
235 MFEM_SHARED real_t sB[MD1*MD1];
236 MFEM_SHARED real_t sm0[MD1*MD1];
237 MFEM_SHARED real_t sm1[MD1*MD1];
238
239 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
240
241 ConstDeviceMatrix B(sB, D1D,D1D);
242 DeviceMatrix DD(sm0, MD1, MD1);
243 DeviceMatrix DQ(sm1, MD1, MD1);
244 DeviceMatrix QQ(sm0, MD1, MD1);
245
246 kernels::internal::LoadX(e,D1D,x,DD);
247 kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
248 kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
249 MFEM_SYNC_THREAD; // sync here to allow in-place evaluations
250 MFEM_FOREACH_THREAD(qy,y,D1D)
251 {
252 MFEM_FOREACH_THREAD(qx,x,D1D)
253 {
254 y(qx,qy,e) = QQ(qx,qy);
255 }
256 }
257 MFEM_SYNC_THREAD;
258}
259
260template<int T_D1D = 0>
261MFEM_HOST_DEVICE inline
262void DGMassBasis3D(const int e,
263 const int NE,
264 const real_t *b_,
265 const real_t *x_,
266 real_t *y_,
267 const int d1d = 0)
268{
269 const int D1D = T_D1D ? T_D1D : d1d;
270
271 const auto b = Reshape(b_, D1D, D1D);
272 const auto x = Reshape(x_, D1D, D1D, D1D, NE);
273 auto y = Reshape(y_, D1D, D1D, D1D, NE);
274
275 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
276
277 MFEM_SHARED real_t sB[MD1*MD1];
278 MFEM_SHARED real_t sm0[MD1*MD1*MD1];
279 MFEM_SHARED real_t sm1[MD1*MD1*MD1];
280
281 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
282
283 ConstDeviceMatrix B(sB, D1D,D1D);
284 DeviceCube DDD(sm0, MD1,MD1,MD1);
285 DeviceCube DDQ(sm1, MD1,MD1,MD1);
286 DeviceCube DQQ(sm0, MD1,MD1,MD1);
287 DeviceCube QQQ(sm1, MD1,MD1,MD1);
288
289 kernels::internal::LoadX(e,D1D,x,DDD);
290 kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
291 kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
292 kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
293 MFEM_SYNC_THREAD; // sync here to allow in-place evaluation
294 MFEM_FOREACH_THREAD(qz,z,D1D)
295 {
296 MFEM_FOREACH_THREAD(qy,y,D1D)
297 {
298 for (int qx = 0; qx < D1D; ++qx)
299 {
300 y(qx,qy,qz,e) = QQQ(qz,qy,qx);
301 }
302 }
303 }
304 MFEM_SYNC_THREAD;
305}
306
307template<int DIM, int T_D1D = 0>
308MFEM_HOST_DEVICE inline
309void DGMassBasis(const int e,
310 const int NE,
311 const real_t *b_,
312 const real_t *x_,
313 real_t *y_,
314 const int d1d = 0)
315{
316 if (DIM == 1)
317 {
318 DGMassBasis1D<T_D1D>(e, NE, b_, x_, y_, d1d);
319 }
320 else if (DIM == 2)
321 {
322 DGMassBasis2D<T_D1D>(e, NE, b_, x_, y_, d1d);
323 }
324 else if (DIM == 3)
325 {
326 DGMassBasis3D<T_D1D>(e, NE, b_, x_, y_, d1d);
327 }
328 else
329 {
330 MFEM_ABORT_KERNEL("Dimension not supported.");
331 }
332}
333
334} // namespace internal
335
336} // namespace mfem
337
338#endif
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
constexpr int DIM
mfem::real_t real_t
DeviceTensor< 3, real_t > DeviceCube
Definition dtensor.hpp:153
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
DeviceTensor< 2, const real_t > ConstDeviceMatrix
Definition dtensor.hpp:151
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:150