MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
dgmassinv_kernels.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_DGMASSINV_KERNELS_HPP
13#define MFEM_DGMASSINV_KERNELS_HPP
14
15#include "../linalg/kernels.hpp"
16#include "kernels.hpp"
17#include "integ/bilininteg_mass_kernels.hpp"
18
19namespace mfem
20{
21
22namespace internal
23{
24
25template <int DIM, int D1D, int Q1D>
26MFEM_HOST_DEVICE inline
27void DGMassApply(const int e,
28 const int NE,
29 const real_t *B,
30 const real_t *Bt,
31 const real_t *pa_data,
32 const real_t *x,
33 real_t *y,
34 const int d1d = 0,
35 const int q1d = 0)
36{
37 constexpr bool use_smem = (D1D > 0 && Q1D > 0);
38 constexpr bool ACCUM = false;
39 constexpr int NBZ = 1;
40 if (use_smem)
41 {
42 // cannot specialize functions below with D1D or Q1D equal to zero
43 // (this branch only runs with D1D and Q1D are both positive)
44 constexpr int TD1D = D1D ? D1D : 1;
45 constexpr int TQ1D = Q1D ? Q1D : 1;
46 if (DIM == 2)
47 {
48 SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
49 }
50 else if (DIM == 3)
51 {
52 SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
53 }
54 else
55 {
56 MFEM_ABORT_KERNEL("Unsupported dimension.");
57 }
58 }
59 else
60 {
61 if (DIM == 2)
62 {
63 PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
64 }
65 else if (DIM == 3)
66 {
67 PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
68 }
69 else
70 {
71 MFEM_ABORT_KERNEL("Unsupported dimension.");
72 }
73 }
74}
75
76MFEM_HOST_DEVICE inline
77void DGMassPreconditioner(const int e,
78 const int NE,
79 const int ND,
80 const real_t *dinv,
81 const real_t *x,
82 real_t *y)
83{
84 const auto X = ConstDeviceMatrix(x, ND, NE);
85 const auto D = ConstDeviceMatrix(dinv, ND, NE);
86 auto Y = DeviceMatrix(y, ND, NE);
87
88 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
89 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
90
91 for (int i = tid; i < ND; i += bxy)
92 {
93 Y(i, e) = D(i, e)*X(i, e);
94 }
95 MFEM_SYNC_THREAD;
96}
97
98MFEM_HOST_DEVICE inline
99void DGMassAxpy(const int e,
100 const int NE,
101 const int ND,
102 const real_t a,
103 const real_t *x,
104 const real_t b,
105 const real_t *y,
106 real_t *z)
107{
108 const auto X = ConstDeviceMatrix(x, ND, NE);
109 const auto Y = ConstDeviceMatrix(y, ND, NE);
110 auto Z = DeviceMatrix(z, ND, NE);
111
112 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
113 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
114
115 for (int i = tid; i < ND; i += bxy)
116 {
117 Z(i, e) = a*X(i, e) + b*Y(i, e);
118 }
119 MFEM_SYNC_THREAD;
120}
121
122template <int NB>
123MFEM_HOST_DEVICE inline
124real_t DGMassDot(const int e,
125 const int NE,
126 const int ND,
127 const real_t *x,
128 const real_t *y)
129{
130 const auto X = ConstDeviceMatrix(x, ND, NE);
131 const auto Y = ConstDeviceMatrix(y, ND, NE);
132
133 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
134 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
135
136 MFEM_SHARED real_t s_dot[NB*NB];
137 s_dot[tid] = 0.0;
138
139 for (int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
140 MFEM_SYNC_THREAD;
141
142 if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
143 MFEM_SYNC_THREAD;
144
145 if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
146 MFEM_SYNC_THREAD;
147
148 if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
149 MFEM_SYNC_THREAD;
150
151 if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
152 MFEM_SYNC_THREAD;
153
154 if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
155 MFEM_SYNC_THREAD;
156
157 if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
158 MFEM_SYNC_THREAD;
159
160 if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
161 MFEM_SYNC_THREAD;
162
163 if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
164 MFEM_SYNC_THREAD;
165
166 if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
167 MFEM_SYNC_THREAD;
168
169 if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
170 MFEM_SYNC_THREAD;
171
172 return s_dot[0];
173}
174
175template<int T_D1D = 0>
176MFEM_HOST_DEVICE inline
177void DGMassBasis2D(const int e,
178 const int NE,
179 const real_t *b_,
180 const real_t *x_,
181 real_t *y_,
182 const int d1d = 0)
183{
184 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
185 const int D1D = T_D1D ? T_D1D : d1d;
186
187 const auto b = Reshape(b_, D1D, D1D);
188 const auto x = Reshape(x_, D1D, D1D, NE);
189 auto y = Reshape(y_, D1D, D1D, NE);
190
191 MFEM_SHARED real_t sB[MD1*MD1];
192 MFEM_SHARED real_t sm0[MD1*MD1];
193 MFEM_SHARED real_t sm1[MD1*MD1];
194
195 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
196
197 ConstDeviceMatrix B(sB, D1D,D1D);
198 DeviceMatrix DD(sm0, MD1, MD1);
199 DeviceMatrix DQ(sm1, MD1, MD1);
200 DeviceMatrix QQ(sm0, MD1, MD1);
201
202 kernels::internal::LoadX(e,D1D,x,DD);
203 kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
204 kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
205 MFEM_SYNC_THREAD; // sync here to allow in-place evaluations
206 MFEM_FOREACH_THREAD(qy,y,D1D)
207 {
208 MFEM_FOREACH_THREAD(qx,x,D1D)
209 {
210 y(qx,qy,e) = QQ(qx,qy);
211 }
212 }
213 MFEM_SYNC_THREAD;
214}
215
216template<int T_D1D = 0>
217MFEM_HOST_DEVICE inline
218void DGMassBasis3D(const int e,
219 const int NE,
220 const real_t *b_,
221 const real_t *x_,
222 real_t *y_,
223 const int d1d = 0)
224{
225 const int D1D = T_D1D ? T_D1D : d1d;
226
227 const auto b = Reshape(b_, D1D, D1D);
228 const auto x = Reshape(x_, D1D, D1D, D1D, NE);
229 auto y = Reshape(y_, D1D, D1D, D1D, NE);
230
231 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
232
233 MFEM_SHARED real_t sB[MD1*MD1];
234 MFEM_SHARED real_t sm0[MD1*MD1*MD1];
235 MFEM_SHARED real_t sm1[MD1*MD1*MD1];
236
237 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
238
239 ConstDeviceMatrix B(sB, D1D,D1D);
240 DeviceCube DDD(sm0, MD1,MD1,MD1);
241 DeviceCube DDQ(sm1, MD1,MD1,MD1);
242 DeviceCube DQQ(sm0, MD1,MD1,MD1);
243 DeviceCube QQQ(sm1, MD1,MD1,MD1);
244
245 kernels::internal::LoadX(e,D1D,x,DDD);
246 kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
247 kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
248 kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
249 MFEM_SYNC_THREAD; // sync here to allow in-place evaluation
250 MFEM_FOREACH_THREAD(qz,z,D1D)
251 {
252 MFEM_FOREACH_THREAD(qy,y,D1D)
253 {
254 for (int qx = 0; qx < D1D; ++qx)
255 {
256 y(qx,qy,qz,e) = QQQ(qz,qy,qx);
257 }
258 }
259 }
260 MFEM_SYNC_THREAD;
261}
262
263template<int DIM, int T_D1D = 0>
264MFEM_HOST_DEVICE inline
265void DGMassBasis(const int e,
266 const int NE,
267 const real_t *b_,
268 const real_t *x_,
269 real_t *y_,
270 const int d1d = 0)
271{
272 if (DIM == 2)
273 {
274 DGMassBasis2D<T_D1D>(e, NE, b_, x_, y_, d1d);
275 }
276 else if (DIM == 3)
277 {
278 DGMassBasis3D<T_D1D>(e, NE, b_, x_, y_, d1d);
279 }
280 else
281 {
282 MFEM_ABORT_KERNEL("Dimension not supported.");
283 }
284}
285
286} // namespace internal
287
288} // namespace mfem
289
290#endif
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
constexpr int DIM
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:143
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
DeviceTensor< 2, const real_t > ConstDeviceMatrix
Definition dtensor.hpp:144
float real_t
Definition config.hpp:43
DeviceTensor< 3, real_t > DeviceCube
Definition dtensor.hpp:146