MFEM  v4.6.0
Finite element discretization library
dgmassinv_kernels.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_DGMASSINV_KERNELS_HPP
13 #define MFEM_DGMASSINV_KERNELS_HPP
14 
15 #include "../linalg/kernels.hpp"
16 #include "kernels.hpp"
17 #include "integ/bilininteg_mass_kernels.hpp"
18 
19 namespace mfem
20 {
21 
22 namespace internal
23 {
24 
25 template <int DIM, int D1D, int Q1D>
26 MFEM_HOST_DEVICE inline
27 void DGMassApply(const int e,
28  const int NE,
29  const double *B,
30  const double *Bt,
31  const double *pa_data,
32  const double *x,
33  double *y,
34  const int d1d = 0,
35  const int q1d = 0)
36 {
37  constexpr bool use_smem = (D1D > 0 && Q1D > 0);
38  constexpr bool ACCUM = false;
39  constexpr int NBZ = 1;
40  if (use_smem)
41  {
42  // cannot specialize functions below with D1D or Q1D equal to zero
43  // (this branch only runs with D1D and Q1D are both positive)
44  constexpr int TD1D = D1D ? D1D : 1;
45  constexpr int TQ1D = Q1D ? Q1D : 1;
46  if (DIM == 2)
47  {
48  SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
49  }
50  else if (DIM == 3)
51  {
52  SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
53  }
54  else
55  {
56  MFEM_ABORT_KERNEL("Unsupported dimension.");
57  }
58  }
59  else
60  {
61  if (DIM == 2)
62  {
63  PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
64  }
65  else if (DIM == 3)
66  {
67  PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
68  }
69  else
70  {
71  MFEM_ABORT_KERNEL("Unsupported dimension.");
72  }
73  }
74 }
75 
76 MFEM_HOST_DEVICE inline
77 void DGMassPreconditioner(const int e,
78  const int NE,
79  const int ND,
80  const double *dinv,
81  const double *x,
82  double *y)
83 {
84  const auto X = ConstDeviceMatrix(x, ND, NE);
85  const auto D = ConstDeviceMatrix(dinv, ND, NE);
86  auto Y = DeviceMatrix(y, ND, NE);
87 
88  const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
89  const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
90 
91  for (int i = tid; i < ND; i += bxy)
92  {
93  Y(i, e) = D(i, e)*X(i, e);
94  }
95  MFEM_SYNC_THREAD;
96 }
97 
98 MFEM_HOST_DEVICE inline
99 void DGMassAxpy(const int e,
100  const int NE,
101  const int ND,
102  const double a,
103  const double *x,
104  const double b,
105  const double *y,
106  double *z)
107 {
108  const auto X = ConstDeviceMatrix(x, ND, NE);
109  const auto Y = ConstDeviceMatrix(y, ND, NE);
110  auto Z = DeviceMatrix(z, ND, NE);
111 
112  const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
113  const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
114 
115  for (int i = tid; i < ND; i += bxy)
116  {
117  Z(i, e) = a*X(i, e) + b*Y(i, e);
118  }
119  MFEM_SYNC_THREAD;
120 }
121 
122 template <int NB>
123 MFEM_HOST_DEVICE inline
124 double DGMassDot(const int e,
125  const int NE,
126  const int ND,
127  const double *x,
128  const double *y)
129 {
130  const auto X = ConstDeviceMatrix(x, ND, NE);
131  const auto Y = ConstDeviceMatrix(y, ND, NE);
132 
133  const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
134  const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
135 
136  MFEM_SHARED double s_dot[NB*NB];
137  s_dot[tid] = 0.0;
138 
139  for (int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
140  MFEM_SYNC_THREAD;
141 
142  if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
143  MFEM_SYNC_THREAD;
144 
145  if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
146  MFEM_SYNC_THREAD;
147 
148  if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
149  MFEM_SYNC_THREAD;
150 
151  if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
152  MFEM_SYNC_THREAD;
153 
154  if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
155  MFEM_SYNC_THREAD;
156 
157  if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
158  MFEM_SYNC_THREAD;
159 
160  if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
161  MFEM_SYNC_THREAD;
162 
163  if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
164  MFEM_SYNC_THREAD;
165 
166  if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
167  MFEM_SYNC_THREAD;
168 
169  if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
170  MFEM_SYNC_THREAD;
171 
172  return s_dot[0];
173 }
174 
175 template<int T_D1D = 0>
176 MFEM_HOST_DEVICE inline
177 void DGMassBasis2D(const int e,
178  const int NE,
179  const double *b_,
180  const double *x_,
181  double *y_,
182  const int d1d = 0)
183 {
184  constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
185  const int D1D = T_D1D ? T_D1D : d1d;
186 
187  const auto b = Reshape(b_, D1D, D1D);
188  const auto x = Reshape(x_, D1D, D1D, NE);
189  auto y = Reshape(y_, D1D, D1D, NE);
190 
191  MFEM_SHARED double sB[MD1*MD1];
192  MFEM_SHARED double sm0[MD1*MD1];
193  MFEM_SHARED double sm1[MD1*MD1];
194 
195  kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
196 
197  ConstDeviceMatrix B(sB, D1D,D1D);
198  DeviceMatrix DD(sm0, MD1, MD1);
199  DeviceMatrix DQ(sm1, MD1, MD1);
200  DeviceMatrix QQ(sm0, MD1, MD1);
201 
202  kernels::internal::LoadX(e,D1D,x,DD);
203  kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
204  kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
205  MFEM_SYNC_THREAD; // sync here to allow in-place evaluations
206  MFEM_FOREACH_THREAD(qy,y,D1D)
207  {
208  MFEM_FOREACH_THREAD(qx,x,D1D)
209  {
210  y(qx,qy,e) = QQ(qx,qy);
211  }
212  }
213  MFEM_SYNC_THREAD;
214 }
215 
216 template<int T_D1D = 0>
217 MFEM_HOST_DEVICE inline
218 void DGMassBasis3D(const int e,
219  const int NE,
220  const double *b_,
221  const double *x_,
222  double *y_,
223  const int d1d = 0)
224 {
225  const int D1D = T_D1D ? T_D1D : d1d;
226 
227  const auto b = Reshape(b_, D1D, D1D);
228  const auto x = Reshape(x_, D1D, D1D, D1D, NE);
229  auto y = Reshape(y_, D1D, D1D, D1D, NE);
230 
231  constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
232 
233  MFEM_SHARED double sB[MD1*MD1];
234  MFEM_SHARED double sm0[MD1*MD1*MD1];
235  MFEM_SHARED double sm1[MD1*MD1*MD1];
236 
237  kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
238 
239  ConstDeviceMatrix B(sB, D1D,D1D);
240  DeviceCube DDD(sm0, MD1,MD1,MD1);
241  DeviceCube DDQ(sm1, MD1,MD1,MD1);
242  DeviceCube DQQ(sm0, MD1,MD1,MD1);
243  DeviceCube QQQ(sm1, MD1,MD1,MD1);
244 
245  kernels::internal::LoadX(e,D1D,x,DDD);
246  kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
247  kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
248  kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
249  MFEM_SYNC_THREAD; // sync here to allow in-place evaluation
250  MFEM_FOREACH_THREAD(qz,z,D1D)
251  {
252  MFEM_FOREACH_THREAD(qy,y,D1D)
253  {
254  for (int qx = 0; qx < D1D; ++qx)
255  {
256  y(qx,qy,qz,e) = QQQ(qz,qy,qx);
257  }
258  }
259  }
260  MFEM_SYNC_THREAD;
261 }
262 
263 template<int DIM, int T_D1D = 0>
264 MFEM_HOST_DEVICE inline
265 void DGMassBasis(const int e,
266  const int NE,
267  const double *b_,
268  const double *x_,
269  double *y_,
270  const int d1d = 0)
271 {
272  if (DIM == 2)
273  {
274  DGMassBasis2D<T_D1D>(e, NE, b_, x_, y_, d1d);
275  }
276  else if (DIM == 3)
277  {
278  DGMassBasis3D<T_D1D>(e, NE, b_, x_, y_, d1d);
279  }
280  else
281  {
282  MFEM_ABORT_KERNEL("Dimension not supported.");
283  }
284 }
285 
286 } // namespace internal
287 
288 } // namespace mfem
289 
290 #endif
DeviceTensor< 2, const double > ConstDeviceMatrix
Definition: dtensor.hpp:144
constexpr int DIM
DeviceTensor< 2, double > DeviceMatrix
Definition: dtensor.hpp:143
double b
Definition: lissajous.cpp:42
DeviceTensor< 3, double > DeviceCube
Definition: dtensor.hpp:146
double a
Definition: lissajous.cpp:41
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131