MFEM  v4.5.2
Finite element discretization library
dgmassinv_kernels.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_DGMASSINV_KERNELS_HPP
13 #define MFEM_DGMASSINV_KERNELS_HPP
14 
15 #include "bilininteg_mass_pa.hpp"
16 #include "../linalg/kernels.hpp"
17 #include "kernels.hpp"
18 
19 namespace mfem
20 {
21 
22 namespace internal
23 {
24 
25 void MakeReciprocal(int n, double *x)
26 {
27  MFEM_FORALL(i, n, x[i] = 1.0/x[i]; );
28 }
29 
30 template <int DIM, int D1D, int Q1D>
31 MFEM_HOST_DEVICE inline
32 void DGMassApply(const int e,
33  const int NE,
34  const double *B,
35  const double *Bt,
36  const double *pa_data,
37  const double *x,
38  double *y,
39  const int d1d = 0,
40  const int q1d = 0)
41 {
42  constexpr bool use_smem = (D1D > 0 && Q1D > 0);
43  constexpr bool ACCUM = false;
44  constexpr int NBZ = 1;
45  if (use_smem)
46  {
47  // cannot specialize functions below with D1D or Q1D equal to zero
48  // (this branch only runs with D1D and Q1D are both positive)
49  constexpr int TD1D = D1D ? D1D : 1;
50  constexpr int TQ1D = Q1D ? Q1D : 1;
51  if (DIM == 2)
52  {
53  SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
54  }
55  else if (DIM == 3)
56  {
57  SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
58  }
59  else
60  {
61  MFEM_ABORT_KERNEL("Unsupported dimension.");
62  }
63  }
64  else
65  {
66  if (DIM == 2)
67  {
68  PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
69  }
70  else if (DIM == 3)
71  {
72  PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
73  }
74  else
75  {
76  MFEM_ABORT_KERNEL("Unsupported dimension.");
77  }
78  }
79 }
80 
81 MFEM_HOST_DEVICE inline
82 void DGMassPreconditioner(const int e,
83  const int NE,
84  const int ND,
85  const double *dinv,
86  const double *x,
87  double *y)
88 {
89  const auto X = ConstDeviceMatrix(x, ND, NE);
90  const auto D = ConstDeviceMatrix(dinv, ND, NE);
91  auto Y = DeviceMatrix(y, ND, NE);
92 
93  const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
94  const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
95 
96  for (int i = tid; i < ND; i += bxy)
97  {
98  Y(i, e) = D(i, e)*X(i, e);
99  }
100  MFEM_SYNC_THREAD;
101 }
102 
103 MFEM_HOST_DEVICE inline
104 void DGMassAxpy(const int e,
105  const int NE,
106  const int ND,
107  const double a,
108  const double *x,
109  const double b,
110  const double *y,
111  double *z)
112 {
113  const auto X = ConstDeviceMatrix(x, ND, NE);
114  const auto Y = ConstDeviceMatrix(y, ND, NE);
115  auto Z = DeviceMatrix(z, ND, NE);
116 
117  const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
118  const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
119 
120  for (int i = tid; i < ND; i += bxy)
121  {
122  Z(i, e) = a*X(i, e) + b*Y(i, e);
123  }
124  MFEM_SYNC_THREAD;
125 }
126 
127 template <int NB>
128 MFEM_HOST_DEVICE inline
129 double DGMassDot(const int e,
130  const int NE,
131  const int ND,
132  const double *x,
133  const double *y)
134 {
135  const auto X = ConstDeviceMatrix(x, ND, NE);
136  const auto Y = ConstDeviceMatrix(y, ND, NE);
137 
138  const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
139  const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
140 
141  MFEM_SHARED double s_dot[NB*NB];
142  s_dot[tid] = 0.0;
143 
144  for (int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
145  MFEM_SYNC_THREAD;
146 
147  if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
148  MFEM_SYNC_THREAD;
149 
150  if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
151  MFEM_SYNC_THREAD;
152 
153  if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
154  MFEM_SYNC_THREAD;
155 
156  if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
157  MFEM_SYNC_THREAD;
158 
159  if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
160  MFEM_SYNC_THREAD;
161 
162  if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
163  MFEM_SYNC_THREAD;
164 
165  if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
166  MFEM_SYNC_THREAD;
167 
168  if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
169  MFEM_SYNC_THREAD;
170 
171  if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
172  MFEM_SYNC_THREAD;
173 
174  if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
175  MFEM_SYNC_THREAD;
176 
177  return s_dot[0];
178 }
179 
180 template<int T_D1D = 0, int MAX_D1D = 0>
181 MFEM_HOST_DEVICE inline
182 void DGMassBasis2D(const int e,
183  const int NE,
184  const double *b_,
185  const double *x_,
186  double *y_,
187  const int d1d = 0)
188 {
189  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
190  const int D1D = T_D1D ? T_D1D : d1d;
191 
192  const auto b = Reshape(b_, D1D, D1D);
193  const auto x = Reshape(x_, D1D, D1D, NE);
194  auto y = Reshape(y_, D1D, D1D, NE);
195 
196  MFEM_SHARED double sB[MD1*MD1];
197  MFEM_SHARED double sm0[MD1*MD1];
198  MFEM_SHARED double sm1[MD1*MD1];
199 
200  kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
201 
202  ConstDeviceMatrix B(sB, D1D,D1D);
203  DeviceMatrix DD(sm0, MD1, MD1);
204  DeviceMatrix DQ(sm1, MD1, MD1);
205  DeviceMatrix QQ(sm0, MD1, MD1);
206 
207  kernels::internal::LoadX(e,D1D,x,DD);
208  kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
209  kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
210  MFEM_SYNC_THREAD; // sync here to allow in-place evaluations
211  MFEM_FOREACH_THREAD(qy,y,D1D)
212  {
213  MFEM_FOREACH_THREAD(qx,x,D1D)
214  {
215  y(qx,qy,e) = QQ(qx,qy);
216  }
217  }
218  MFEM_SYNC_THREAD;
219 }
220 
221 template<int T_D1D = 0, int MAX_D1D = 0>
222 MFEM_HOST_DEVICE inline
223 void DGMassBasis3D(const int e,
224  const int NE,
225  const double *b_,
226  const double *x_,
227  double *y_,
228  const int d1d = 0)
229 {
230  const int D1D = T_D1D ? T_D1D : d1d;
231 
232  const auto b = Reshape(b_, D1D, D1D);
233  const auto x = Reshape(x_, D1D, D1D, D1D, NE);
234  auto y = Reshape(y_, D1D, D1D, D1D, NE);
235 
236  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
237 
238  MFEM_SHARED double sB[MD1*MD1];
239  MFEM_SHARED double sm0[MD1*MD1*MD1];
240  MFEM_SHARED double sm1[MD1*MD1*MD1];
241 
242  kernels::internal::LoadB<MD1,MD1>(D1D,D1D,b,sB);
243 
244  ConstDeviceMatrix B(sB, D1D,D1D);
245  DeviceCube DDD(sm0, MD1,MD1,MD1);
246  DeviceCube DDQ(sm1, MD1,MD1,MD1);
247  DeviceCube DQQ(sm0, MD1,MD1,MD1);
248  DeviceCube QQQ(sm1, MD1,MD1,MD1);
249 
250  kernels::internal::LoadX(e,D1D,x,DDD);
251  kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
252  kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
253  kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
254  MFEM_SYNC_THREAD; // sync here to allow in-place evaluation
255  MFEM_FOREACH_THREAD(qz,z,D1D)
256  {
257  MFEM_FOREACH_THREAD(qy,y,D1D)
258  {
259  for (int qx = 0; qx < D1D; ++qx)
260  {
261  y(qx,qy,qz,e) = QQQ(qz,qy,qx);
262  }
263  }
264  }
265  MFEM_SYNC_THREAD;
266 }
267 
268 template<int DIM, int T_D1D = 0, int MAX_D1D = 0>
269 MFEM_HOST_DEVICE inline
270 void DGMassBasis(const int e,
271  const int NE,
272  const double *b_,
273  const double *x_,
274  double *y_,
275  const int d1d = 0)
276 {
277  if (DIM == 2)
278  {
279  DGMassBasis2D<T_D1D, MAX_D1D>(e, NE, b_, x_, y_, d1d);
280  }
281  else if (DIM == 3)
282  {
283  DGMassBasis3D<T_D1D, MAX_D1D>(e, NE, b_, x_, y_, d1d);
284  }
285  else
286  {
287  MFEM_ABORT_KERNEL("Dimension not supported.");
288  }
289 }
290 
291 } // namespace internal
292 
293 } // namespace mfem
294 
295 #endif
DeviceTensor< 2, const double > ConstDeviceMatrix
Definition: dtensor.hpp:144
constexpr int DIM
DeviceTensor< 2, double > DeviceMatrix
Definition: dtensor.hpp:143
double b
Definition: lissajous.cpp:42
DeviceTensor< 3, double > DeviceCube
Definition: dtensor.hpp:146
double a
Definition: lissajous.cpp:41
const int MAX_D1D
Definition: forall.hpp:28
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131