MFEM  v4.5.2
Finite element discretization library
bilininteg_mass_pa.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_BILININTEG_MASS_PA_HPP
13 #define MFEM_BILININTEG_MASS_PA_HPP
14 
15 #include "../config/config.hpp"
16 #include "../general/forall.hpp"
17 #include "../linalg/dtensor.hpp"
18 
19 namespace mfem
20 {
21 
22 namespace internal
23 {
24 
25 template <bool ACCUMULATE = true>
26 MFEM_HOST_DEVICE inline
27 void PAMassApply2D_Element(const int e,
28  const int NE,
29  const double *b_,
30  const double *bt_,
31  const double *d_,
32  const double *x_,
33  double *y_,
34  const int d1d = 0,
35  const int q1d = 0)
36 {
37  const int D1D = d1d;
38  const int Q1D = q1d;
39  auto B = ConstDeviceMatrix(b_, Q1D, D1D);
40  auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D);
41  auto D = ConstDeviceCube(d_, Q1D, Q1D, NE);
42  auto X = ConstDeviceCube(x_, D1D, D1D, NE);
43  auto Y = DeviceCube(y_, D1D, D1D, NE);
44 
45  if (!ACCUMULATE)
46  {
47  for (int dy = 0; dy < D1D; ++dy)
48  {
49  for (int dx = 0; dx < D1D; ++dx)
50  {
51  Y(dx, dy, e) = 0.0;
52  }
53  }
54  }
55 
56  constexpr int max_D1D = MAX_D1D;
57  constexpr int max_Q1D = MAX_Q1D;
58  double sol_xy[max_Q1D][max_Q1D];
59  for (int qy = 0; qy < Q1D; ++qy)
60  {
61  for (int qx = 0; qx < Q1D; ++qx)
62  {
63  sol_xy[qy][qx] = 0.0;
64  }
65  }
66  for (int dy = 0; dy < D1D; ++dy)
67  {
68  double sol_x[max_Q1D];
69  for (int qy = 0; qy < Q1D; ++qy)
70  {
71  sol_x[qy] = 0.0;
72  }
73  for (int dx = 0; dx < D1D; ++dx)
74  {
75  const double s = X(dx,dy,e);
76  for (int qx = 0; qx < Q1D; ++qx)
77  {
78  sol_x[qx] += B(qx,dx)* s;
79  }
80  }
81  for (int qy = 0; qy < Q1D; ++qy)
82  {
83  const double d2q = B(qy,dy);
84  for (int qx = 0; qx < Q1D; ++qx)
85  {
86  sol_xy[qy][qx] += d2q * sol_x[qx];
87  }
88  }
89  }
90  for (int qy = 0; qy < Q1D; ++qy)
91  {
92  for (int qx = 0; qx < Q1D; ++qx)
93  {
94  sol_xy[qy][qx] *= D(qx,qy,e);
95  }
96  }
97  for (int qy = 0; qy < Q1D; ++qy)
98  {
99  double sol_x[max_D1D];
100  for (int dx = 0; dx < D1D; ++dx)
101  {
102  sol_x[dx] = 0.0;
103  }
104  for (int qx = 0; qx < Q1D; ++qx)
105  {
106  const double s = sol_xy[qy][qx];
107  for (int dx = 0; dx < D1D; ++dx)
108  {
109  sol_x[dx] += Bt(dx,qx) * s;
110  }
111  }
112  for (int dy = 0; dy < D1D; ++dy)
113  {
114  const double q2d = Bt(dy,qy);
115  for (int dx = 0; dx < D1D; ++dx)
116  {
117  Y(dx,dy,e) += q2d * sol_x[dx];
118  }
119  }
120  }
121 }
122 
123 template<int T_D1D, int T_Q1D, int T_NBZ, bool ACCUMULATE = true>
124 MFEM_HOST_DEVICE inline
125 void SmemPAMassApply2D_Element(const int e,
126  const int NE,
127  const double *b_,
128  const double *d_,
129  const double *x_,
130  double *y_,
131  int d1d = 0,
132  int q1d = 0)
133 {
134  const int D1D = T_D1D ? T_D1D : d1d;
135  const int Q1D = T_Q1D ? T_Q1D : q1d;
136  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
137 
138  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
139  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
140  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
141 
142  auto b = ConstDeviceMatrix(b_, Q1D, D1D);
143  auto D = ConstDeviceCube(d_, Q1D, Q1D, NE);
144  auto x = ConstDeviceCube(x_, D1D, D1D, NE);
145  auto Y = DeviceCube(y_, D1D, D1D, NE);
146 
147  const int tidz = MFEM_THREAD_ID(z);
148 
149  MFEM_SHARED double BBt[MQ1*MD1];
150  double (*B)[MD1] = (double (*)[MD1]) BBt;
151  double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
152  MFEM_SHARED double sm0[NBZ][MDQ*MDQ];
153  MFEM_SHARED double sm1[NBZ][MDQ*MDQ];
154  double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
155  double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
156  double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
157  double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
158 
159 
160  MFEM_FOREACH_THREAD(dy,y,D1D)
161  {
162  MFEM_FOREACH_THREAD(dx,x,D1D)
163  {
164  X[dy][dx] = x(dx,dy,e);
165  }
166  }
167  if (tidz == 0)
168  {
169  MFEM_FOREACH_THREAD(dy,y,D1D)
170  {
171  MFEM_FOREACH_THREAD(q,x,Q1D)
172  {
173  B[q][dy] = b(q,dy);
174  }
175  }
176  }
177  MFEM_SYNC_THREAD;
178  MFEM_FOREACH_THREAD(dy,y,D1D)
179  {
180  MFEM_FOREACH_THREAD(qx,x,Q1D)
181  {
182  double dq = 0.0;
183  for (int dx = 0; dx < D1D; ++dx)
184  {
185  dq += X[dy][dx] * B[qx][dx];
186  }
187  DQ[dy][qx] = dq;
188  }
189  }
190  MFEM_SYNC_THREAD;
191  MFEM_FOREACH_THREAD(qy,y,Q1D)
192  {
193  MFEM_FOREACH_THREAD(qx,x,Q1D)
194  {
195  double qq = 0.0;
196  for (int dy = 0; dy < D1D; ++dy)
197  {
198  qq += DQ[dy][qx] * B[qy][dy];
199  }
200  QQ[qy][qx] = qq * D(qx, qy, e);
201  }
202  }
203  MFEM_SYNC_THREAD;
204  if (tidz == 0)
205  {
206  MFEM_FOREACH_THREAD(dy,y,D1D)
207  {
208  MFEM_FOREACH_THREAD(q,x,Q1D)
209  {
210  Bt[dy][q] = b(q,dy);
211  }
212  }
213  }
214  MFEM_SYNC_THREAD;
215  MFEM_FOREACH_THREAD(qy,y,Q1D)
216  {
217  MFEM_FOREACH_THREAD(dx,x,D1D)
218  {
219  double dq = 0.0;
220  for (int qx = 0; qx < Q1D; ++qx)
221  {
222  dq += QQ[qy][qx] * Bt[dx][qx];
223  }
224  QD[qy][dx] = dq;
225  }
226  }
227  MFEM_SYNC_THREAD;
228  MFEM_FOREACH_THREAD(dy,y,D1D)
229  {
230  MFEM_FOREACH_THREAD(dx,x,D1D)
231  {
232  double dd = 0.0;
233  for (int qy = 0; qy < Q1D; ++qy)
234  {
235  dd += (QD[qy][dx] * Bt[dy][qy]);
236  }
237  if (ACCUMULATE)
238  {
239  Y(dx, dy, e) += dd;
240  }
241  else
242  {
243  Y(dx, dy, e) = dd;
244  }
245  }
246  }
247 }
248 
249 template <bool ACCUMULATE = true>
250 MFEM_HOST_DEVICE inline
251 void PAMassApply3D_Element(const int e,
252  const int NE,
253  const double *b_,
254  const double *bt_,
255  const double *d_,
256  const double *x_,
257  double *y_,
258  const int d1d,
259  const int q1d)
260 {
261  const int D1D = d1d;
262  const int Q1D = q1d;
263  auto B = ConstDeviceMatrix(b_, Q1D, D1D);
264  auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D);
265  auto D = DeviceTensor<4,const double>(d_, Q1D, Q1D, Q1D, NE);
266  auto X = DeviceTensor<4,const double>(x_, D1D, D1D, D1D, NE);
267  auto Y = DeviceTensor<4,double>(y_, D1D, D1D, D1D, NE);
268 
269  if (!ACCUMULATE)
270  {
271  for (int dz = 0; dz < D1D; ++dz)
272  {
273  for (int dy = 0; dy < D1D; ++dy)
274  {
275  for (int dx = 0; dx < D1D; ++dx)
276  {
277  Y(dx, dy, dz, e) = 0.0;
278  }
279  }
280  }
281  }
282 
283  constexpr int max_D1D = MAX_D1D;
284  constexpr int max_Q1D = MAX_Q1D;
285  double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
286  for (int qz = 0; qz < Q1D; ++qz)
287  {
288  for (int qy = 0; qy < Q1D; ++qy)
289  {
290  for (int qx = 0; qx < Q1D; ++qx)
291  {
292  sol_xyz[qz][qy][qx] = 0.0;
293  }
294  }
295  }
296  for (int dz = 0; dz < D1D; ++dz)
297  {
298  double sol_xy[max_Q1D][max_Q1D];
299  for (int qy = 0; qy < Q1D; ++qy)
300  {
301  for (int qx = 0; qx < Q1D; ++qx)
302  {
303  sol_xy[qy][qx] = 0.0;
304  }
305  }
306  for (int dy = 0; dy < D1D; ++dy)
307  {
308  double sol_x[max_Q1D];
309  for (int qx = 0; qx < Q1D; ++qx)
310  {
311  sol_x[qx] = 0;
312  }
313  for (int dx = 0; dx < D1D; ++dx)
314  {
315  const double s = X(dx,dy,dz,e);
316  for (int qx = 0; qx < Q1D; ++qx)
317  {
318  sol_x[qx] += B(qx,dx) * s;
319  }
320  }
321  for (int qy = 0; qy < Q1D; ++qy)
322  {
323  const double wy = B(qy,dy);
324  for (int qx = 0; qx < Q1D; ++qx)
325  {
326  sol_xy[qy][qx] += wy * sol_x[qx];
327  }
328  }
329  }
330  for (int qz = 0; qz < Q1D; ++qz)
331  {
332  const double wz = B(qz,dz);
333  for (int qy = 0; qy < Q1D; ++qy)
334  {
335  for (int qx = 0; qx < Q1D; ++qx)
336  {
337  sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
338  }
339  }
340  }
341  }
342  for (int qz = 0; qz < Q1D; ++qz)
343  {
344  for (int qy = 0; qy < Q1D; ++qy)
345  {
346  for (int qx = 0; qx < Q1D; ++qx)
347  {
348  sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
349  }
350  }
351  }
352  for (int qz = 0; qz < Q1D; ++qz)
353  {
354  double sol_xy[max_D1D][max_D1D];
355  for (int dy = 0; dy < D1D; ++dy)
356  {
357  for (int dx = 0; dx < D1D; ++dx)
358  {
359  sol_xy[dy][dx] = 0;
360  }
361  }
362  for (int qy = 0; qy < Q1D; ++qy)
363  {
364  double sol_x[max_D1D];
365  for (int dx = 0; dx < D1D; ++dx)
366  {
367  sol_x[dx] = 0;
368  }
369  for (int qx = 0; qx < Q1D; ++qx)
370  {
371  const double s = sol_xyz[qz][qy][qx];
372  for (int dx = 0; dx < D1D; ++dx)
373  {
374  sol_x[dx] += Bt(dx,qx) * s;
375  }
376  }
377  for (int dy = 0; dy < D1D; ++dy)
378  {
379  const double wy = Bt(dy,qy);
380  for (int dx = 0; dx < D1D; ++dx)
381  {
382  sol_xy[dy][dx] += wy * sol_x[dx];
383  }
384  }
385  }
386  for (int dz = 0; dz < D1D; ++dz)
387  {
388  const double wz = Bt(dz,qz);
389  for (int dy = 0; dy < D1D; ++dy)
390  {
391  for (int dx = 0; dx < D1D; ++dx)
392  {
393  Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
394  }
395  }
396  }
397  }
398 }
399 
400 template<int T_D1D, int T_Q1D, bool ACCUMULATE = true>
401 MFEM_HOST_DEVICE inline
402 void SmemPAMassApply3D_Element(const int e,
403  const int NE,
404  const double *b_,
405  const double *d_,
406  const double *x_,
407  double *y_,
408  const int d1d = 0,
409  const int q1d = 0)
410 {
411  constexpr int D1D = T_D1D ? T_D1D : d1d;
412  constexpr int Q1D = T_Q1D ? T_Q1D : q1d;
413  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
414  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
415  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
416 
417  auto b = ConstDeviceMatrix(b_, Q1D, D1D);
418  auto d = DeviceTensor<4,const double>(d_, Q1D, Q1D, Q1D, NE);
419  auto x = DeviceTensor<4,const double>(x_, D1D, D1D, D1D, NE);
420  auto y = DeviceTensor<4,double>(y_, D1D, D1D, D1D, NE);
421 
422  MFEM_SHARED double sDQ[MQ1*MD1];
423  double (*B)[MD1] = (double (*)[MD1]) sDQ;
424  double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
425  MFEM_SHARED double sm0[MDQ*MDQ*MDQ];
426  MFEM_SHARED double sm1[MDQ*MDQ*MDQ];
427  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
428  double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
429  double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
430  double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
431  double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
432  double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
433  MFEM_FOREACH_THREAD(dy,y,D1D)
434  {
435  MFEM_FOREACH_THREAD(dx,x,D1D)
436  {
437  MFEM_UNROLL(MD1)
438  for (int dz = 0; dz < D1D; ++dz)
439  {
440  X[dz][dy][dx] = x(dx,dy,dz,e);
441  }
442  }
443  MFEM_FOREACH_THREAD(dx,x,Q1D)
444  {
445  B[dx][dy] = b(dx,dy);
446  }
447  }
448  MFEM_SYNC_THREAD;
449  MFEM_FOREACH_THREAD(dy,y,D1D)
450  {
451  MFEM_FOREACH_THREAD(qx,x,Q1D)
452  {
453  double u[D1D];
454  MFEM_UNROLL(MD1)
455  for (int dz = 0; dz < D1D; dz++)
456  {
457  u[dz] = 0;
458  }
459  MFEM_UNROLL(MD1)
460  for (int dx = 0; dx < D1D; ++dx)
461  {
462  MFEM_UNROLL(MD1)
463  for (int dz = 0; dz < D1D; ++dz)
464  {
465  u[dz] += X[dz][dy][dx] * B[qx][dx];
466  }
467  }
468  MFEM_UNROLL(MD1)
469  for (int dz = 0; dz < D1D; ++dz)
470  {
471  DDQ[dz][dy][qx] = u[dz];
472  }
473  }
474  }
475  MFEM_SYNC_THREAD;
476  MFEM_FOREACH_THREAD(qy,y,Q1D)
477  {
478  MFEM_FOREACH_THREAD(qx,x,Q1D)
479  {
480  double u[D1D];
481  MFEM_UNROLL(MD1)
482  for (int dz = 0; dz < D1D; dz++)
483  {
484  u[dz] = 0;
485  }
486  MFEM_UNROLL(MD1)
487  for (int dy = 0; dy < D1D; ++dy)
488  {
489  MFEM_UNROLL(MD1)
490  for (int dz = 0; dz < D1D; dz++)
491  {
492  u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
493  }
494  }
495  MFEM_UNROLL(MD1)
496  for (int dz = 0; dz < D1D; dz++)
497  {
498  DQQ[dz][qy][qx] = u[dz];
499  }
500  }
501  }
502  MFEM_SYNC_THREAD;
503  MFEM_FOREACH_THREAD(qy,y,Q1D)
504  {
505  MFEM_FOREACH_THREAD(qx,x,Q1D)
506  {
507  double u[Q1D];
508  MFEM_UNROLL(MQ1)
509  for (int qz = 0; qz < Q1D; qz++)
510  {
511  u[qz] = 0;
512  }
513  MFEM_UNROLL(MD1)
514  for (int dz = 0; dz < D1D; ++dz)
515  {
516  MFEM_UNROLL(MQ1)
517  for (int qz = 0; qz < Q1D; qz++)
518  {
519  u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
520  }
521  }
522  MFEM_UNROLL(MQ1)
523  for (int qz = 0; qz < Q1D; qz++)
524  {
525  QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
526  }
527  }
528  }
529  MFEM_SYNC_THREAD;
530  MFEM_FOREACH_THREAD(di,y,D1D)
531  {
532  MFEM_FOREACH_THREAD(q,x,Q1D)
533  {
534  Bt[di][q] = b(q,di);
535  }
536  }
537  MFEM_SYNC_THREAD;
538  MFEM_FOREACH_THREAD(qy,y,Q1D)
539  {
540  MFEM_FOREACH_THREAD(dx,x,D1D)
541  {
542  double u[Q1D];
543  MFEM_UNROLL(MQ1)
544  for (int qz = 0; qz < Q1D; ++qz)
545  {
546  u[qz] = 0;
547  }
548  MFEM_UNROLL(MQ1)
549  for (int qx = 0; qx < Q1D; ++qx)
550  {
551  MFEM_UNROLL(MQ1)
552  for (int qz = 0; qz < Q1D; ++qz)
553  {
554  u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
555  }
556  }
557  MFEM_UNROLL(MQ1)
558  for (int qz = 0; qz < Q1D; ++qz)
559  {
560  QQD[qz][qy][dx] = u[qz];
561  }
562  }
563  }
564  MFEM_SYNC_THREAD;
565  MFEM_FOREACH_THREAD(dy,y,D1D)
566  {
567  MFEM_FOREACH_THREAD(dx,x,D1D)
568  {
569  double u[Q1D];
570  MFEM_UNROLL(MQ1)
571  for (int qz = 0; qz < Q1D; ++qz)
572  {
573  u[qz] = 0;
574  }
575  MFEM_UNROLL(MQ1)
576  for (int qy = 0; qy < Q1D; ++qy)
577  {
578  MFEM_UNROLL(MQ1)
579  for (int qz = 0; qz < Q1D; ++qz)
580  {
581  u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
582  }
583  }
584  MFEM_UNROLL(MQ1)
585  for (int qz = 0; qz < Q1D; ++qz)
586  {
587  QDD[qz][dy][dx] = u[qz];
588  }
589  }
590  }
591  MFEM_SYNC_THREAD;
592  MFEM_FOREACH_THREAD(dy,y,D1D)
593  {
594  MFEM_FOREACH_THREAD(dx,x,D1D)
595  {
596  double u[D1D];
597  MFEM_UNROLL(MD1)
598  for (int dz = 0; dz < D1D; ++dz)
599  {
600  u[dz] = 0;
601  }
602  MFEM_UNROLL(MQ1)
603  for (int qz = 0; qz < Q1D; ++qz)
604  {
605  MFEM_UNROLL(MD1)
606  for (int dz = 0; dz < D1D; ++dz)
607  {
608  u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
609  }
610  }
611  MFEM_UNROLL(MD1)
612  for (int dz = 0; dz < D1D; ++dz)
613  {
614  if (ACCUMULATE)
615  {
616  y(dx,dy,dz,e) += u[dz];
617  }
618  else
619  {
620  y(dx,dy,dz,e) = u[dz];
621  }
622  }
623  }
624  }
625  MFEM_SYNC_THREAD;
626 }
627 
628 } // namespace internal
629 
630 } // namespace mfem
631 
632 #endif
DeviceTensor< 2, const double > ConstDeviceMatrix
Definition: dtensor.hpp:144
const int MAX_Q1D
Definition: forall.hpp:29
double b
Definition: lissajous.cpp:42
DeviceTensor< 3, double > DeviceCube
Definition: dtensor.hpp:146
const int MAX_D1D
Definition: forall.hpp:28
RefCoord s[3]
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
DeviceTensor< 3, const double > ConstDeviceCube
Definition: dtensor.hpp:147