MFEM  v4.5.1
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_hdiv.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qspace.hpp"
16 
17 using namespace std;
18 
19 
20 // Piola transformation in H(div): w = (1 / det (dF)) dF \hat{w}
21 // div w = (1 / det (dF)) \hat{div} \hat{w}
22 
23 namespace mfem
24 {
25 
26 // PA H(div) Mass Assemble 2D kernel
27 void PAHdivSetup2D(const int Q1D,
28  const int coeffDim,
29  const int NE,
30  const Array<double> &w,
31  const Vector &j,
32  Vector &coeff_,
33  Vector &op)
34 {
35  const bool symmetric = (coeffDim != 4);
36  const int NQ = Q1D*Q1D;
37  auto W = w.Read();
38 
39  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
40  auto C = Reshape(coeff_.Read(), coeffDim, NQ, NE);
41  auto y = Reshape(op.Write(), NQ, symmetric ? 3 : 4, NE);
42 
43  MFEM_FORALL(e, NE,
44  {
45  for (int q = 0; q < NQ; ++q)
46  {
47  const double J11 = J(q,0,0,e);
48  const double J21 = J(q,1,0,e);
49  const double J12 = J(q,0,1,e);
50  const double J22 = J(q,1,1,e);
51  const double c_detJ = W[q] / ((J11*J22)-(J21*J12));
52 
53  // (1/detJ) J^T C J
54  if (coeffDim == 3 || coeffDim == 4) // Matrix coefficient
55  {
56  const double C11 = C(0,q,e);
57  const double C12 = C(1,q,e);
58  const double C21 = symmetric ? C12 : C(2,q,e);
59  const double C22 = symmetric ? C(2,q,e) : C(3,q,e);
60  const double R11 = C11*J11 + C12*J21;
61  const double R21 = C21*J11 + C22*J21;
62  const double R12 = C11*J12 + C12*J22;
63  const double R22 = C21*J12 + C22*J22;
64 
65  y(q,0,e) = c_detJ * (J11*R11 + J21*R21); // 1,1
66  y(q,1,e) = c_detJ * (J11*R12 + J21*R22); // 1,2
67 
68  if (symmetric)
69  {
70  y(q,2,e) = c_detJ * (J12*R12 + J22*R22); // 2,2
71  }
72  else
73  {
74  y(q,2,e) = c_detJ * (J12*R11 + J22*R21); // 2,1
75  y(q,3,e) = c_detJ * (J12*R12 + J22*R22); // 2,2
76  }
77  }
78  else // Vector or scalar coefficient
79  {
80  const double C1 = C(0,q,e);
81  const double C2 = (coeffDim == 2 ? C(1,q,e) : C1);
82  y(q,0,e) = c_detJ * (J11*C1*J11 + J21*C2*J21); // 1,1
83  y(q,1,e) = c_detJ * (J11*C1*J12 + J21*C2*J22); // 1,2
84  y(q,2,e) = c_detJ * (J12*C1*J12 + J22*C2*J22); // 2,2
85  }
86  }
87  });
88 }
89 
90 // PA H(div) Mass Assemble 3D kernel
91 void PAHdivSetup3D(const int Q1D,
92  const int coeffDim,
93  const int NE,
94  const Array<double> &w,
95  const Vector &j,
96  Vector &coeff_,
97  Vector &op)
98 {
99  const bool symmetric = (coeffDim != 9);
100  const int NQ = Q1D*Q1D*Q1D;
101  auto W = w.Read();
102  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
103  auto C = Reshape(coeff_.Read(), coeffDim, NQ, NE);
104  auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
105 
106  MFEM_FORALL(e, NE,
107  {
108  for (int q = 0; q < NQ; ++q)
109  {
110  const double J11 = J(q,0,0,e);
111  const double J21 = J(q,1,0,e);
112  const double J31 = J(q,2,0,e);
113  const double J12 = J(q,0,1,e);
114  const double J22 = J(q,1,1,e);
115  const double J32 = J(q,2,1,e);
116  const double J13 = J(q,0,2,e);
117  const double J23 = J(q,1,2,e);
118  const double J33 = J(q,2,2,e);
119  const double detJ = J11 * (J22 * J33 - J32 * J23) -
120  /* */ J21 * (J12 * J33 - J32 * J13) +
121  /* */ J31 * (J12 * J23 - J22 * J13);
122  const double c_detJ = W[q] / detJ;
123 
124  // (1/detJ) J^T C J
125  if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
126  {
127  double M[3][3];
128  M[0][0] = C(0, q, e);
129  M[0][1] = C(1, q, e);
130  M[0][2] = C(2, q, e);
131  M[1][0] = (!symmetric) ? C(3, q, e) : M[0][1];
132  M[1][1] = (!symmetric) ? C(4, q, e) : C(3, q, e);
133  M[1][2] = (!symmetric) ? C(5, q, e) : C(4, q, e);
134  M[2][0] = (!symmetric) ? C(6, q, e) : M[0][2];
135  M[2][1] = (!symmetric) ? C(7, q, e) : M[1][2];
136  M[2][2] = (!symmetric) ? C(8, q, e) : C(5, q, e);
137 
138  int idx = 0;
139  for (int i=0; i<3; ++i)
140  for (int j = (symmetric ? i : 0); j<3; ++j)
141  {
142  y(q,idx,e) = 0.0;
143  for (int k=0; k<3; ++k)
144  {
145  double MJ_kj = 0.0;
146  for (int l=0; l<3; ++l)
147  {
148  MJ_kj += M[k][l] * J(q,l,j,e);
149  }
150 
151  y(q,idx,e) += J(q,k,i,e) * MJ_kj;
152  }
153 
154  y(q,idx,e) *= c_detJ;
155  idx++;
156  }
157  }
158  else // Vector or scalar coefficient version
159  {
160  int idx = 0;
161  for (int i=0; i<3; ++i)
162  for (int j=i; j<3; ++j)
163  {
164  y(q,idx,e) = 0.0;
165  for (int k=0; k<3; ++k)
166  {
167  y(q,idx,e) += J(q,k,i,e) * C(coeffDim == 3 ? k : 0, q, e) * J(q,k,j,e);
168  }
169 
170  y(q,idx,e) *= c_detJ;
171  idx++;
172  }
173  }
174  }
175  });
176 }
177 
178 void PAHdivMassApply2D(const int D1D,
179  const int Q1D,
180  const int NE,
181  const bool symmetric,
182  const Array<double> &Bo_,
183  const Array<double> &Bc_,
184  const Array<double> &Bot_,
185  const Array<double> &Bct_,
186  const Vector &op_,
187  const Vector &x_,
188  Vector &y_)
189 {
190  constexpr static int VDIM = 2;
191  constexpr static int MAX_D1D = HDIV_MAX_D1D;
192  constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
193 
194  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
195  auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
196  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
197  auto Bct = Reshape(Bct_.Read(), D1D, Q1D);
198  auto op = Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
199  auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
200  auto y = Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
201 
202  MFEM_FORALL(e, NE,
203  {
204  double mass[MAX_Q1D][MAX_Q1D][VDIM];
205 
206  for (int qy = 0; qy < Q1D; ++qy)
207  {
208  for (int qx = 0; qx < Q1D; ++qx)
209  {
210  for (int c = 0; c < VDIM; ++c)
211  {
212  mass[qy][qx][c] = 0.0;
213  }
214  }
215  }
216 
217  int osc = 0;
218 
219  for (int c = 0; c < VDIM; ++c) // loop over x, y components
220  {
221  const int D1Dx = (c == 1) ? D1D - 1 : D1D;
222  const int D1Dy = (c == 0) ? D1D - 1 : D1D;
223 
224  for (int dy = 0; dy < D1Dy; ++dy)
225  {
226  double massX[MAX_Q1D];
227  for (int qx = 0; qx < Q1D; ++qx)
228  {
229  massX[qx] = 0.0;
230  }
231 
232  for (int dx = 0; dx < D1Dx; ++dx)
233  {
234  const double t = x(dx + (dy * D1Dx) + osc, e);
235  for (int qx = 0; qx < Q1D; ++qx)
236  {
237  massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
238  }
239  }
240 
241  for (int qy = 0; qy < Q1D; ++qy)
242  {
243  const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
244  for (int qx = 0; qx < Q1D; ++qx)
245  {
246  mass[qy][qx][c] += massX[qx] * wy;
247  }
248  }
249  }
250 
251  osc += D1Dx * D1Dy;
252  } // loop (c) over components
253 
254  // Apply D operator.
255  for (int qy = 0; qy < Q1D; ++qy)
256  {
257  for (int qx = 0; qx < Q1D; ++qx)
258  {
259  const double O11 = op(qx,qy,0,e);
260  const double O12 = op(qx,qy,1,e);
261  const double O21 = symmetric ? O12 : op(qx,qy,2,e);
262  const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
263  const double massX = mass[qy][qx][0];
264  const double massY = mass[qy][qx][1];
265  mass[qy][qx][0] = (O11*massX)+(O12*massY);
266  mass[qy][qx][1] = (O21*massX)+(O22*massY);
267  }
268  }
269 
270  for (int qy = 0; qy < Q1D; ++qy)
271  {
272  osc = 0;
273 
274  for (int c = 0; c < VDIM; ++c) // loop over x, y components
275  {
276  const int D1Dx = (c == 1) ? D1D - 1 : D1D;
277  const int D1Dy = (c == 0) ? D1D - 1 : D1D;
278 
279  double massX[MAX_D1D];
280  for (int dx = 0; dx < D1Dx; ++dx)
281  {
282  massX[dx] = 0;
283  }
284  for (int qx = 0; qx < Q1D; ++qx)
285  {
286  for (int dx = 0; dx < D1Dx; ++dx)
287  {
288  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bct(dx,qx) :
289  Bot(dx,qx));
290  }
291  }
292 
293  for (int dy = 0; dy < D1Dy; ++dy)
294  {
295  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
296 
297  for (int dx = 0; dx < D1Dx; ++dx)
298  {
299  y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
300  }
301  }
302 
303  osc += D1Dx * D1Dy;
304  } // loop c
305  } // loop qy
306  }); // end of element loop
307 }
308 
309 template<int T_D1D = 0, int T_Q1D = 0>
310 void SmemPAHdivMassApply2D(const int NE,
311  const bool symmetric,
312  const Array<double> &Bo_,
313  const Array<double> &Bc_,
314  const Array<double> &Bot_,
315  const Array<double> &Bct_,
316  const Vector &op_,
317  const Vector &x_,
318  Vector &y_,
319  const int d1d = 0,
320  const int q1d = 0)
321 {
322  MFEM_CONTRACT_VAR(Bot_);
323  MFEM_CONTRACT_VAR(Bct_);
324 
325  static constexpr int VDIM = 2;
326 
327  const int D1D = T_D1D ? T_D1D : d1d;
328  const int Q1D = T_Q1D ? T_Q1D : q1d;
329 
330  const auto bo = Reshape(Bo_.Read(), Q1D, D1D-1);
331  const auto bc = Reshape(Bc_.Read(), Q1D, D1D);
332  const auto D = Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
333  const auto x = Reshape(x_.Read(), D1D*(D1D-1), VDIM, NE);
334  auto y = y_.ReadWrite();
335 
336  MFEM_FORALL_3D(e, NE, Q1D, Q1D, VDIM,
337  {
338  const int tidz = MFEM_THREAD_ID(z);
339 
340  const int D1D = T_D1D ? T_D1D : d1d;
341  const int Q1D = T_Q1D ? T_Q1D : q1d;
342 
343  constexpr int MQ1 = T_Q1D ? T_Q1D : HDIV_MAX_Q1D;
344  constexpr int MD1 = T_D1D ? T_D1D : HDIV_MAX_D1D;
345  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
346 
347  MFEM_SHARED double smo[MQ1*(MD1-1)];
348  DeviceMatrix Bo(smo, D1D-1, Q1D);
349 
350  MFEM_SHARED double smc[MQ1*MD1];
351  DeviceMatrix Bc(smc, D1D, Q1D);
352 
353  MFEM_SHARED double sm0[VDIM*MDQ*MDQ];
354  MFEM_SHARED double sm1[VDIM*MDQ*MDQ];
355  DeviceMatrix X(sm0, D1D*(D1D-1), VDIM);
356  DeviceCube QD(sm1, Q1D, D1D, VDIM);
357  DeviceCube QQ(sm0, Q1D, Q1D, VDIM);
358 
359  // Load X, Bo and Bc into shared memory
360  MFEM_FOREACH_THREAD(vd,z,VDIM)
361  {
362  MFEM_FOREACH_THREAD(dy,y,D1D)
363  {
364  MFEM_FOREACH_THREAD(qx,x,Q1D)
365  {
366  if (qx < D1D && dy < (D1D-1)) { X(qx + dy*D1D,vd) = x(qx+dy*D1D,vd,e); }
367  if (tidz == 0)
368  {
369  if (dy < (D1D-1)) { Bo(dy,qx) = bo(qx,dy); }
370  Bc(dy,qx) = bc(qx,dy);
371  }
372  }
373  }
374  }
375  MFEM_SYNC_THREAD;
376  // Apply B operator
377  MFEM_FOREACH_THREAD(vd,z,VDIM)
378  {
379  const int nx = (vd == 0) ? D1D : D1D-1;
380  const int ny = (vd == 1) ? D1D : D1D-1;
381  DeviceCube Xxy(X, nx, ny, VDIM);
382  DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
383  MFEM_FOREACH_THREAD(dy,y,ny)
384  {
385  MFEM_FOREACH_THREAD(qx,x,Q1D)
386  {
387  double dq = 0.0;
388  for (int dx = 0; dx < nx; ++dx)
389  {
390  dq += Xxy(dx,dy,vd) * Bx(dx,qx);
391  }
392  QD(qx,dy,vd) = dq;
393  }
394  }
395  }
396  MFEM_SYNC_THREAD;
397  MFEM_FOREACH_THREAD(vd,z,VDIM)
398  {
399  const int ny = (vd == 1) ? D1D : D1D-1;
400  DeviceMatrix By = (vd == 1) ? Bc : Bo;
401  MFEM_FOREACH_THREAD(qy,y,Q1D)
402  {
403  MFEM_FOREACH_THREAD(qx,x,Q1D)
404  {
405  double qq = 0.0;
406  for (int dy = 0; dy < ny; ++dy)
407  {
408  qq += QD(qx,dy,vd) * By(dy,qy);
409  }
410  QQ(qx,qy,vd) = qq;
411  }
412  }
413  }
414  MFEM_SYNC_THREAD;
415  // Apply D operator
416  if (tidz == 0)
417  {
418  MFEM_FOREACH_THREAD(qy,y,Q1D)
419  {
420  MFEM_FOREACH_THREAD(qx,x,Q1D)
421  {
422  const double Qx = QQ(qx,qy,0);
423  const double Qy = QQ(qx,qy,1);
424 
425  const double D11 = D(qx,qy,0,e);
426  const double D12 = D(qx,qy,1,e);
427  const double D21 = symmetric ? D12 : D(qx,qy,2,e);
428  const double D22 = symmetric ? D(qx,qy,2,e) : D(qx,qy,3,e);
429 
430  QQ(qx,qy,0) = D11*Qx + D12*Qy;
431  QQ(qx,qy,1) = D21*Qx + D22*Qy;
432  }
433  }
434  }
435  MFEM_SYNC_THREAD;
436  // Apply Bt operator
437  MFEM_FOREACH_THREAD(vd,z,VDIM)
438  {
439  const int nx = (vd == 0) ? D1D : D1D-1;
440  DeviceMatrix Btx = (vd == 0) ? Bc : Bo;
441  MFEM_FOREACH_THREAD(qy,y,Q1D)
442  {
443  MFEM_FOREACH_THREAD(dx,x,nx)
444  {
445  double qd = 0.0;
446  for (int qx = 0; qx < Q1D; ++qx)
447  {
448  qd += QQ(qx,qy,vd) * Btx(dx,qx);
449  }
450  QD(dx,qy,vd) = qd;
451  }
452  }
453  }
454  MFEM_SYNC_THREAD;
455  MFEM_FOREACH_THREAD(vd,z,VDIM)
456  {
457  const int nx = (vd == 0) ? D1D : D1D-1;
458  const int ny = (vd == 1) ? D1D : D1D-1;
459  DeviceMatrix Bty = (vd == 1) ? Bc : Bo;
460  DeviceTensor<4> Yxy(y, nx, ny, VDIM, NE);
461  MFEM_FOREACH_THREAD(dy,y,ny)
462  {
463  MFEM_FOREACH_THREAD(dx,x,nx)
464  {
465  double dd = 0.0;
466  for (int qy = 0; qy < Q1D; ++qy)
467  {
468  dd += QD(dx,qy,vd) * Bty(dy,qy);
469  }
470  Yxy(dx,dy,vd,e) += dd;
471  }
472  }
473  }
474  MFEM_SYNC_THREAD;
475  });
476 }
477 
478 void PAHdivMassAssembleDiagonal2D(const int D1D,
479  const int Q1D,
480  const int NE,
481  const bool symmetric,
482  const Array<double> &Bo_,
483  const Array<double> &Bc_,
484  const Vector &op_,
485  Vector &diag_)
486 {
487  constexpr static int VDIM = 2;
488  constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
489 
490  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
491  auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
492  auto op = Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
493  auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE);
494 
495  MFEM_FORALL(e, NE,
496  {
497  int osc = 0;
498 
499  for (int c = 0; c < VDIM; ++c) // loop over x, y components
500  {
501  const int D1Dx = (c == 1) ? D1D - 1 : D1D;
502  const int D1Dy = (c == 0) ? D1D - 1 : D1D;
503 
504  for (int dy = 0; dy < D1Dy; ++dy)
505  {
506  double mass[MAX_Q1D];
507  for (int qx = 0; qx < Q1D; ++qx)
508  {
509  mass[qx] = 0.0;
510  for (int qy = 0; qy < Q1D; ++qy)
511  {
512  const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
513  mass[qx] += wy*wy*((c == 0) ? op(qx,qy,0,e) : op(qx,qy,symmetric ? 2 : 3,e));
514  }
515  }
516 
517  for (int dx = 0; dx < D1Dx; ++dx)
518  {
519  double val = 0.0;
520  for (int qx = 0; qx < Q1D; ++qx)
521  {
522  const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx);
523  val += mass[qx] * wx * wx;
524  }
525  diag(dx + (dy * D1Dx) + osc, e) += val;
526  }
527  }
528 
529  osc += D1Dx * D1Dy;
530  } // loop (c) over components
531  }); // end of element loop
532 }
533 
534 void PAHdivMassAssembleDiagonal3D(const int D1D,
535  const int Q1D,
536  const int NE,
537  const bool symmetric,
538  const Array<double> &Bo_,
539  const Array<double> &Bc_,
540  const Vector &op_,
541  Vector &diag_)
542 {
543  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
544  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
545  constexpr static int VDIM = 3;
546 
547  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
548  auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
549  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
550  auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
551 
552  MFEM_FORALL(e, NE,
553  {
554  int osc = 0;
555 
556  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
557  {
558  const int D1Dz = (c == 2) ? D1D : D1D - 1;
559  const int D1Dy = (c == 1) ? D1D : D1D - 1;
560  const int D1Dx = (c == 0) ? D1D : D1D - 1;
561 
562  const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
563  (symmetric ? 5 : 8));
564 
565  double mass[HDIV_MAX_Q1D];
566 
567  for (int dz = 0; dz < D1Dz; ++dz)
568  {
569  for (int dy = 0; dy < D1Dy; ++dy)
570  {
571  for (int qx = 0; qx < Q1D; ++qx)
572  {
573  mass[qx] = 0.0;
574  for (int qy = 0; qy < Q1D; ++qy)
575  {
576  const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
577  for (int qz = 0; qz < Q1D; ++qz)
578  {
579  const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
580  mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
581  }
582  }
583  }
584 
585  for (int dx = 0; dx < D1Dx; ++dx)
586  {
587  double val = 0.0;
588  for (int qx = 0; qx < Q1D; ++qx)
589  {
590  const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx);
591  val += mass[qx] * wx * wx;
592  }
593  diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val;
594  }
595  }
596  }
597 
598  osc += D1Dx * D1Dy * D1Dz;
599  } // loop c
600  }); // end of element loop
601 }
602 
603 void PAHdivMassApply3D(const int D1D,
604  const int Q1D,
605  const int NE,
606  const bool symmetric,
607  const Array<double> &Bo_,
608  const Array<double> &Bc_,
609  const Array<double> &Bot_,
610  const Array<double> &Bct_,
611  const Vector &op_,
612  const Vector &x_,
613  Vector &y_)
614 {
615  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
616  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
617  constexpr static int VDIM = 3;
618 
619  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
620  auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
621  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
622  auto Bct = Reshape(Bct_.Read(), D1D, Q1D);
623  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
624  auto x = Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
625  auto y = Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
626 
627  MFEM_FORALL(e, NE,
628  {
629  double mass[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D][VDIM];
630 
631  for (int qz = 0; qz < Q1D; ++qz)
632  {
633  for (int qy = 0; qy < Q1D; ++qy)
634  {
635  for (int qx = 0; qx < Q1D; ++qx)
636  {
637  for (int c = 0; c < VDIM; ++c)
638  {
639  mass[qz][qy][qx][c] = 0.0;
640  }
641  }
642  }
643  }
644 
645  int osc = 0;
646 
647  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
648  {
649  const int D1Dz = (c == 2) ? D1D : D1D - 1;
650  const int D1Dy = (c == 1) ? D1D : D1D - 1;
651  const int D1Dx = (c == 0) ? D1D : D1D - 1;
652 
653  for (int dz = 0; dz < D1Dz; ++dz)
654  {
655  double massXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
656  for (int qy = 0; qy < Q1D; ++qy)
657  {
658  for (int qx = 0; qx < Q1D; ++qx)
659  {
660  massXY[qy][qx] = 0.0;
661  }
662  }
663 
664  for (int dy = 0; dy < D1Dy; ++dy)
665  {
666  double massX[HDIV_MAX_Q1D];
667  for (int qx = 0; qx < Q1D; ++qx)
668  {
669  massX[qx] = 0.0;
670  }
671 
672  for (int dx = 0; dx < D1Dx; ++dx)
673  {
674  const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
675  for (int qx = 0; qx < Q1D; ++qx)
676  {
677  massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
678  }
679  }
680 
681  for (int qy = 0; qy < Q1D; ++qy)
682  {
683  const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
684  for (int qx = 0; qx < Q1D; ++qx)
685  {
686  const double wx = massX[qx];
687  massXY[qy][qx] += wx * wy;
688  }
689  }
690  }
691 
692  for (int qz = 0; qz < Q1D; ++qz)
693  {
694  const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
695  for (int qy = 0; qy < Q1D; ++qy)
696  {
697  for (int qx = 0; qx < Q1D; ++qx)
698  {
699  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
700  }
701  }
702  }
703  }
704 
705  osc += D1Dx * D1Dy * D1Dz;
706  } // loop (c) over components
707 
708  // Apply D operator.
709  for (int qz = 0; qz < Q1D; ++qz)
710  {
711  for (int qy = 0; qy < Q1D; ++qy)
712  {
713  for (int qx = 0; qx < Q1D; ++qx)
714  {
715  const double O11 = op(qx,qy,qz,0,e);
716  const double O12 = op(qx,qy,qz,1,e);
717  const double O13 = op(qx,qy,qz,2,e);
718  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
719  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
720  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
721  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
722  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
723  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
724 
725  const double massX = mass[qz][qy][qx][0];
726  const double massY = mass[qz][qy][qx][1];
727  const double massZ = mass[qz][qy][qx][2];
728  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
729  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
730  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
731  }
732  }
733  }
734 
735  for (int qz = 0; qz < Q1D; ++qz)
736  {
737  double massXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
738 
739  osc = 0;
740 
741  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
742  {
743  const int D1Dz = (c == 2) ? D1D : D1D - 1;
744  const int D1Dy = (c == 1) ? D1D : D1D - 1;
745  const int D1Dx = (c == 0) ? D1D : D1D - 1;
746 
747  for (int dy = 0; dy < D1Dy; ++dy)
748  {
749  for (int dx = 0; dx < D1Dx; ++dx)
750  {
751  massXY[dy][dx] = 0;
752  }
753  }
754  for (int qy = 0; qy < Q1D; ++qy)
755  {
756  double massX[HDIV_MAX_D1D];
757  for (int dx = 0; dx < D1Dx; ++dx)
758  {
759  massX[dx] = 0;
760  }
761  for (int qx = 0; qx < Q1D; ++qx)
762  {
763  for (int dx = 0; dx < D1Dx; ++dx)
764  {
765  massX[dx] += mass[qz][qy][qx][c] *
766  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
767  }
768  }
769  for (int dy = 0; dy < D1Dy; ++dy)
770  {
771  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
772  for (int dx = 0; dx < D1Dx; ++dx)
773  {
774  massXY[dy][dx] += massX[dx] * wy;
775  }
776  }
777  }
778 
779  for (int dz = 0; dz < D1Dz; ++dz)
780  {
781  const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
782  for (int dy = 0; dy < D1Dy; ++dy)
783  {
784  for (int dx = 0; dx < D1Dx; ++dx)
785  {
786  y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
787  massXY[dy][dx] * wz;
788  }
789  }
790  }
791 
792  osc += D1Dx * D1Dy * D1Dz;
793  } // loop c
794  } // loop qz
795  }); // end of element loop
796 }
797 
798 template<int T_D1D = 0, int T_Q1D = 0>
799 void SmemPAHdivMassApply3D(const int NE,
800  const bool symmetric,
801  const Array<double> &Bo_,
802  const Array<double> &Bc_,
803  const Array<double> &Bot_,
804  const Array<double> &Bct_,
805  const Vector &op_,
806  const Vector &x_,
807  Vector &y_,
808  const int d1d = 0,
809  const int q1d = 0)
810 {
811  MFEM_CONTRACT_VAR(Bot_);
812  MFEM_CONTRACT_VAR(Bct_);
813 
814  static constexpr int VDIM = 3;
815 
816  const int D1D = T_D1D ? T_D1D : d1d;
817  const int Q1D = T_Q1D ? T_Q1D : q1d;
818 
819  const auto bo = Reshape(Bo_.Read(), Q1D, D1D-1);
820  const auto bc = Reshape(Bc_.Read(), Q1D, D1D);
821  const auto D = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
822  const auto x = Reshape(x_.Read(), D1D*(D1D-1)*(D1D-1), VDIM, NE);
823  auto y = y_.ReadWrite();
824 
825  MFEM_FORALL_3D(e, NE, Q1D, Q1D, VDIM,
826  {
827  const int tidz = MFEM_THREAD_ID(z);
828 
829  const int D1D = T_D1D ? T_D1D : d1d;
830  const int Q1D = T_Q1D ? T_Q1D : q1d;
831 
832  constexpr int MQ1 = T_Q1D ? T_Q1D : HDIV_MAX_Q1D;
833  constexpr int MD1 = T_D1D ? T_D1D : HDIV_MAX_D1D;
834  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
835 
836  MFEM_SHARED double smo[MQ1*(MD1-1)];
837  DeviceMatrix Bo(smo, D1D-1, Q1D);
838 
839  MFEM_SHARED double smc[MQ1*MD1];
840  DeviceMatrix Bc(smc, D1D, Q1D);
841 
842  MFEM_SHARED double sm0[VDIM*MDQ*MDQ*MDQ];
843  MFEM_SHARED double sm1[VDIM*MDQ*MDQ*MDQ];
844  DeviceMatrix X(sm0, D1D*(D1D-1)*(D1D-1), VDIM);
845  DeviceTensor<4> QDD(sm1, Q1D, D1D, D1D, VDIM);
846  DeviceTensor<4> QQD(sm0, Q1D, Q1D, D1D, VDIM);
847  DeviceTensor<4> QQQ(sm1, Q1D, Q1D, Q1D, VDIM);
848  DeviceTensor<4> DQQ(sm0, D1D, Q1D, Q1D, VDIM);
849  DeviceTensor<4> DDQ(sm1, D1D, D1D, Q1D, VDIM);
850 
851  // Load X into shared memory
852  MFEM_FOREACH_THREAD(vd,z,VDIM)
853  {
854  MFEM_FOREACH_THREAD(dz,y,D1D-1)
855  {
856  MFEM_FOREACH_THREAD(dy,x,D1D-1)
857  {
858  MFEM_UNROLL(MD1)
859  for (int dx = 0; dx < D1D; ++dx)
860  {
861  X(dx+(dy+dz*(D1D-1))*D1D,vd) = x(dx+(dy+dz*(D1D-1))*D1D,vd,e);
862  }
863  }
864  }
865  }
866  // Load Bo and Bc into shared memory
867  if (tidz == 0)
868  {
869  MFEM_FOREACH_THREAD(d,y,D1D-1)
870  {
871  MFEM_FOREACH_THREAD(q,x,Q1D)
872  {
873  Bo(d,q) = bo(q,d);
874  }
875  }
876  MFEM_FOREACH_THREAD(d,y,D1D)
877  {
878  MFEM_FOREACH_THREAD(q,x,Q1D)
879  {
880  Bc(d,q) = bc(q,d);
881  }
882  }
883  }
884  MFEM_SYNC_THREAD;
885  // Apply B operator
886  MFEM_FOREACH_THREAD(vd,z,VDIM)
887  {
888  const int nx = (vd == 0) ? D1D : D1D-1;
889  const int ny = (vd == 1) ? D1D : D1D-1;
890  const int nz = (vd == 2) ? D1D : D1D-1;
891  DeviceTensor<4> Xxyz(X, nx, ny, nz, VDIM);
892  DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
893  MFEM_FOREACH_THREAD(dy,y,ny)
894  {
895  MFEM_FOREACH_THREAD(qx,x,Q1D)
896  {
897  double u[D1D];
898  MFEM_UNROLL(MD1)
899  for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
900  MFEM_UNROLL(MD1)
901  for (int dx = 0; dx < nx; ++dx)
902  {
903  MFEM_UNROLL(MD1)
904  for (int dz = 0; dz < nz; ++dz)
905  {
906  u[dz] += Xxyz(dx,dy,dz,vd) * Bx(dx,qx);
907  }
908  }
909  MFEM_UNROLL(MD1)
910  for (int dz = 0; dz < nz; ++dz) { QDD(qx,dy,dz,vd) = u[dz]; }
911  }
912  }
913  }
914  MFEM_SYNC_THREAD;
915  MFEM_FOREACH_THREAD(vd,z,VDIM)
916  {
917  const int ny = (vd == 1) ? D1D : D1D-1;
918  const int nz = (vd == 2) ? D1D : D1D-1;
919  DeviceMatrix By = (vd == 1) ? Bc : Bo;
920  MFEM_FOREACH_THREAD(qy,y,Q1D)
921  {
922  MFEM_FOREACH_THREAD(qx,x,Q1D)
923  {
924  double u[D1D];
925  MFEM_UNROLL(MD1)
926  for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
927  MFEM_UNROLL(MD1)
928  for (int dy = 0; dy < ny; ++dy)
929  {
930  MFEM_UNROLL(MD1)
931  for (int dz = 0; dz < nz; ++dz)
932  {
933  u[dz] += QDD(qx,dy,dz,vd) * By(dy,qy);
934  }
935  }
936  MFEM_UNROLL(MD1)
937  for (int dz = 0; dz < nz; ++dz) { QQD(qx,qy,dz,vd) = u[dz]; }
938  }
939  }
940  }
941  MFEM_SYNC_THREAD;
942  MFEM_FOREACH_THREAD(vd,z,VDIM)
943  {
944  const int nz = (vd == 2) ? D1D : D1D-1;
945  DeviceMatrix Bz = (vd == 2) ? Bc : Bo;
946  MFEM_FOREACH_THREAD(qy,y,Q1D)
947  {
948  MFEM_FOREACH_THREAD(qx,x,Q1D)
949  {
950  double u[Q1D];
951  MFEM_UNROLL(MQ1)
952  for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
953  MFEM_UNROLL(MD1)
954  for (int dz = 0; dz < nz; ++dz)
955  {
956  MFEM_UNROLL(MQ1)
957  for (int qz = 0; qz < Q1D; ++qz)
958  {
959  u[qz] += QQD(qx,qy,dz,vd) * Bz(dz,qz);
960  }
961  }
962  MFEM_UNROLL(MQ1)
963  for (int qz = 0; qz < Q1D; ++qz) { QQQ(qx,qy,qz,vd) = u[qz]; }
964  }
965  }
966  }
967  MFEM_SYNC_THREAD;
968  // Apply D operator
969  if (tidz == 0)
970  {
971  MFEM_FOREACH_THREAD(qy,y,Q1D)
972  {
973  MFEM_FOREACH_THREAD(qx,x,Q1D)
974  {
975  MFEM_UNROLL(MQ1)
976  for (int qz = 0; qz < Q1D; ++qz)
977  {
978  const double Qx = QQQ(qx,qy,qz,0);
979  const double Qy = QQQ(qx,qy,qz,1);
980  const double Qz = QQQ(qx,qy,qz,2);
981 
982  const double D11 = D(qx,qy,qz,0,e);
983  const double D12 = D(qx,qy,qz,1,e);
984  const double D13 = D(qx,qy,qz,2,e);
985  const double D21 = symmetric ? D12 : D(qx,qy,qz,3,e);
986  const double D22 = symmetric ? D(qx,qy,qz,3,e) : D(qx,qy,qz,4,e);
987  const double D23 = symmetric ? D(qx,qy,qz,4,e) : D(qx,qy,qz,5,e);
988  const double D31 = symmetric ? D13 : D(qx,qy,qz,6,e);
989  const double D32 = symmetric ? D23 : D(qx,qy,qz,7,e);
990  const double D33 = symmetric ? D(qx,qy,qz,5,e) : D(qx,qy,qz,8,e);
991 
992  QQQ(qx,qy,qz,0) = D11*Qx + D12*Qy + D13*Qz;
993  QQQ(qx,qy,qz,1) = D21*Qx + D22*Qy + D23*Qz;
994  QQQ(qx,qy,qz,2) = D31*Qx + D32*Qy + D33*Qz;
995  }
996  }
997  }
998  }
999  MFEM_SYNC_THREAD;
1000  // Apply Bt operator
1001  MFEM_FOREACH_THREAD(vd,z,VDIM)
1002  {
1003  const int nx = (vd == 0) ? D1D : D1D-1;
1004  DeviceMatrix Btx = (vd == 0) ? Bc : Bo;
1005  MFEM_FOREACH_THREAD(qy,y,Q1D)
1006  {
1007  MFEM_FOREACH_THREAD(dx,x,nx)
1008  {
1009  double u[Q1D];
1010  MFEM_UNROLL(MQ1)
1011  for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
1012  MFEM_UNROLL(MQ1)
1013  for (int qx = 0; qx < Q1D; ++qx)
1014  {
1015  MFEM_UNROLL(MQ1)
1016  for (int qz = 0; qz < Q1D; ++qz)
1017  {
1018  u[qz] += QQQ(qx,qy,qz,vd) * Btx(dx,qx);
1019  }
1020  }
1021  MFEM_UNROLL(MQ1)
1022  for (int qz = 0; qz < Q1D; ++qz) { DQQ(dx,qy,qz,vd) = u[qz]; }
1023  }
1024  }
1025  }
1026  MFEM_SYNC_THREAD;
1027  MFEM_FOREACH_THREAD(vd,z,VDIM)
1028  {
1029  const int nx = (vd == 0) ? D1D : D1D-1;
1030  const int ny = (vd == 1) ? D1D : D1D-1;
1031  DeviceMatrix Bty = (vd == 1) ? Bc : Bo;
1032  MFEM_FOREACH_THREAD(dy,y,ny)
1033  {
1034  MFEM_FOREACH_THREAD(dx,x,nx)
1035  {
1036  double u[Q1D];
1037  MFEM_UNROLL(MQ1)
1038  for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
1039  MFEM_UNROLL(MQ1)
1040  for (int qy = 0; qy < Q1D; ++qy)
1041  {
1042  MFEM_UNROLL(MQ1)
1043  for (int qz = 0; qz < Q1D; ++qz)
1044  {
1045  u[qz] += DQQ(dx,qy,qz,vd) * Bty(dy,qy);
1046  }
1047  }
1048  MFEM_UNROLL(MQ1)
1049  for (int qz = 0; qz < Q1D; ++qz) { DDQ(dx,dy,qz,vd) = u[qz]; }
1050  }
1051  }
1052  }
1053  MFEM_SYNC_THREAD;
1054  MFEM_FOREACH_THREAD(vd,z,VDIM)
1055  {
1056  const int nx = (vd == 0) ? D1D : D1D-1;
1057  const int ny = (vd == 1) ? D1D : D1D-1;
1058  const int nz = (vd == 2) ? D1D : D1D-1;
1059  DeviceTensor<5> Yxyz(y, nx, ny, nz, VDIM, NE);
1060  DeviceMatrix Btz = (vd == 2) ? Bc : Bo;
1061  MFEM_FOREACH_THREAD(dy,y,ny)
1062  {
1063  MFEM_FOREACH_THREAD(dx,x,nx)
1064  {
1065  double u[D1D];
1066  MFEM_UNROLL(MD1)
1067  for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
1068  MFEM_UNROLL(MQ1)
1069  for (int qz = 0; qz < Q1D; ++qz)
1070  {
1071  MFEM_UNROLL(MD1)
1072  for (int dz = 0; dz < nz; ++dz)
1073  {
1074  u[dz] += DDQ(dx,dy,qz,vd) * Btz(dz,qz);
1075  }
1076  }
1077  MFEM_UNROLL(MD1)
1078  for (int dz = 0; dz < nz; ++dz) { Yxyz(dx,dy,dz,vd,e) += u[dz]; }
1079  }
1080  }
1081  }
1082  MFEM_SYNC_THREAD;
1083  });
1084 }
1085 
1086 void PAHdivMassApply(const int dim,
1087  const int D1D,
1088  const int Q1D,
1089  const int NE,
1090  const bool symmetric,
1091  const Array<double> &Bo,
1092  const Array<double> &Bc,
1093  const Array<double> &Bot,
1094  const Array<double> &Bct,
1095  const Vector &op,
1096  const Vector &x,
1097  Vector &y)
1098 {
1099  const int id = (D1D << 4) | Q1D;
1100 
1101  if (dim == 2)
1102  {
1103  switch (id)
1104  {
1105  case 0x22: return SmemPAHdivMassApply2D<2,2>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1106  case 0x33: return SmemPAHdivMassApply2D<3,3>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1107  case 0x44: return SmemPAHdivMassApply2D<4,4>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1108  case 0x55: return SmemPAHdivMassApply2D<5,5>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1109  default: // fallback
1110  return PAHdivMassApply2D(D1D,Q1D,NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1111  }
1112  }
1113  else if (dim == 3)
1114  {
1115  switch (id)
1116  {
1117  case 0x23: return SmemPAHdivMassApply3D<2,3>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1118  case 0x34: return SmemPAHdivMassApply3D<3,4>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1119  case 0x45: return SmemPAHdivMassApply3D<4,5>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1120  case 0x56: return SmemPAHdivMassApply3D<5,6>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1121  case 0x67: return SmemPAHdivMassApply3D<6,7>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1122  case 0x78: return SmemPAHdivMassApply3D<7,8>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1123  default: // fallback
1124  return PAHdivMassApply3D(D1D,Q1D,NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1125  }
1126  }
1127 }
1128 
1129 // PA H(div) div-div assemble 2D kernel
1130 // NOTE: this is identical to PACurlCurlSetup3D
1131 static void PADivDivSetup2D(const int Q1D,
1132  const int NE,
1133  const Array<double> &w,
1134  const Vector &j,
1135  Vector &coeff_,
1136  Vector &op)
1137 {
1138  const int NQ = Q1D*Q1D;
1139  auto W = w.Read();
1140  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
1141  auto coeff = Reshape(coeff_.Read(), NQ, NE);
1142  auto y = Reshape(op.Write(), NQ, NE);
1143  MFEM_FORALL(e, NE,
1144  {
1145  for (int q = 0; q < NQ; ++q)
1146  {
1147  const double J11 = J(q,0,0,e);
1148  const double J21 = J(q,1,0,e);
1149  const double J12 = J(q,0,1,e);
1150  const double J22 = J(q,1,1,e);
1151  const double detJ = (J11*J22)-(J21*J12);
1152  y(q,e) = W[q] * coeff(q,e) / detJ;
1153  }
1154  });
1155 }
1156 
1157 static void PADivDivSetup3D(const int Q1D,
1158  const int NE,
1159  const Array<double> &w,
1160  const Vector &j,
1161  Vector &coeff_,
1162  Vector &op)
1163 {
1164  const int NQ = Q1D*Q1D*Q1D;
1165  auto W = w.Read();
1166  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
1167  auto coeff = Reshape(coeff_.Read(), NQ, NE);
1168  auto y = Reshape(op.Write(), NQ, NE);
1169 
1170  MFEM_FORALL(e, NE,
1171  {
1172  for (int q = 0; q < NQ; ++q)
1173  {
1174  const double J11 = J(q,0,0,e);
1175  const double J21 = J(q,1,0,e);
1176  const double J31 = J(q,2,0,e);
1177  const double J12 = J(q,0,1,e);
1178  const double J22 = J(q,1,1,e);
1179  const double J32 = J(q,2,1,e);
1180  const double J13 = J(q,0,2,e);
1181  const double J23 = J(q,1,2,e);
1182  const double J33 = J(q,2,2,e);
1183  const double detJ = J11 * (J22 * J33 - J32 * J23) -
1184  /* */ J21 * (J12 * J33 - J32 * J13) +
1185  /* */ J31 * (J12 * J23 - J22 * J13);
1186  y(q,e) = W[q] * coeff(q, e) / detJ;
1187  }
1188  });
1189 }
1190 
1191 static void PADivDivApply2D(const int D1D,
1192  const int Q1D,
1193  const int NE,
1194  const Array<double> &Bo_,
1195  const Array<double> &Gc_,
1196  const Array<double> &Bot_,
1197  const Array<double> &Gct_,
1198  const Vector &op_,
1199  const Vector &x_,
1200  Vector &y_)
1201 {
1202  constexpr static int VDIM = 2;
1203  constexpr static int MAX_D1D = HDIV_MAX_D1D;
1204  constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
1205 
1206  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1207  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
1208  auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1209  auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
1210  auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
1211  auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
1212  auto y = Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
1213 
1214  MFEM_FORALL(e, NE,
1215  {
1216  double div[MAX_Q1D][MAX_Q1D];
1217 
1218  // div[qy][qx] will be computed as du_x/dx + du_y/dy
1219 
1220  for (int qy = 0; qy < Q1D; ++qy)
1221  {
1222  for (int qx = 0; qx < Q1D; ++qx)
1223  {
1224  div[qy][qx] = 0;
1225  }
1226  }
1227 
1228  int osc = 0;
1229 
1230  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1231  {
1232  const int D1Dx = (c == 1) ? D1D - 1 : D1D;
1233  const int D1Dy = (c == 0) ? D1D - 1 : D1D;
1234 
1235  for (int dy = 0; dy < D1Dy; ++dy)
1236  {
1237  double gradX[MAX_Q1D];
1238  for (int qx = 0; qx < Q1D; ++qx)
1239  {
1240  gradX[qx] = 0;
1241  }
1242 
1243  for (int dx = 0; dx < D1Dx; ++dx)
1244  {
1245  const double t = x(dx + (dy * D1Dx) + osc, e);
1246  for (int qx = 0; qx < Q1D; ++qx)
1247  {
1248  gradX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
1249  }
1250  }
1251 
1252  for (int qy = 0; qy < Q1D; ++qy)
1253  {
1254  const double wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy);
1255  for (int qx = 0; qx < Q1D; ++qx)
1256  {
1257  div[qy][qx] += gradX[qx] * wy;
1258  }
1259  }
1260  }
1261 
1262  osc += D1Dx * D1Dy;
1263  } // loop (c) over components
1264 
1265  // Apply D operator.
1266  for (int qy = 0; qy < Q1D; ++qy)
1267  {
1268  for (int qx = 0; qx < Q1D; ++qx)
1269  {
1270  div[qy][qx] *= op(qx,qy,e);
1271  }
1272  }
1273 
1274  for (int qy = 0; qy < Q1D; ++qy)
1275  {
1276  osc = 0;
1277 
1278  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1279  {
1280  const int D1Dx = (c == 1) ? D1D - 1 : D1D;
1281  const int D1Dy = (c == 0) ? D1D - 1 : D1D;
1282 
1283  double gradX[MAX_D1D];
1284  for (int dx = 0; dx < D1Dx; ++dx)
1285  {
1286  gradX[dx] = 0;
1287  }
1288  for (int qx = 0; qx < Q1D; ++qx)
1289  {
1290  for (int dx = 0; dx < D1Dx; ++dx)
1291  {
1292  gradX[dx] += div[qy][qx] * (c == 0 ? Gct(dx,qx) : Bot(dx,qx));
1293  }
1294  }
1295  for (int dy = 0; dy < D1Dy; ++dy)
1296  {
1297  const double wy = (c == 0) ? Bot(dy,qy) : Gct(dy,qy);
1298  for (int dx = 0; dx < D1Dx; ++dx)
1299  {
1300  y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1301  }
1302  }
1303 
1304  osc += D1Dx * D1Dy;
1305  } // loop c
1306  } // loop qy
1307  }); // end of element loop
1308 }
1309 
1310 static void PADivDivApply3D(const int D1D,
1311  const int Q1D,
1312  const int NE,
1313  const Array<double> &Bo_,
1314  const Array<double> &Gc_,
1315  const Array<double> &Bot_,
1316  const Array<double> &Gct_,
1317  const Vector &op_,
1318  const Vector &x_,
1319  Vector &y_)
1320 {
1321  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
1322  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
1323  constexpr static int VDIM = 3;
1324 
1325  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1326  auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1327  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
1328  auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
1329  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
1330  auto x = Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1331  auto y = Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1332 
1333  MFEM_FORALL(e, NE,
1334  {
1335  double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1336 
1337  for (int qz = 0; qz < Q1D; ++qz)
1338  {
1339  for (int qy = 0; qy < Q1D; ++qy)
1340  {
1341  for (int qx = 0; qx < Q1D; ++qx)
1342  {
1343  div[qz][qy][qx] = 0.0;
1344  }
1345  }
1346  }
1347 
1348  int osc = 0;
1349 
1350  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1351  {
1352  const int D1Dz = (c == 2) ? D1D : D1D - 1;
1353  const int D1Dy = (c == 1) ? D1D : D1D - 1;
1354  const int D1Dx = (c == 0) ? D1D : D1D - 1;
1355 
1356  for (int dz = 0; dz < D1Dz; ++dz)
1357  {
1358  double aXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1359  for (int qy = 0; qy < Q1D; ++qy)
1360  {
1361  for (int qx = 0; qx < Q1D; ++qx)
1362  {
1363  aXY[qy][qx] = 0.0;
1364  }
1365  }
1366 
1367  for (int dy = 0; dy < D1Dy; ++dy)
1368  {
1369  double aX[HDIV_MAX_Q1D];
1370  for (int qx = 0; qx < Q1D; ++qx)
1371  {
1372  aX[qx] = 0.0;
1373  }
1374 
1375  for (int dx = 0; dx < D1Dx; ++dx)
1376  {
1377  const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1378  for (int qx = 0; qx < Q1D; ++qx)
1379  {
1380  aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
1381  }
1382  }
1383 
1384  for (int qy = 0; qy < Q1D; ++qy)
1385  {
1386  const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
1387  for (int qx = 0; qx < Q1D; ++qx)
1388  {
1389  const double wx = aX[qx];
1390  aXY[qy][qx] += wx * wy;
1391  }
1392  }
1393  }
1394 
1395  for (int qz = 0; qz < Q1D; ++qz)
1396  {
1397  const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
1398  for (int qy = 0; qy < Q1D; ++qy)
1399  {
1400  for (int qx = 0; qx < Q1D; ++qx)
1401  {
1402  div[qz][qy][qx] += aXY[qy][qx] * wz;
1403  }
1404  }
1405  }
1406  }
1407 
1408  osc += D1Dx * D1Dy * D1Dz;
1409  } // loop (c) over components
1410 
1411  // Apply D operator.
1412  for (int qz = 0; qz < Q1D; ++qz)
1413  {
1414  for (int qy = 0; qy < Q1D; ++qy)
1415  {
1416  for (int qx = 0; qx < Q1D; ++qx)
1417  {
1418  div[qz][qy][qx] *= op(qx,qy,qz,e);
1419  }
1420  }
1421  }
1422 
1423  for (int qz = 0; qz < Q1D; ++qz)
1424  {
1425  double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
1426 
1427  osc = 0;
1428 
1429  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1430  {
1431  const int D1Dz = (c == 2) ? D1D : D1D - 1;
1432  const int D1Dy = (c == 1) ? D1D : D1D - 1;
1433  const int D1Dx = (c == 0) ? D1D : D1D - 1;
1434 
1435  for (int dy = 0; dy < D1Dy; ++dy)
1436  {
1437  for (int dx = 0; dx < D1Dx; ++dx)
1438  {
1439  aXY[dy][dx] = 0;
1440  }
1441  }
1442  for (int qy = 0; qy < Q1D; ++qy)
1443  {
1444  double aX[HDIV_MAX_D1D];
1445  for (int dx = 0; dx < D1Dx; ++dx)
1446  {
1447  aX[dx] = 0;
1448  }
1449  for (int qx = 0; qx < Q1D; ++qx)
1450  {
1451  for (int dx = 0; dx < D1Dx; ++dx)
1452  {
1453  aX[dx] += div[qz][qy][qx] *
1454  (c == 0 ? Gct(dx,qx) : Bot(dx,qx));
1455  }
1456  }
1457  for (int dy = 0; dy < D1Dy; ++dy)
1458  {
1459  const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
1460  for (int dx = 0; dx < D1Dx; ++dx)
1461  {
1462  aXY[dy][dx] += aX[dx] * wy;
1463  }
1464  }
1465  }
1466 
1467  for (int dz = 0; dz < D1Dz; ++dz)
1468  {
1469  const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
1470  for (int dy = 0; dy < D1Dy; ++dy)
1471  {
1472  for (int dx = 0; dx < D1Dx; ++dx)
1473  {
1474  y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
1475  aXY[dy][dx] * wz;
1476  }
1477  }
1478  }
1479 
1480  osc += D1Dx * D1Dy * D1Dz;
1481  } // loop c
1482  } // loop qz
1483  }); // end of element loop
1484 }
1485 
1486 void DivDivIntegrator::AssemblePA(const FiniteElementSpace &fes)
1487 {
1488  // Assumes tensor-product elements
1489  Mesh *mesh = fes.GetMesh();
1490  const FiniteElement *fel = fes.GetFE(0);
1491 
1492  const VectorTensorFiniteElement *el =
1493  dynamic_cast<const VectorTensorFiniteElement*>(fel);
1494  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
1495 
1496  const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule
1497  (*el, *el, *mesh->GetElementTransformation(0));
1498 
1499  const int dims = el->GetDim();
1500  MFEM_VERIFY(dims == 2 || dims == 3, "");
1501 
1502  const int nq = ir->GetNPoints();
1503  dim = mesh->Dimension();
1504  MFEM_VERIFY(dim == 2 || dim == 3, "");
1505 
1506  ne = fes.GetNE();
1507  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
1508  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
1509  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
1510  dofs1D = mapsC->ndof;
1511  quad1D = mapsC->nqpt;
1512 
1513  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
1514 
1515  pa_data.SetSize(nq * ne, Device::GetMemoryType());
1516 
1517  QuadratureSpace qs(*mesh, *ir);
1518  CoefficientVector coeff(Q, qs, CoefficientStorage::FULL);
1519 
1520  if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3)
1521  {
1522  PADivDivSetup3D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1523  }
1524  else if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2)
1525  {
1526  PADivDivSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1527  }
1528  else
1529  {
1530  MFEM_ABORT("Unknown kernel.");
1531  }
1532 }
1533 
1534 void DivDivIntegrator::AddMultPA(const Vector &x, Vector &y) const
1535 {
1536  if (dim == 3)
1537  PADivDivApply3D(dofs1D, quad1D, ne, mapsO->B, mapsC->G,
1538  mapsO->Bt, mapsC->Gt, pa_data, x, y);
1539  else if (dim == 2)
1540  PADivDivApply2D(dofs1D, quad1D, ne, mapsO->B, mapsC->G,
1541  mapsO->Bt, mapsC->Gt, pa_data, x, y);
1542  else
1543  {
1544  MFEM_ABORT("Unsupported dimension!");
1545  }
1546 }
1547 
1548 static void PADivDivAssembleDiagonal2D(const int D1D,
1549  const int Q1D,
1550  const int NE,
1551  const Array<double> &Bo_,
1552  const Array<double> &Gc_,
1553  const Vector &op_,
1554  Vector &diag_)
1555 {
1556  constexpr static int VDIM = 2;
1557  constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
1558 
1559  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1560  auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1561  auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
1562  auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE);
1563 
1564  MFEM_FORALL(e, NE,
1565  {
1566  int osc = 0;
1567 
1568  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1569  {
1570  const int D1Dx = (c == 1) ? D1D - 1 : D1D;
1571  const int D1Dy = (c == 0) ? D1D - 1 : D1D;
1572 
1573  double div[MAX_Q1D];
1574 
1575  for (int dy = 0; dy < D1Dy; ++dy)
1576  {
1577  for (int qx = 0; qx < Q1D; ++qx)
1578  {
1579  div[qx] = 0.0;
1580  for (int qy = 0; qy < Q1D; ++qy)
1581  {
1582  const double wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy);
1583  div[qx] += wy * wy * op(qx,qy,e);
1584  }
1585  }
1586 
1587  for (int dx = 0; dx < D1Dx; ++dx)
1588  {
1589  double val = 0.0;
1590  for (int qx = 0; qx < Q1D; ++qx)
1591  {
1592  const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx);
1593  val += div[qx] * wx * wx;
1594  }
1595  diag(dx + (dy * D1Dx) + osc, e) += val;
1596  }
1597  }
1598 
1599  osc += D1Dx * D1Dy;
1600  } // loop c
1601  });
1602 }
1603 
1604 static void PADivDivAssembleDiagonal3D(const int D1D,
1605  const int Q1D,
1606  const int NE,
1607  const Array<double> &Bo_,
1608  const Array<double> &Gc_,
1609  const Vector &op_,
1610  Vector &diag_)
1611 {
1612  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
1613  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
1614  constexpr static int VDIM = 3;
1615 
1616  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1617  auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1618  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
1619  auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1620 
1621  MFEM_FORALL(e, NE,
1622  {
1623  int osc = 0;
1624 
1625  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1626  {
1627  const int D1Dz = (c == 2) ? D1D : D1D - 1;
1628  const int D1Dy = (c == 1) ? D1D : D1D - 1;
1629  const int D1Dx = (c == 0) ? D1D : D1D - 1;
1630 
1631  for (int dz = 0; dz < D1Dz; ++dz)
1632  {
1633  for (int dy = 0; dy < D1Dy; ++dy)
1634  {
1635  double a[HDIV_MAX_Q1D];
1636 
1637  for (int qx = 0; qx < Q1D; ++qx)
1638  {
1639  a[qx] = 0.0;
1640  for (int qy = 0; qy < Q1D; ++qy)
1641  {
1642  const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
1643 
1644  for (int qz = 0; qz < Q1D; ++qz)
1645  {
1646  const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
1647  a[qx] += wy * wy * wz * wz * op(qx,qy,qz,e);
1648  }
1649  }
1650  }
1651 
1652  for (int dx = 0; dx < D1Dx; ++dx)
1653  {
1654  double val = 0.0;
1655  for (int qx = 0; qx < Q1D; ++qx)
1656  {
1657  const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx);
1658  val += a[qx] * wx * wx;
1659  }
1660  diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val;
1661  }
1662  }
1663  }
1664 
1665  osc += D1Dx * D1Dy * D1Dz;
1666  } // loop c
1667  }); // end of element loop
1668 }
1669 
1670 void DivDivIntegrator::AssembleDiagonalPA(Vector& diag)
1671 {
1672  if (dim == 3)
1673  {
1674  PADivDivAssembleDiagonal3D(dofs1D, quad1D, ne,
1675  mapsO->B, mapsC->G, pa_data, diag);
1676  }
1677  else
1678  {
1679  PADivDivAssembleDiagonal2D(dofs1D, quad1D, ne,
1680  mapsO->B, mapsC->G, pa_data, diag);
1681  }
1682 }
1683 
1684 // PA H(div)-L2 (div u, p) assemble 2D kernel
1685 static void PADivL2Setup2D(const int Q1D,
1686  const int NE,
1687  const Array<double> &w,
1688  Vector &coeff_,
1689  Vector &op)
1690 {
1691  const int NQ = Q1D*Q1D;
1692  auto W = w.Read();
1693  auto coeff = Reshape(coeff_.Read(), NQ, NE);
1694  auto y = Reshape(op.Write(), NQ, NE);
1695  MFEM_FORALL(e, NE,
1696  {
1697  for (int q = 0; q < NQ; ++q)
1698  {
1699  y(q,e) = W[q] * coeff(q,e);
1700  }
1701  });
1702 }
1703 
1704 static void PADivL2Setup3D(const int Q1D,
1705  const int NE,
1706  const Array<double> &w,
1707  Vector &coeff_,
1708  Vector &op)
1709 {
1710  const int NQ = Q1D*Q1D*Q1D;
1711  auto W = w.Read();
1712  auto coeff = Reshape(coeff_.Read(), NQ, NE);
1713  auto y = Reshape(op.Write(), NQ, NE);
1714 
1715  MFEM_FORALL(e, NE,
1716  {
1717  for (int q = 0; q < NQ; ++q)
1718  {
1719  y(q,e) = W[q] * coeff(q, e);
1720  }
1721  });
1722 }
1723 
1724 void
1725 VectorFEDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
1726  const FiniteElementSpace &test_fes)
1727 {
1728  // Assumes tensor-product elements, with a vector test space and
1729  // scalar trial space.
1730  Mesh *mesh = trial_fes.GetMesh();
1731  const FiniteElement *trial_fel = trial_fes.GetFE(0);
1732  const FiniteElement *test_fel = test_fes.GetFE(0);
1733 
1734  const VectorTensorFiniteElement *trial_el =
1735  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
1736  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
1737 
1738  const NodalTensorFiniteElement *test_el =
1739  dynamic_cast<const NodalTensorFiniteElement*>(test_fel);
1740  MFEM_VERIFY(test_el != NULL, "Only NodalTensorFiniteElement is supported!");
1741 
1742  const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule(
1743  *trial_el, *trial_el,
1744  *mesh->GetElementTransformation(0));
1745 
1746  const int dims = trial_el->GetDim();
1747  MFEM_VERIFY(dims == 2 || dims == 3, "");
1748 
1749  const int nq = ir->GetNPoints();
1750  dim = mesh->Dimension();
1751  MFEM_VERIFY(dim == 2 || dim == 3, "");
1752 
1753  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder() + 1, "");
1754 
1755  ne = trial_fes.GetNE();
1756  mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
1757  mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
1758  dofs1D = mapsC->ndof;
1759  quad1D = mapsC->nqpt;
1760 
1761  L2mapsO = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
1762  L2dofs1D = L2mapsO->ndof;
1763 
1764  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
1765  if (dim == 2)
1766  {
1767  MFEM_VERIFY(nq == quad1D * quad1D, "");
1768  }
1769  else
1770  {
1771  MFEM_VERIFY(nq == quad1D * quad1D * quad1D, "");
1772  }
1773 
1774  pa_data.SetSize(nq * ne, Device::GetMemoryType());
1775 
1776  QuadratureSpace qs(*mesh, *ir);
1777  CoefficientVector coeff(Q, qs, CoefficientStorage::FULL);
1778 
1779  if (test_el->GetMapType() == FiniteElement::INTEGRAL)
1780  {
1781  const GeometricFactors *geom =
1782  mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS);
1783  coeff /= geom->detJ;
1784  }
1785 
1786  if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3)
1787  {
1788  PADivL2Setup3D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
1789  }
1790  else if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2)
1791  {
1792  PADivL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
1793  }
1794  else
1795  {
1796  MFEM_ABORT("Unknown kernel.");
1797  }
1798 }
1799 
1800 // Apply to x corresponding to DOFs in H(div) (trial), whose divergence is
1801 // integrated against L_2 test functions corresponding to y.
1802 static void PAHdivL2Apply3D(const int D1D,
1803  const int Q1D,
1804  const int L2D1D,
1805  const int NE,
1806  const Array<double> &Bo_,
1807  const Array<double> &Gc_,
1808  const Array<double> &L2Bot_,
1809  const Vector &op_,
1810  const Vector &x_,
1811  Vector &y_)
1812 {
1813  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
1814  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
1815  constexpr static int VDIM = 3;
1816 
1817  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1818  auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1819  auto L2Bot = Reshape(L2Bot_.Read(), L2D1D, Q1D);
1820  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
1821  auto x = Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1822  auto y = Reshape(y_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE);
1823 
1824  MFEM_FORALL(e, NE,
1825  {
1826  double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1827 
1828  for (int qz = 0; qz < Q1D; ++qz)
1829  {
1830  for (int qy = 0; qy < Q1D; ++qy)
1831  {
1832  for (int qx = 0; qx < Q1D; ++qx)
1833  {
1834  div[qz][qy][qx] = 0.0;
1835  }
1836  }
1837  }
1838 
1839  int osc = 0;
1840 
1841  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1842  {
1843  const int D1Dz = (c == 2) ? D1D : D1D - 1;
1844  const int D1Dy = (c == 1) ? D1D : D1D - 1;
1845  const int D1Dx = (c == 0) ? D1D : D1D - 1;
1846 
1847  for (int dz = 0; dz < D1Dz; ++dz)
1848  {
1849  double aXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1850  for (int qy = 0; qy < Q1D; ++qy)
1851  {
1852  for (int qx = 0; qx < Q1D; ++qx)
1853  {
1854  aXY[qy][qx] = 0.0;
1855  }
1856  }
1857 
1858  for (int dy = 0; dy < D1Dy; ++dy)
1859  {
1860  double aX[HDIV_MAX_Q1D];
1861  for (int qx = 0; qx < Q1D; ++qx)
1862  {
1863  aX[qx] = 0.0;
1864  }
1865 
1866  for (int dx = 0; dx < D1Dx; ++dx)
1867  {
1868  const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1869  for (int qx = 0; qx < Q1D; ++qx)
1870  {
1871  aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
1872  }
1873  }
1874 
1875  for (int qy = 0; qy < Q1D; ++qy)
1876  {
1877  const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
1878  for (int qx = 0; qx < Q1D; ++qx)
1879  {
1880  aXY[qy][qx] += aX[qx] * wy;
1881  }
1882  }
1883  }
1884 
1885  for (int qz = 0; qz < Q1D; ++qz)
1886  {
1887  const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
1888  for (int qy = 0; qy < Q1D; ++qy)
1889  {
1890  for (int qx = 0; qx < Q1D; ++qx)
1891  {
1892  div[qz][qy][qx] += aXY[qy][qx] * wz;
1893  }
1894  }
1895  }
1896  }
1897 
1898  osc += D1Dx * D1Dy * D1Dz;
1899  } // loop (c) over components
1900 
1901  // Apply D operator.
1902  for (int qz = 0; qz < Q1D; ++qz)
1903  {
1904  for (int qy = 0; qy < Q1D; ++qy)
1905  {
1906  for (int qx = 0; qx < Q1D; ++qx)
1907  {
1908  div[qz][qy][qx] *= op(qx,qy,qz,e);
1909  }
1910  }
1911  }
1912 
1913  for (int qz = 0; qz < Q1D; ++qz)
1914  {
1915  double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
1916 
1917  for (int dy = 0; dy < L2D1D; ++dy)
1918  {
1919  for (int dx = 0; dx < L2D1D; ++dx)
1920  {
1921  aXY[dy][dx] = 0;
1922  }
1923  }
1924  for (int qy = 0; qy < Q1D; ++qy)
1925  {
1926  double aX[HDIV_MAX_D1D];
1927  for (int dx = 0; dx < L2D1D; ++dx)
1928  {
1929  aX[dx] = 0;
1930  }
1931  for (int qx = 0; qx < Q1D; ++qx)
1932  {
1933  for (int dx = 0; dx < L2D1D; ++dx)
1934  {
1935  aX[dx] += div[qz][qy][qx] * L2Bot(dx,qx);
1936  }
1937  }
1938  for (int dy = 0; dy < L2D1D; ++dy)
1939  {
1940  const double wy = L2Bot(dy,qy);
1941  for (int dx = 0; dx < L2D1D; ++dx)
1942  {
1943  aXY[dy][dx] += aX[dx] * wy;
1944  }
1945  }
1946  }
1947 
1948  for (int dz = 0; dz < L2D1D; ++dz)
1949  {
1950  const double wz = L2Bot(dz,qz);
1951  for (int dy = 0; dy < L2D1D; ++dy)
1952  {
1953  for (int dx = 0; dx < L2D1D; ++dx)
1954  {
1955  y(dx,dy,dz,e) += aXY[dy][dx] * wz;
1956  }
1957  }
1958  }
1959  } // loop qz
1960  }); // end of element loop
1961 }
1962 
1963 // Apply to x corresponding to DOFs in H(div) (trial), whose divergence is
1964 // integrated against L_2 test functions corresponding to y.
1965 static void PAHdivL2Apply2D(const int D1D,
1966  const int Q1D,
1967  const int L2D1D,
1968  const int NE,
1969  const Array<double> &Bo_,
1970  const Array<double> &Gc_,
1971  const Array<double> &L2Bot_,
1972  const Vector &op_,
1973  const Vector &x_,
1974  Vector &y_)
1975 {
1976  constexpr static int VDIM = 2;
1977  constexpr static int MAX_D1D = HDIV_MAX_D1D;
1978  constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
1979 
1980  auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1981  auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1982  auto L2Bot = Reshape(L2Bot_.Read(), L2D1D, Q1D);
1983  auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
1984  auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
1985  auto y = Reshape(y_.ReadWrite(), L2D1D, L2D1D, NE);
1986 
1987  MFEM_FORALL(e, NE,
1988  {
1989  double div[MAX_Q1D][MAX_Q1D];
1990 
1991  for (int qy = 0; qy < Q1D; ++qy)
1992  {
1993  for (int qx = 0; qx < Q1D; ++qx)
1994  {
1995  div[qy][qx] = 0.0;
1996  }
1997  }
1998 
1999  int osc = 0;
2000 
2001  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2002  {
2003  const int D1Dy = (c == 1) ? D1D : D1D - 1;
2004  const int D1Dx = (c == 0) ? D1D : D1D - 1;
2005 
2006  for (int dy = 0; dy < D1Dy; ++dy)
2007  {
2008  double aX[MAX_Q1D];
2009  for (int qx = 0; qx < Q1D; ++qx)
2010  {
2011  aX[qx] = 0.0;
2012  }
2013 
2014  for (int dx = 0; dx < D1Dx; ++dx)
2015  {
2016  const double t = x(dx + (dy * D1Dx) + osc, e);
2017  for (int qx = 0; qx < Q1D; ++qx)
2018  {
2019  aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
2020  }
2021  }
2022 
2023  for (int qy = 0; qy < Q1D; ++qy)
2024  {
2025  const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
2026  for (int qx = 0; qx < Q1D; ++qx)
2027  {
2028  div[qy][qx] += aX[qx] * wy;
2029  }
2030  }
2031  }
2032 
2033  osc += D1Dx * D1Dy;
2034  } // loop (c) over components
2035 
2036  // Apply D operator.
2037  for (int qy = 0; qy < Q1D; ++qy)
2038  {
2039  for (int qx = 0; qx < Q1D; ++qx)
2040  {
2041  div[qy][qx] *= op(qx,qy,e);
2042  }
2043  }
2044 
2045  for (int qy = 0; qy < Q1D; ++qy)
2046  {
2047  double aX[MAX_D1D];
2048  for (int dx = 0; dx < L2D1D; ++dx)
2049  {
2050  aX[dx] = 0;
2051  }
2052  for (int qx = 0; qx < Q1D; ++qx)
2053  {
2054  for (int dx = 0; dx < L2D1D; ++dx)
2055  {
2056  aX[dx] += div[qy][qx] * L2Bot(dx,qx);
2057  }
2058  }
2059  for (int dy = 0; dy < L2D1D; ++dy)
2060  {
2061  const double wy = L2Bot(dy,qy);
2062  for (int dx = 0; dx < L2D1D; ++dx)
2063  {
2064  y(dx,dy,e) += aX[dx] * wy;
2065  }
2066  }
2067  }
2068  }); // end of element loop
2069 }
2070 
2071 static void PAHdivL2ApplyTranspose3D(const int D1D,
2072  const int Q1D,
2073  const int L2D1D,
2074  const int NE,
2075  const Array<double> &L2Bo_,
2076  const Array<double> &Gct_,
2077  const Array<double> &Bot_,
2078  const Vector &op_,
2079  const Vector &x_,
2080  Vector &y_)
2081 {
2082  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
2083  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
2084  constexpr static int VDIM = 3;
2085 
2086  auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2087  auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2088  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2089  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
2090  auto x = Reshape(x_.Read(), L2D1D, L2D1D, L2D1D, NE);
2091  auto y = Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
2092 
2093  MFEM_FORALL(e, NE,
2094  {
2095  double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2096 
2097  for (int qz = 0; qz < Q1D; ++qz)
2098  {
2099  for (int qy = 0; qy < Q1D; ++qy)
2100  {
2101  for (int qx = 0; qx < Q1D; ++qx)
2102  {
2103  div[qz][qy][qx] = 0.0;
2104  }
2105  }
2106  }
2107 
2108  for (int dz = 0; dz < L2D1D; ++dz)
2109  {
2110  double aXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2111  for (int qy = 0; qy < Q1D; ++qy)
2112  {
2113  for (int qx = 0; qx < Q1D; ++qx)
2114  {
2115  aXY[qy][qx] = 0.0;
2116  }
2117  }
2118 
2119  for (int dy = 0; dy < L2D1D; ++dy)
2120  {
2121  double aX[HDIV_MAX_Q1D];
2122  for (int qx = 0; qx < Q1D; ++qx)
2123  {
2124  aX[qx] = 0.0;
2125  }
2126 
2127  for (int dx = 0; dx < L2D1D; ++dx)
2128  {
2129  const double t = x(dx,dy,dz,e);
2130  for (int qx = 0; qx < Q1D; ++qx)
2131  {
2132  aX[qx] += t * L2Bo(qx,dx);
2133  }
2134  }
2135 
2136  for (int qy = 0; qy < Q1D; ++qy)
2137  {
2138  const double wy = L2Bo(qy,dy);
2139  for (int qx = 0; qx < Q1D; ++qx)
2140  {
2141  aXY[qy][qx] += aX[qx] * wy;
2142  }
2143  }
2144  }
2145 
2146  for (int qz = 0; qz < Q1D; ++qz)
2147  {
2148  const double wz = L2Bo(qz,dz);
2149  for (int qy = 0; qy < Q1D; ++qy)
2150  {
2151  for (int qx = 0; qx < Q1D; ++qx)
2152  {
2153  div[qz][qy][qx] += aXY[qy][qx] * wz;
2154  }
2155  }
2156  }
2157  }
2158 
2159  // Apply D operator.
2160  for (int qz = 0; qz < Q1D; ++qz)
2161  {
2162  for (int qy = 0; qy < Q1D; ++qy)
2163  {
2164  for (int qx = 0; qx < Q1D; ++qx)
2165  {
2166  div[qz][qy][qx] *= op(qx,qy,qz,e);
2167  }
2168  }
2169  }
2170 
2171  for (int qz = 0; qz < Q1D; ++qz)
2172  {
2173  double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
2174 
2175  int osc = 0;
2176  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2177  {
2178  const int D1Dz = (c == 2) ? D1D : D1D - 1;
2179  const int D1Dy = (c == 1) ? D1D : D1D - 1;
2180  const int D1Dx = (c == 0) ? D1D : D1D - 1;
2181 
2182  for (int dy = 0; dy < D1Dy; ++dy)
2183  {
2184  for (int dx = 0; dx < D1Dx; ++dx)
2185  {
2186  aXY[dy][dx] = 0;
2187  }
2188  }
2189  for (int qy = 0; qy < Q1D; ++qy)
2190  {
2191  double aX[HDIV_MAX_D1D];
2192  for (int dx = 0; dx < D1Dx; ++dx)
2193  {
2194  aX[dx] = 0;
2195  }
2196  for (int qx = 0; qx < Q1D; ++qx)
2197  {
2198  for (int dx = 0; dx < D1Dx; ++dx)
2199  {
2200  aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx) :
2201  Bot(dx,qx));
2202  }
2203  }
2204  for (int dy = 0; dy < D1Dy; ++dy)
2205  {
2206  const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
2207  for (int dx = 0; dx < D1Dx; ++dx)
2208  {
2209  aXY[dy][dx] += aX[dx] * wy;
2210  }
2211  }
2212  }
2213 
2214  for (int dz = 0; dz < D1Dz; ++dz)
2215  {
2216  const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
2217  for (int dy = 0; dy < D1Dy; ++dy)
2218  {
2219  for (int dx = 0; dx < D1Dx; ++dx)
2220  {
2221  y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
2222  aXY[dy][dx] * wz;
2223  }
2224  }
2225  }
2226 
2227  osc += D1Dx * D1Dy * D1Dz;
2228  } // loop c
2229  } // loop qz
2230  }); // end of element loop
2231 }
2232 
2233 static void PAHdivL2ApplyTranspose2D(const int D1D,
2234  const int Q1D,
2235  const int L2D1D,
2236  const int NE,
2237  const Array<double> &L2Bo_,
2238  const Array<double> &Gct_,
2239  const Array<double> &Bot_,
2240  const Vector &op_,
2241  const Vector &x_,
2242  Vector &y_)
2243 {
2244  constexpr static int VDIM = 2;
2245  constexpr static int MAX_D1D = HDIV_MAX_D1D;
2246  constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
2247 
2248  auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2249  auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2250  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2251  auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
2252  auto x = Reshape(x_.Read(), L2D1D, L2D1D, NE);
2253  auto y = Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
2254 
2255  MFEM_FORALL(e, NE,
2256  {
2257  double div[MAX_Q1D][MAX_Q1D];
2258 
2259  for (int qy = 0; qy < Q1D; ++qy)
2260  {
2261  for (int qx = 0; qx < Q1D; ++qx)
2262  {
2263  div[qy][qx] = 0.0;
2264  }
2265  }
2266 
2267  for (int dy = 0; dy < L2D1D; ++dy)
2268  {
2269  double aX[MAX_Q1D];
2270  for (int qx = 0; qx < Q1D; ++qx)
2271  {
2272  aX[qx] = 0.0;
2273  }
2274 
2275  for (int dx = 0; dx < L2D1D; ++dx)
2276  {
2277  const double t = x(dx,dy,e);
2278  for (int qx = 0; qx < Q1D; ++qx)
2279  {
2280  aX[qx] += t * L2Bo(qx,dx);
2281  }
2282  }
2283 
2284  for (int qy = 0; qy < Q1D; ++qy)
2285  {
2286  const double wy = L2Bo(qy,dy);
2287  for (int qx = 0; qx < Q1D; ++qx)
2288  {
2289  div[qy][qx] += aX[qx] * wy;
2290  }
2291  }
2292  }
2293 
2294  // Apply D operator.
2295  for (int qy = 0; qy < Q1D; ++qy)
2296  {
2297  for (int qx = 0; qx < Q1D; ++qx)
2298  {
2299  div[qy][qx] *= op(qx,qy,e);
2300  }
2301  }
2302 
2303  for (int qy = 0; qy < Q1D; ++qy)
2304  {
2305  double aX[MAX_D1D];
2306 
2307  int osc = 0;
2308  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2309  {
2310  const int D1Dy = (c == 1) ? D1D : D1D - 1;
2311  const int D1Dx = (c == 0) ? D1D : D1D - 1;
2312 
2313  for (int dx = 0; dx < D1Dx; ++dx)
2314  {
2315  aX[dx] = 0;
2316  }
2317  for (int qx = 0; qx < Q1D; ++qx)
2318  {
2319  for (int dx = 0; dx < D1Dx; ++dx)
2320  {
2321  aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) : Bot(dx,qx));
2322  }
2323  }
2324  for (int dy = 0; dy < D1Dy; ++dy)
2325  {
2326  const double wy = (c == 0) ? Bot(dy,qy) : Gct(dy,qy);
2327  for (int dx = 0; dx < D1Dx; ++dx)
2328  {
2329  y(dx + (dy * D1Dx) + osc, e) += aX[dx] * wy;
2330  }
2331  }
2332 
2333  osc += D1Dx * D1Dy;
2334  } // loop c
2335  } // loop qy
2336  }); // end of element loop
2337 }
2338 
2339 void VectorFEDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const
2340 {
2341  if (dim == 3)
2342  PAHdivL2Apply3D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G,
2343  L2mapsO->Bt, pa_data, x, y);
2344  else if (dim == 2)
2345  PAHdivL2Apply2D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G,
2346  L2mapsO->Bt, pa_data, x, y);
2347  else
2348  {
2349  MFEM_ABORT("Unsupported dimension!");
2350  }
2351 }
2352 
2353 void VectorFEDivergenceIntegrator::AddMultTransposePA(const Vector &x,
2354  Vector &y) const
2355 {
2356  if (dim == 3)
2357  PAHdivL2ApplyTranspose3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2358  mapsC->Gt, mapsO->Bt, pa_data, x, y);
2359  else if (dim == 2)
2360  PAHdivL2ApplyTranspose2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2361  mapsC->Gt, mapsO->Bt, pa_data, x, y);
2362  else
2363  {
2364  MFEM_ABORT("Unsupported dimension!");
2365  }
2366 }
2367 
2368 static void PAHdivL2AssembleDiagonal_ADAt_3D(const int D1D,
2369  const int Q1D,
2370  const int L2D1D,
2371  const int NE,
2372  const Array<double> &L2Bo_,
2373  const Array<double> &Gct_,
2374  const Array<double> &Bot_,
2375  const Vector &op_,
2376  const Vector &D_,
2377  Vector &diag_)
2378 {
2379  MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D");
2380  MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D");
2381  constexpr static int VDIM = 3;
2382 
2383  auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2384  auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2385  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2386  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
2387  auto D = Reshape(D_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
2388  auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE);
2389 
2390  MFEM_FORALL(e, NE,
2391  {
2392  for (int rz = 0; rz < L2D1D; ++rz)
2393  {
2394  for (int ry = 0; ry < L2D1D; ++ry)
2395  {
2396  for (int rx = 0; rx < L2D1D; ++rx)
2397  {
2398  // Compute row (rx,ry,rz), assuming all contributions are from
2399  // a single element.
2400 
2401  double row[3*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)*(HDIV_MAX_D1D-1)];
2402  double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2403 
2404  for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i)
2405  {
2406  row[i] = 0;
2407  }
2408 
2409  for (int qz = 0; qz < Q1D; ++qz)
2410  {
2411  for (int qy = 0; qy < Q1D; ++qy)
2412  {
2413  for (int qx = 0; qx < Q1D; ++qx)
2414  {
2415  div[qz][qy][qx] = op(qx,qy,qz,e) * L2Bo(qx,rx) *
2416  L2Bo(qy,ry) * L2Bo(qz,rz);
2417  }
2418  }
2419  }
2420 
2421  for (int qz = 0; qz < Q1D; ++qz)
2422  {
2423  double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
2424 
2425  int osc = 0;
2426  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2427  {
2428  const int D1Dz = (c == 2) ? D1D : D1D - 1;
2429  const int D1Dy = (c == 1) ? D1D : D1D - 1;
2430  const int D1Dx = (c == 0) ? D1D : D1D - 1;
2431 
2432  for (int dy = 0; dy < D1Dy; ++dy)
2433  {
2434  for (int dx = 0; dx < D1Dx; ++dx)
2435  {
2436  aXY[dy][dx] = 0;
2437  }
2438  }
2439  for (int qy = 0; qy < Q1D; ++qy)
2440  {
2441  double aX[HDIV_MAX_D1D];
2442  for (int dx = 0; dx < D1Dx; ++dx)
2443  {
2444  aX[dx] = 0;
2445  }
2446  for (int qx = 0; qx < Q1D; ++qx)
2447  {
2448  for (int dx = 0; dx < D1Dx; ++dx)
2449  {
2450  aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx)
2451  : Bot(dx,qx));
2452  }
2453  }
2454  for (int dy = 0; dy < D1Dy; ++dy)
2455  {
2456  const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
2457  for (int dx = 0; dx < D1Dx; ++dx)
2458  {
2459  aXY[dy][dx] += aX[dx] * wy;
2460  }
2461  }
2462  }
2463 
2464  for (int dz = 0; dz < D1Dz; ++dz)
2465  {
2466  const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
2467  for (int dy = 0; dy < D1Dy; ++dy)
2468  {
2469  for (int dx = 0; dx < D1Dx; ++dx)
2470  {
2471  row[dx + ((dy + (dz * D1Dy)) * D1Dx) + osc] +=
2472  aXY[dy][dx] * wz;
2473  }
2474  }
2475  }
2476 
2477  osc += D1Dx * D1Dy * D1Dz;
2478  } // loop c
2479  } // loop qz
2480 
2481  double val = 0.0;
2482  for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i)
2483  {
2484  val += row[i] * row[i] * D(i,e);
2485  }
2486  diag(rx,ry,rz,e) += val;
2487  } // loop rx
2488  } // loop ry
2489  } // loop rz
2490  }); // end of element loop
2491 }
2492 
2493 static void PAHdivL2AssembleDiagonal_ADAt_2D(const int D1D,
2494  const int Q1D,
2495  const int L2D1D,
2496  const int NE,
2497  const Array<double> &L2Bo_,
2498  const Array<double> &Gct_,
2499  const Array<double> &Bot_,
2500  const Vector &op_,
2501  const Vector &D_,
2502  Vector &diag_)
2503 {
2504  constexpr static int VDIM = 2;
2505 
2506  auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2507  auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2508  auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2509  auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
2510  auto D = Reshape(D_.Read(), 2*(D1D-1)*D1D, NE);
2511  auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, NE);
2512 
2513  MFEM_FORALL(e, NE,
2514  {
2515  for (int ry = 0; ry < L2D1D; ++ry)
2516  {
2517  for (int rx = 0; rx < L2D1D; ++rx)
2518  {
2519  // Compute row (rx,ry), assuming all contributions are from
2520  // a single element.
2521 
2522  double row[2*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)];
2523  double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2524 
2525  for (int i=0; i<2*D1D*(D1D - 1); ++i)
2526  {
2527  row[i] = 0;
2528  }
2529 
2530  for (int qy = 0; qy < Q1D; ++qy)
2531  {
2532  for (int qx = 0; qx < Q1D; ++qx)
2533  {
2534  div[qy][qx] = op(qx,qy,e) * L2Bo(qx,rx) * L2Bo(qy,ry);
2535  }
2536  }
2537 
2538  for (int qy = 0; qy < Q1D; ++qy)
2539  {
2540  int osc = 0;
2541  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2542  {
2543  const int D1Dy = (c == 1) ? D1D : D1D - 1;
2544  const int D1Dx = (c == 0) ? D1D : D1D - 1;
2545 
2546  double aX[HDIV_MAX_D1D];
2547  for (int dx = 0; dx < D1Dx; ++dx)
2548  {
2549  aX[dx] = 0;
2550  }
2551  for (int qx = 0; qx < Q1D; ++qx)
2552  {
2553  for (int dx = 0; dx < D1Dx; ++dx)
2554  {
2555  aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) :
2556  Bot(dx,qx));
2557  }
2558  }
2559 
2560  for (int dy = 0; dy < D1Dy; ++dy)
2561  {
2562  const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
2563 
2564  for (int dx = 0; dx < D1Dx; ++dx)
2565  {
2566  row[dx + (dy * D1Dx) + osc] += aX[dx] * wy;
2567  }
2568  }
2569 
2570  osc += D1Dx * D1Dy;
2571  } // loop c
2572  } // loop qy
2573 
2574  double val = 0.0;
2575  for (int i=0; i<2*D1D*(D1D - 1); ++i)
2576  {
2577  val += row[i] * row[i] * D(i,e);
2578  }
2579  diag(rx,ry,e) += val;
2580  } // loop rx
2581  } // loop ry
2582  }); // end of element loop
2583 }
2584 
2585 void VectorFEDivergenceIntegrator::AssembleDiagonalPA_ADAt(const Vector &D,
2586  Vector &diag)
2587 {
2588  if (dim == 3)
2589  PAHdivL2AssembleDiagonal_ADAt_3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2590  mapsC->Gt, mapsO->Bt, pa_data, D, diag);
2591  else if (dim == 2)
2592  PAHdivL2AssembleDiagonal_ADAt_2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2593  mapsC->Gt, mapsO->Bt, pa_data, D, diag);
2594  else
2595  {
2596  MFEM_ABORT("Unsupported dimension!");
2597  }
2598 }
2599 
2600 } // namespace mfem
constexpr int HDIV_MAX_D1D
Definition: bilininteg.hpp:31
void PAHdivMassApply2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Array< double > &Bot_, const Array< double > &Bct_, const Vector &op_, const Vector &x_, Vector &y_)
void PAHdivMassAssembleDiagonal3D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Vector &op_, Vector &diag_)
const int MAX_Q1D
Definition: forall.hpp:29
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:457
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:81
void PAHdivSetup2D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, Vector &coeff_, Vector &op)
void PAHdivMassAssembleDiagonal2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Vector &op_, Vector &diag_)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:465
const int MAX_D1D
Definition: forall.hpp:28
constexpr int dimension
This example only works in 3D. Kernels for 2D are not implemented.
Definition: hooke.cpp:45
RefCoord t[3]
constexpr int HDIV_MAX_Q1D
Definition: bilininteg.hpp:32
void SmemPAHdivMassApply2D(const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Array< double > &Bot_, const Array< double > &Bct_, const Vector &op_, const Vector &x_, Vector &y_, const int d1d=0, const int q1d=0)
Vector data type.
Definition: vector.hpp:60
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:449
void PAHdivSetup3D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, Vector &coeff_, Vector &op)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131