MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_hcurl.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "libceed/mass.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 void PAHcurlMassApply2D(const int D1D,
23  const int Q1D,
24  const int NE,
25  const bool symmetric,
26  const Array<double> &bo,
27  const Array<double> &bc,
28  const Array<double> &bot,
29  const Array<double> &bct,
30  const Vector &pa_data,
31  const Vector &x,
32  Vector &y)
33 {
34  constexpr static int VDIM = 2;
35  constexpr static int MAX_D1D = HCURL_MAX_D1D;
36  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
37 
38  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
39  auto Bc = Reshape(bc.Read(), Q1D, D1D);
40  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
41  auto Bct = Reshape(bct.Read(), D1D, Q1D);
42  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
43  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
44  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
45 
46  MFEM_FORALL(e, NE,
47  {
48  double mass[MAX_Q1D][MAX_Q1D][VDIM];
49 
50  for (int qy = 0; qy < Q1D; ++qy)
51  {
52  for (int qx = 0; qx < Q1D; ++qx)
53  {
54  for (int c = 0; c < VDIM; ++c)
55  {
56  mass[qy][qx][c] = 0.0;
57  }
58  }
59  }
60 
61  int osc = 0;
62 
63  for (int c = 0; c < VDIM; ++c) // loop over x, y components
64  {
65  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
66  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
67 
68  for (int dy = 0; dy < D1Dy; ++dy)
69  {
70  double massX[MAX_Q1D];
71  for (int qx = 0; qx < Q1D; ++qx)
72  {
73  massX[qx] = 0.0;
74  }
75 
76  for (int dx = 0; dx < D1Dx; ++dx)
77  {
78  const double t = X(dx + (dy * D1Dx) + osc, e);
79  for (int qx = 0; qx < Q1D; ++qx)
80  {
81  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
82  }
83  }
84 
85  for (int qy = 0; qy < Q1D; ++qy)
86  {
87  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
88  for (int qx = 0; qx < Q1D; ++qx)
89  {
90  mass[qy][qx][c] += massX[qx] * wy;
91  }
92  }
93  }
94 
95  osc += D1Dx * D1Dy;
96  } // loop (c) over components
97 
98  // Apply D operator.
99  for (int qy = 0; qy < Q1D; ++qy)
100  {
101  for (int qx = 0; qx < Q1D; ++qx)
102  {
103  const double O11 = op(qx,qy,0,e);
104  const double O21 = op(qx,qy,1,e);
105  const double O12 = symmetric ? O21 : op(qx,qy,2,e);
106  const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
107  const double massX = mass[qy][qx][0];
108  const double massY = mass[qy][qx][1];
109  mass[qy][qx][0] = (O11*massX)+(O12*massY);
110  mass[qy][qx][1] = (O21*massX)+(O22*massY);
111  }
112  }
113 
114  for (int qy = 0; qy < Q1D; ++qy)
115  {
116  osc = 0;
117 
118  for (int c = 0; c < VDIM; ++c) // loop over x, y components
119  {
120  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
121  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
122 
123  double massX[MAX_D1D];
124  for (int dx = 0; dx < D1Dx; ++dx)
125  {
126  massX[dx] = 0.0;
127  }
128  for (int qx = 0; qx < Q1D; ++qx)
129  {
130  for (int dx = 0; dx < D1Dx; ++dx)
131  {
132  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
133  }
134  }
135 
136  for (int dy = 0; dy < D1Dy; ++dy)
137  {
138  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
139 
140  for (int dx = 0; dx < D1Dx; ++dx)
141  {
142  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
143  }
144  }
145 
146  osc += D1Dx * D1Dy;
147  } // loop c
148  } // loop qy
149  }); // end of element loop
150 }
151 
152 void PAHcurlMassAssembleDiagonal2D(const int D1D,
153  const int Q1D,
154  const int NE,
155  const bool symmetric,
156  const Array<double> &bo,
157  const Array<double> &bc,
158  const Vector &pa_data,
159  Vector &diag)
160 {
161  constexpr static int VDIM = 2;
162  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
163 
164  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
165  auto Bc = Reshape(bc.Read(), Q1D, D1D);
166  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
167  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
168 
169  MFEM_FORALL(e, NE,
170  {
171  int osc = 0;
172 
173  for (int c = 0; c < VDIM; ++c) // loop over x, y components
174  {
175  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
176  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
177 
178  double mass[MAX_Q1D];
179 
180  for (int dy = 0; dy < D1Dy; ++dy)
181  {
182  for (int qx = 0; qx < Q1D; ++qx)
183  {
184  mass[qx] = 0.0;
185  for (int qy = 0; qy < Q1D; ++qy)
186  {
187  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
188 
189  mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
190  op(qx,qy,symmetric ? 2 : 3, e));
191  }
192  }
193 
194  for (int dx = 0; dx < D1Dx; ++dx)
195  {
196  for (int qx = 0; qx < Q1D; ++qx)
197  {
198  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
199  D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
200  }
201  }
202  }
203 
204  osc += D1Dx * D1Dy;
205  } // loop c
206  }); // end of element loop
207 }
208 
209 void PAHcurlMassAssembleDiagonal3D(const int D1D,
210  const int Q1D,
211  const int NE,
212  const bool symmetric,
213  const Array<double> &bo,
214  const Array<double> &bc,
215  const Vector &pa_data,
216  Vector &diag)
217 {
218  constexpr static int MAX_D1D = HCURL_MAX_D1D;
219  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
220 
221  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
222  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
223  constexpr static int VDIM = 3;
224 
225  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
226  auto Bc = Reshape(bc.Read(), Q1D, D1D);
227  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
228  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
229 
230  MFEM_FORALL(e, NE,
231  {
232  int osc = 0;
233 
234  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
235  {
236  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
237  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
238  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
239 
240  const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
241  (symmetric ? 5 : 8));
242 
243  double mass[MAX_Q1D];
244 
245  for (int dz = 0; dz < D1Dz; ++dz)
246  {
247  for (int dy = 0; dy < D1Dy; ++dy)
248  {
249  for (int qx = 0; qx < Q1D; ++qx)
250  {
251  mass[qx] = 0.0;
252  for (int qy = 0; qy < Q1D; ++qy)
253  {
254  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
255 
256  for (int qz = 0; qz < Q1D; ++qz)
257  {
258  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
259 
260  mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
261  }
262  }
263  }
264 
265  for (int dx = 0; dx < D1Dx; ++dx)
266  {
267  for (int qx = 0; qx < Q1D; ++qx)
268  {
269  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
270  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
271  }
272  }
273  }
274  }
275 
276  osc += D1Dx * D1Dy * D1Dz;
277  } // loop c
278  }); // end of element loop
279 }
280 
281 template<int T_D1D, int T_Q1D>
282 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
283  const int Q1D,
284  const int NE,
285  const bool symmetric,
286  const Array<double> &bo,
287  const Array<double> &bc,
288  const Vector &pa_data,
289  Vector &diag)
290 {
291  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
292  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
293 
294  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
295  auto Bc = Reshape(bc.Read(), Q1D, D1D);
296  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
297  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
298 
299  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
300  {
301  constexpr int VDIM = 3;
302  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
303  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
304 
305  MFEM_SHARED double sBo[tQ1D][tD1D];
306  MFEM_SHARED double sBc[tQ1D][tD1D];
307 
308  double op3[3];
309  MFEM_SHARED double sop[3][tQ1D][tQ1D];
310 
311  MFEM_FOREACH_THREAD(qx,x,Q1D)
312  {
313  MFEM_FOREACH_THREAD(qy,y,Q1D)
314  {
315  MFEM_FOREACH_THREAD(qz,z,Q1D)
316  {
317  op3[0] = op(qx,qy,qz,0,e);
318  op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
319  op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
320  }
321  }
322  }
323 
324  const int tidx = MFEM_THREAD_ID(x);
325  const int tidy = MFEM_THREAD_ID(y);
326  const int tidz = MFEM_THREAD_ID(z);
327 
328  if (tidz == 0)
329  {
330  MFEM_FOREACH_THREAD(d,y,D1D)
331  {
332  MFEM_FOREACH_THREAD(q,x,Q1D)
333  {
334  sBc[q][d] = Bc(q,d);
335  if (d < D1D-1)
336  {
337  sBo[q][d] = Bo(q,d);
338  }
339  }
340  }
341  }
342  MFEM_SYNC_THREAD;
343 
344  int osc = 0;
345  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
346  {
347  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
348  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
349  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
350 
351  double dxyz = 0.0;
352 
353  for (int qz=0; qz < Q1D; ++qz)
354  {
355  if (tidz == qz)
356  {
357  for (int i=0; i<3; ++i)
358  {
359  sop[i][tidx][tidy] = op3[i];
360  }
361  }
362 
363  MFEM_SYNC_THREAD;
364 
365  MFEM_FOREACH_THREAD(dz,z,D1Dz)
366  {
367  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
368 
369  MFEM_FOREACH_THREAD(dy,y,D1Dy)
370  {
371  MFEM_FOREACH_THREAD(dx,x,D1Dx)
372  {
373  for (int qy = 0; qy < Q1D; ++qy)
374  {
375  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
376 
377  for (int qx = 0; qx < Q1D; ++qx)
378  {
379  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
380  dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
381  }
382  }
383  }
384  }
385  }
386 
387  MFEM_SYNC_THREAD;
388  } // qz loop
389 
390  MFEM_FOREACH_THREAD(dz,z,D1Dz)
391  {
392  MFEM_FOREACH_THREAD(dy,y,D1Dy)
393  {
394  MFEM_FOREACH_THREAD(dx,x,D1Dx)
395  {
396  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
397  }
398  }
399  }
400 
401  osc += D1Dx * D1Dy * D1Dz;
402  } // c loop
403  }); // end of element loop
404 }
405 
406 void PAHcurlMassApply3D(const int D1D,
407  const int Q1D,
408  const int NE,
409  const bool symmetric,
410  const Array<double> &bo,
411  const Array<double> &bc,
412  const Array<double> &bot,
413  const Array<double> &bct,
414  const Vector &pa_data,
415  const Vector &x,
416  Vector &y)
417 {
418  constexpr static int MAX_D1D = HCURL_MAX_D1D;
419  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
420 
421  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
422  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
423  constexpr static int VDIM = 3;
424 
425  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
426  auto Bc = Reshape(bc.Read(), Q1D, D1D);
427  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
428  auto Bct = Reshape(bct.Read(), D1D, Q1D);
429  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
430  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
431  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
432 
433  MFEM_FORALL(e, NE,
434  {
435  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
436 
437  for (int qz = 0; qz < Q1D; ++qz)
438  {
439  for (int qy = 0; qy < Q1D; ++qy)
440  {
441  for (int qx = 0; qx < Q1D; ++qx)
442  {
443  for (int c = 0; c < VDIM; ++c)
444  {
445  mass[qz][qy][qx][c] = 0.0;
446  }
447  }
448  }
449  }
450 
451  int osc = 0;
452 
453  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
454  {
455  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
456  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
457  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
458 
459  for (int dz = 0; dz < D1Dz; ++dz)
460  {
461  double massXY[MAX_Q1D][MAX_Q1D];
462  for (int qy = 0; qy < Q1D; ++qy)
463  {
464  for (int qx = 0; qx < Q1D; ++qx)
465  {
466  massXY[qy][qx] = 0.0;
467  }
468  }
469 
470  for (int dy = 0; dy < D1Dy; ++dy)
471  {
472  double massX[MAX_Q1D];
473  for (int qx = 0; qx < Q1D; ++qx)
474  {
475  massX[qx] = 0.0;
476  }
477 
478  for (int dx = 0; dx < D1Dx; ++dx)
479  {
480  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
481  for (int qx = 0; qx < Q1D; ++qx)
482  {
483  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
484  }
485  }
486 
487  for (int qy = 0; qy < Q1D; ++qy)
488  {
489  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
490  for (int qx = 0; qx < Q1D; ++qx)
491  {
492  const double wx = massX[qx];
493  massXY[qy][qx] += wx * wy;
494  }
495  }
496  }
497 
498  for (int qz = 0; qz < Q1D; ++qz)
499  {
500  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
501  for (int qy = 0; qy < Q1D; ++qy)
502  {
503  for (int qx = 0; qx < Q1D; ++qx)
504  {
505  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
506  }
507  }
508  }
509  }
510 
511  osc += D1Dx * D1Dy * D1Dz;
512  } // loop (c) over components
513 
514  // Apply D operator.
515  for (int qz = 0; qz < Q1D; ++qz)
516  {
517  for (int qy = 0; qy < Q1D; ++qy)
518  {
519  for (int qx = 0; qx < Q1D; ++qx)
520  {
521  const double O11 = op(qx,qy,qz,0,e);
522  const double O12 = op(qx,qy,qz,1,e);
523  const double O13 = op(qx,qy,qz,2,e);
524  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
525  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
526  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
527  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
528  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
529  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
530  const double massX = mass[qz][qy][qx][0];
531  const double massY = mass[qz][qy][qx][1];
532  const double massZ = mass[qz][qy][qx][2];
533  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
534  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
535  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
536  }
537  }
538  }
539 
540  for (int qz = 0; qz < Q1D; ++qz)
541  {
542  double massXY[MAX_D1D][MAX_D1D];
543 
544  osc = 0;
545 
546  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
547  {
548  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
549  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
550  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
551 
552  for (int dy = 0; dy < D1Dy; ++dy)
553  {
554  for (int dx = 0; dx < D1Dx; ++dx)
555  {
556  massXY[dy][dx] = 0.0;
557  }
558  }
559  for (int qy = 0; qy < Q1D; ++qy)
560  {
561  double massX[MAX_D1D];
562  for (int dx = 0; dx < D1Dx; ++dx)
563  {
564  massX[dx] = 0;
565  }
566  for (int qx = 0; qx < Q1D; ++qx)
567  {
568  for (int dx = 0; dx < D1Dx; ++dx)
569  {
570  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
571  }
572  }
573  for (int dy = 0; dy < D1Dy; ++dy)
574  {
575  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
576  for (int dx = 0; dx < D1Dx; ++dx)
577  {
578  massXY[dy][dx] += massX[dx] * wy;
579  }
580  }
581  }
582 
583  for (int dz = 0; dz < D1Dz; ++dz)
584  {
585  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
586  for (int dy = 0; dy < D1Dy; ++dy)
587  {
588  for (int dx = 0; dx < D1Dx; ++dx)
589  {
590  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
591  }
592  }
593  }
594 
595  osc += D1Dx * D1Dy * D1Dz;
596  } // loop c
597  } // loop qz
598  }); // end of element loop
599 }
600 
601 template<int T_D1D, int T_Q1D>
602 void SmemPAHcurlMassApply3D(const int D1D,
603  const int Q1D,
604  const int NE,
605  const bool symmetric,
606  const Array<double> &bo,
607  const Array<double> &bc,
608  const Array<double> &bot,
609  const Array<double> &bct,
610  const Vector &pa_data,
611  const Vector &x,
612  Vector &y)
613 {
614  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
615  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
616 
617  const int dataSize = symmetric ? 6 : 9;
618 
619  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
620  auto Bc = Reshape(bc.Read(), Q1D, D1D);
621  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
622  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
623  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
624 
625  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
626  {
627  constexpr int VDIM = 3;
628  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
629  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
630 
631  MFEM_SHARED double sBo[tQ1D][tD1D];
632  MFEM_SHARED double sBc[tQ1D][tD1D];
633 
634  double op9[9];
635  MFEM_SHARED double sop[9*tQ1D*tQ1D];
636  MFEM_SHARED double mass[tQ1D][tQ1D][3];
637 
638  MFEM_SHARED double sX[tD1D][tD1D][tD1D];
639 
640  MFEM_FOREACH_THREAD(qx,x,Q1D)
641  {
642  MFEM_FOREACH_THREAD(qy,y,Q1D)
643  {
644  MFEM_FOREACH_THREAD(qz,z,Q1D)
645  {
646  for (int i=0; i<dataSize; ++i)
647  {
648  op9[i] = op(qx,qy,qz,i,e);
649  }
650  }
651  }
652  }
653 
654  const int tidx = MFEM_THREAD_ID(x);
655  const int tidy = MFEM_THREAD_ID(y);
656  const int tidz = MFEM_THREAD_ID(z);
657 
658  if (tidz == 0)
659  {
660  MFEM_FOREACH_THREAD(d,y,D1D)
661  {
662  MFEM_FOREACH_THREAD(q,x,Q1D)
663  {
664  sBc[q][d] = Bc(q,d);
665  if (d < D1D-1)
666  {
667  sBo[q][d] = Bo(q,d);
668  }
669  }
670  }
671  }
672  MFEM_SYNC_THREAD;
673 
674  for (int qz=0; qz < Q1D; ++qz)
675  {
676  int osc = 0;
677  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
678  {
679  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
680  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
681  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
682 
683  MFEM_FOREACH_THREAD(dz,z,D1Dz)
684  {
685  MFEM_FOREACH_THREAD(dy,y,D1Dy)
686  {
687  MFEM_FOREACH_THREAD(dx,x,D1Dx)
688  {
689  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
690  }
691  }
692  }
693  MFEM_SYNC_THREAD;
694 
695  if (tidz == qz)
696  {
697  for (int i=0; i<dataSize; ++i)
698  {
699  sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
700  }
701 
702  MFEM_FOREACH_THREAD(qy,y,Q1D)
703  {
704  MFEM_FOREACH_THREAD(qx,x,Q1D)
705  {
706  double u = 0.0;
707 
708  for (int dz = 0; dz < D1Dz; ++dz)
709  {
710  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
711  for (int dy = 0; dy < D1Dy; ++dy)
712  {
713  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
714  for (int dx = 0; dx < D1Dx; ++dx)
715  {
716  const double t = sX[dz][dy][dx];
717  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
718  u += t * wx * wy * wz;
719  }
720  }
721  }
722 
723  mass[qy][qx][c] = u;
724  } // qx
725  } // qy
726  } // tidz == qz
727 
728  osc += D1Dx * D1Dy * D1Dz;
729  MFEM_SYNC_THREAD;
730  } // c
731 
732  MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
733 
734  osc = 0;
735  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
736  {
737  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
738  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
739  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
740 
741  double dxyz = 0.0;
742 
743  MFEM_FOREACH_THREAD(dz,z,D1Dz)
744  {
745  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
746 
747  MFEM_FOREACH_THREAD(dy,y,D1Dy)
748  {
749  MFEM_FOREACH_THREAD(dx,x,D1Dx)
750  {
751  for (int qy = 0; qy < Q1D; ++qy)
752  {
753  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
754  for (int qx = 0; qx < Q1D; ++qx)
755  {
756  const int os = (dataSize*qx) + (dataSize*Q1D*qy);
757  const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
758  (symmetric ? 2 : 6))); // O11, O21, O31
759  const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
760  (symmetric ? 4 : 7))); // O12, O22, O32
761  const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
762  (symmetric ? 5 : 8))); // O13, O23, O33
763 
764  const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
765  (sop[id3] * mass[qy][qx][2]);
766 
767  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
768  dxyz += m_c * wx * wy * wz;
769  }
770  }
771  }
772  }
773  }
774 
775  MFEM_SYNC_THREAD;
776 
777  MFEM_FOREACH_THREAD(dz,z,D1Dz)
778  {
779  MFEM_FOREACH_THREAD(dy,y,D1Dy)
780  {
781  MFEM_FOREACH_THREAD(dx,x,D1Dx)
782  {
783  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
784  }
785  }
786  }
787 
788  osc += D1Dx * D1Dy * D1Dz;
789  } // c loop
790  } // qz
791  }); // end of element loop
792 }
793 
794 // PA H(curl) curl-curl assemble 2D kernel
795 static void PACurlCurlSetup2D(const int Q1D,
796  const int NE,
797  const Array<double> &w,
798  const Vector &j,
799  Vector &coeff,
800  Vector &op)
801 {
802  const int NQ = Q1D*Q1D;
803  auto W = w.Read();
804  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
805  auto C = Reshape(coeff.Read(), NQ, NE);
806  auto y = Reshape(op.Write(), NQ, NE);
807  MFEM_FORALL(e, NE,
808  {
809  for (int q = 0; q < NQ; ++q)
810  {
811  const double J11 = J(q,0,0,e);
812  const double J21 = J(q,1,0,e);
813  const double J12 = J(q,0,1,e);
814  const double J22 = J(q,1,1,e);
815  const double detJ = (J11*J22)-(J21*J12);
816  y(q,e) = W[q] * C(q,e) / detJ;
817  }
818  });
819 }
820 
821 // PA H(curl) curl-curl assemble 3D kernel
822 static void PACurlCurlSetup3D(const int Q1D,
823  const int coeffDim,
824  const int NE,
825  const Array<double> &w,
826  const Vector &j,
827  Vector &coeff,
828  Vector &op)
829 {
830  const int NQ = Q1D*Q1D*Q1D;
831  const bool symmetric = (coeffDim != 9);
832  auto W = w.Read();
833  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
834  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
835  auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
836 
837  MFEM_FORALL(e, NE,
838  {
839  for (int q = 0; q < NQ; ++q)
840  {
841  const double J11 = J(q,0,0,e);
842  const double J21 = J(q,1,0,e);
843  const double J31 = J(q,2,0,e);
844  const double J12 = J(q,0,1,e);
845  const double J22 = J(q,1,1,e);
846  const double J32 = J(q,2,1,e);
847  const double J13 = J(q,0,2,e);
848  const double J23 = J(q,1,2,e);
849  const double J33 = J(q,2,2,e);
850  const double detJ = J11 * (J22 * J33 - J32 * J23) -
851  /* */ J21 * (J12 * J33 - J32 * J13) +
852  /* */ J31 * (J12 * J23 - J22 * J13);
853 
854  const double c_detJ = W[q] / detJ;
855 
856  if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
857  {
858  // Set y to the 6 or 9 entries of J^T M J / det
859  const double M11 = C(0, q, e);
860  const double M12 = C(1, q, e);
861  const double M13 = C(2, q, e);
862  const double M21 = (!symmetric) ? C(3, q, e) : M12;
863  const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
864  const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
865  const double M31 = (!symmetric) ? C(6, q, e) : M13;
866  const double M32 = (!symmetric) ? C(7, q, e) : M23;
867  const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
868 
869  // First compute R = MJ
870  const double R11 = M11*J11 + M12*J21 + M13*J31;
871  const double R12 = M11*J12 + M12*J22 + M13*J32;
872  const double R13 = M11*J13 + M12*J23 + M13*J33;
873  const double R21 = M21*J11 + M22*J21 + M23*J31;
874  const double R22 = M21*J12 + M22*J22 + M23*J32;
875  const double R23 = M21*J13 + M22*J23 + M23*J33;
876  const double R31 = M31*J11 + M32*J21 + M33*J31;
877  const double R32 = M31*J12 + M32*J22 + M33*J32;
878  const double R33 = M31*J13 + M32*J23 + M33*J33;
879 
880  // Now set y to J^T R / det
881  y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
882  const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
883  y(q,1,e) = Y12; // 1,2
884  y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
885 
886  const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
887  const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
888  const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
889 
890  const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
891 
892  y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
893  y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
894  y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
895 
896  if (!symmetric)
897  {
898  y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
899  y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
900  y(q,8,e) = Y33; // 3,3
901  }
902  }
903  else // Vector or scalar coefficient version
904  {
905  // Set y to the 6 entries of J^T D J / det^2
906  const double D1 = C(0, q, e);
907  const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
908  const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
909 
910  y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
911  y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
912  y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
913  y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
914  y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
915  y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
916  }
917  }
918  });
919 }
920 
921 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
922 {
923  // Assumes tensor-product elements
924  Mesh *mesh = fes.GetMesh();
925  const FiniteElement *fel = fes.GetFE(0);
926 
927  const VectorTensorFiniteElement *el =
928  dynamic_cast<const VectorTensorFiniteElement*>(fel);
929  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
930 
931  const IntegrationRule *ir
932  = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
933  *mesh->GetElementTransformation(0));
934  const int dims = el->GetDim();
935  MFEM_VERIFY(dims == 2 || dims == 3, "");
936 
937  const int nq = ir->GetNPoints();
938  dim = mesh->Dimension();
939  MFEM_VERIFY(dim == 2 || dim == 3, "");
940 
941  const int dimc = (dim == 3) ? 3 : 1;
942 
943  ne = fes.GetNE();
944  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
945  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
946  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
947  dofs1D = mapsC->ndof;
948  quad1D = mapsC->nqpt;
949 
950  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
951 
952  const int MQsymmDim = MQ ? (MQ->GetWidth() * (MQ->GetWidth() + 1)) / 2 : 0;
953  const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
954  const int MQdim = MQ ? (MQ->IsSymmetric() ? MQsymmDim : MQfullDim) : 0;
955  const int coeffDim = MQ ? MQdim : (DQ ? DQ->GetVDim() : 1);
956 
957  symmetric = MQ ? MQ->IsSymmetric() : true;
958 
959  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
960  const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
961  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
962 
963  Vector coeff(coeffDim * ne * nq);
964  coeff = 1.0;
965  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
966  if (Q || DQ || MQ)
967  {
968  Vector D(DQ ? coeffDim : 0);
969  DenseMatrix M;
970  Vector Msymm;
971  if (MQ)
972  {
973  if (symmetric)
974  {
975  Msymm.SetSize(MQsymmDim);
976  }
977  else
978  {
979  M.SetSize(dimc);
980  }
981  }
982 
983  if (DQ)
984  {
985  MFEM_VERIFY(coeffDim == dimc, "");
986  }
987  if (MQ)
988  {
989  MFEM_VERIFY(coeffDim == MQdim, "");
990  MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
991  }
992 
993  for (int e=0; e<ne; ++e)
994  {
995  ElementTransformation *tr = mesh->GetElementTransformation(e);
996  for (int p=0; p<nq; ++p)
997  {
998  if (MQ)
999  {
1000  if (MQ->IsSymmetric())
1001  {
1002  MQ->EvalSymmetric(Msymm, *tr, ir->IntPoint(p));
1003 
1004  for (int i=0; i<MQsymmDim; ++i)
1005  {
1006  coeffh(i, p, e) = Msymm[i];
1007  }
1008  }
1009  else
1010  {
1011  MQ->Eval(M, *tr, ir->IntPoint(p));
1012 
1013  for (int i=0; i<dimc; ++i)
1014  for (int j=0; j<dimc; ++j)
1015  {
1016  coeffh(j+(i*dimc), p, e) = M(i,j);
1017  }
1018  }
1019  }
1020  else if (DQ)
1021  {
1022  DQ->Eval(D, *tr, ir->IntPoint(p));
1023  for (int i=0; i<coeffDim; ++i)
1024  {
1025  coeffh(i, p, e) = D[i];
1026  }
1027  }
1028  else
1029  {
1030  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1031  }
1032  }
1033  }
1034  }
1035 
1036  if (el->GetDerivType() != mfem::FiniteElement::CURL)
1037  {
1038  MFEM_ABORT("Unknown kernel.");
1039  }
1040 
1041  if (dim == 3)
1042  {
1043  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1044  pa_data);
1045  }
1046  else
1047  {
1048  PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1049  }
1050 }
1051 
1052 static void PACurlCurlApply2D(const int D1D,
1053  const int Q1D,
1054  const int NE,
1055  const Array<double> &bo,
1056  const Array<double> &bot,
1057  const Array<double> &gc,
1058  const Array<double> &gct,
1059  const Vector &pa_data,
1060  const Vector &x,
1061  Vector &y)
1062 {
1063  constexpr static int VDIM = 2;
1064  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1065  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1066 
1067  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1068  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1069  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1070  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1071  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1072  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1073  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1074 
1075  MFEM_FORALL(e, NE,
1076  {
1077  double curl[MAX_Q1D][MAX_Q1D];
1078 
1079  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1080 
1081  for (int qy = 0; qy < Q1D; ++qy)
1082  {
1083  for (int qx = 0; qx < Q1D; ++qx)
1084  {
1085  curl[qy][qx] = 0.0;
1086  }
1087  }
1088 
1089  int osc = 0;
1090 
1091  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1092  {
1093  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1094  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1095 
1096  for (int dy = 0; dy < D1Dy; ++dy)
1097  {
1098  double gradX[MAX_Q1D];
1099  for (int qx = 0; qx < Q1D; ++qx)
1100  {
1101  gradX[qx] = 0;
1102  }
1103 
1104  for (int dx = 0; dx < D1Dx; ++dx)
1105  {
1106  const double t = X(dx + (dy * D1Dx) + osc, e);
1107  for (int qx = 0; qx < Q1D; ++qx)
1108  {
1109  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1110  }
1111  }
1112 
1113  for (int qy = 0; qy < Q1D; ++qy)
1114  {
1115  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1116  for (int qx = 0; qx < Q1D; ++qx)
1117  {
1118  curl[qy][qx] += gradX[qx] * wy;
1119  }
1120  }
1121  }
1122 
1123  osc += D1Dx * D1Dy;
1124  } // loop (c) over components
1125 
1126  // Apply D operator.
1127  for (int qy = 0; qy < Q1D; ++qy)
1128  {
1129  for (int qx = 0; qx < Q1D; ++qx)
1130  {
1131  curl[qy][qx] *= op(qx,qy,e);
1132  }
1133  }
1134 
1135  for (int qy = 0; qy < Q1D; ++qy)
1136  {
1137  osc = 0;
1138 
1139  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1140  {
1141  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1142  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1143 
1144  double gradX[MAX_D1D];
1145  for (int dx = 0; dx < D1Dx; ++dx)
1146  {
1147  gradX[dx] = 0.0;
1148  }
1149  for (int qx = 0; qx < Q1D; ++qx)
1150  {
1151  for (int dx = 0; dx < D1Dx; ++dx)
1152  {
1153  gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1154  }
1155  }
1156  for (int dy = 0; dy < D1Dy; ++dy)
1157  {
1158  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1159 
1160  for (int dx = 0; dx < D1Dx; ++dx)
1161  {
1162  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1163  }
1164  }
1165 
1166  osc += D1Dx * D1Dy;
1167  } // loop c
1168  } // loop qy
1169  }); // end of element loop
1170 }
1171 
1172 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1173 static void PACurlCurlApply3D(const int D1D,
1174  const int Q1D,
1175  const bool symmetric,
1176  const int NE,
1177  const Array<double> &bo,
1178  const Array<double> &bc,
1179  const Array<double> &bot,
1180  const Array<double> &bct,
1181  const Array<double> &gc,
1182  const Array<double> &gct,
1183  const Vector &pa_data,
1184  const Vector &x,
1185  Vector &y)
1186 {
1187  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1188  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1189  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1190  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1191  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1192  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1193  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1194 
1195  constexpr static int VDIM = 3;
1196 
1197  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1198  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1199  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1200  auto Bct = Reshape(bct.Read(), D1D, Q1D);
1201  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1202  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1203  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1204  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1205  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1206 
1207  MFEM_FORALL(e, NE,
1208  {
1209  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1210  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1211 
1212  for (int qz = 0; qz < Q1D; ++qz)
1213  {
1214  for (int qy = 0; qy < Q1D; ++qy)
1215  {
1216  for (int qx = 0; qx < Q1D; ++qx)
1217  {
1218  for (int c = 0; c < VDIM; ++c)
1219  {
1220  curl[qz][qy][qx][c] = 0.0;
1221  }
1222  }
1223  }
1224  }
1225 
1226  // We treat x, y, z components separately for optimization specific to each.
1227 
1228  int osc = 0;
1229 
1230  {
1231  // x component
1232  const int D1Dz = D1D;
1233  const int D1Dy = D1D;
1234  const int D1Dx = D1D - 1;
1235 
1236  for (int dz = 0; dz < D1Dz; ++dz)
1237  {
1238  double gradXY[MAX_Q1D][MAX_Q1D][2];
1239  for (int qy = 0; qy < Q1D; ++qy)
1240  {
1241  for (int qx = 0; qx < Q1D; ++qx)
1242  {
1243  for (int d = 0; d < 2; ++d)
1244  {
1245  gradXY[qy][qx][d] = 0.0;
1246  }
1247  }
1248  }
1249 
1250  for (int dy = 0; dy < D1Dy; ++dy)
1251  {
1252  double massX[MAX_Q1D];
1253  for (int qx = 0; qx < Q1D; ++qx)
1254  {
1255  massX[qx] = 0.0;
1256  }
1257 
1258  for (int dx = 0; dx < D1Dx; ++dx)
1259  {
1260  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1261  for (int qx = 0; qx < Q1D; ++qx)
1262  {
1263  massX[qx] += t * Bo(qx,dx);
1264  }
1265  }
1266 
1267  for (int qy = 0; qy < Q1D; ++qy)
1268  {
1269  const double wy = Bc(qy,dy);
1270  const double wDy = Gc(qy,dy);
1271  for (int qx = 0; qx < Q1D; ++qx)
1272  {
1273  const double wx = massX[qx];
1274  gradXY[qy][qx][0] += wx * wDy;
1275  gradXY[qy][qx][1] += wx * wy;
1276  }
1277  }
1278  }
1279 
1280  for (int qz = 0; qz < Q1D; ++qz)
1281  {
1282  const double wz = Bc(qz,dz);
1283  const double wDz = Gc(qz,dz);
1284  for (int qy = 0; qy < Q1D; ++qy)
1285  {
1286  for (int qx = 0; qx < Q1D; ++qx)
1287  {
1288  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1289  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1290  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1291  }
1292  }
1293  }
1294  }
1295 
1296  osc += D1Dx * D1Dy * D1Dz;
1297  }
1298 
1299  {
1300  // y component
1301  const int D1Dz = D1D;
1302  const int D1Dy = D1D - 1;
1303  const int D1Dx = D1D;
1304 
1305  for (int dz = 0; dz < D1Dz; ++dz)
1306  {
1307  double gradXY[MAX_Q1D][MAX_Q1D][2];
1308  for (int qy = 0; qy < Q1D; ++qy)
1309  {
1310  for (int qx = 0; qx < Q1D; ++qx)
1311  {
1312  for (int d = 0; d < 2; ++d)
1313  {
1314  gradXY[qy][qx][d] = 0.0;
1315  }
1316  }
1317  }
1318 
1319  for (int dx = 0; dx < D1Dx; ++dx)
1320  {
1321  double massY[MAX_Q1D];
1322  for (int qy = 0; qy < Q1D; ++qy)
1323  {
1324  massY[qy] = 0.0;
1325  }
1326 
1327  for (int dy = 0; dy < D1Dy; ++dy)
1328  {
1329  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1330  for (int qy = 0; qy < Q1D; ++qy)
1331  {
1332  massY[qy] += t * Bo(qy,dy);
1333  }
1334  }
1335 
1336  for (int qx = 0; qx < Q1D; ++qx)
1337  {
1338  const double wx = Bc(qx,dx);
1339  const double wDx = Gc(qx,dx);
1340  for (int qy = 0; qy < Q1D; ++qy)
1341  {
1342  const double wy = massY[qy];
1343  gradXY[qy][qx][0] += wDx * wy;
1344  gradXY[qy][qx][1] += wx * wy;
1345  }
1346  }
1347  }
1348 
1349  for (int qz = 0; qz < Q1D; ++qz)
1350  {
1351  const double wz = Bc(qz,dz);
1352  const double wDz = Gc(qz,dz);
1353  for (int qy = 0; qy < Q1D; ++qy)
1354  {
1355  for (int qx = 0; qx < Q1D; ++qx)
1356  {
1357  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1358  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1359  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1360  }
1361  }
1362  }
1363  }
1364 
1365  osc += D1Dx * D1Dy * D1Dz;
1366  }
1367 
1368  {
1369  // z component
1370  const int D1Dz = D1D - 1;
1371  const int D1Dy = D1D;
1372  const int D1Dx = D1D;
1373 
1374  for (int dx = 0; dx < D1Dx; ++dx)
1375  {
1376  double gradYZ[MAX_Q1D][MAX_Q1D][2];
1377  for (int qz = 0; qz < Q1D; ++qz)
1378  {
1379  for (int qy = 0; qy < Q1D; ++qy)
1380  {
1381  for (int d = 0; d < 2; ++d)
1382  {
1383  gradYZ[qz][qy][d] = 0.0;
1384  }
1385  }
1386  }
1387 
1388  for (int dy = 0; dy < D1Dy; ++dy)
1389  {
1390  double massZ[MAX_Q1D];
1391  for (int qz = 0; qz < Q1D; ++qz)
1392  {
1393  massZ[qz] = 0.0;
1394  }
1395 
1396  for (int dz = 0; dz < D1Dz; ++dz)
1397  {
1398  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1399  for (int qz = 0; qz < Q1D; ++qz)
1400  {
1401  massZ[qz] += t * Bo(qz,dz);
1402  }
1403  }
1404 
1405  for (int qy = 0; qy < Q1D; ++qy)
1406  {
1407  const double wy = Bc(qy,dy);
1408  const double wDy = Gc(qy,dy);
1409  for (int qz = 0; qz < Q1D; ++qz)
1410  {
1411  const double wz = massZ[qz];
1412  gradYZ[qz][qy][0] += wz * wy;
1413  gradYZ[qz][qy][1] += wz * wDy;
1414  }
1415  }
1416  }
1417 
1418  for (int qx = 0; qx < Q1D; ++qx)
1419  {
1420  const double wx = Bc(qx,dx);
1421  const double wDx = Gc(qx,dx);
1422 
1423  for (int qy = 0; qy < Q1D; ++qy)
1424  {
1425  for (int qz = 0; qz < Q1D; ++qz)
1426  {
1427  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1428  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1429  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1430  }
1431  }
1432  }
1433  }
1434  }
1435 
1436  // Apply D operator.
1437  for (int qz = 0; qz < Q1D; ++qz)
1438  {
1439  for (int qy = 0; qy < Q1D; ++qy)
1440  {
1441  for (int qx = 0; qx < Q1D; ++qx)
1442  {
1443  const double O11 = op(qx,qy,qz,0,e);
1444  const double O12 = op(qx,qy,qz,1,e);
1445  const double O13 = op(qx,qy,qz,2,e);
1446  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1447  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1448  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1449  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1450  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1451  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1452 
1453  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1454  (O13 * curl[qz][qy][qx][2]);
1455  const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1456  (O23 * curl[qz][qy][qx][2]);
1457  const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1458  (O33 * curl[qz][qy][qx][2]);
1459 
1460  curl[qz][qy][qx][0] = c1;
1461  curl[qz][qy][qx][1] = c2;
1462  curl[qz][qy][qx][2] = c3;
1463  }
1464  }
1465  }
1466 
1467  // x component
1468  osc = 0;
1469  {
1470  const int D1Dz = D1D;
1471  const int D1Dy = D1D;
1472  const int D1Dx = D1D - 1;
1473 
1474  for (int qz = 0; qz < Q1D; ++qz)
1475  {
1476  double gradXY12[MAX_D1D][MAX_D1D];
1477  double gradXY21[MAX_D1D][MAX_D1D];
1478 
1479  for (int dy = 0; dy < D1Dy; ++dy)
1480  {
1481  for (int dx = 0; dx < D1Dx; ++dx)
1482  {
1483  gradXY12[dy][dx] = 0.0;
1484  gradXY21[dy][dx] = 0.0;
1485  }
1486  }
1487  for (int qy = 0; qy < Q1D; ++qy)
1488  {
1489  double massX[MAX_D1D][2];
1490  for (int dx = 0; dx < D1Dx; ++dx)
1491  {
1492  for (int n = 0; n < 2; ++n)
1493  {
1494  massX[dx][n] = 0.0;
1495  }
1496  }
1497  for (int qx = 0; qx < Q1D; ++qx)
1498  {
1499  for (int dx = 0; dx < D1Dx; ++dx)
1500  {
1501  const double wx = Bot(dx,qx);
1502 
1503  massX[dx][0] += wx * curl[qz][qy][qx][1];
1504  massX[dx][1] += wx * curl[qz][qy][qx][2];
1505  }
1506  }
1507  for (int dy = 0; dy < D1Dy; ++dy)
1508  {
1509  const double wy = Bct(dy,qy);
1510  const double wDy = Gct(dy,qy);
1511 
1512  for (int dx = 0; dx < D1Dx; ++dx)
1513  {
1514  gradXY21[dy][dx] += massX[dx][0] * wy;
1515  gradXY12[dy][dx] += massX[dx][1] * wDy;
1516  }
1517  }
1518  }
1519 
1520  for (int dz = 0; dz < D1Dz; ++dz)
1521  {
1522  const double wz = Bct(dz,qz);
1523  const double wDz = Gct(dz,qz);
1524  for (int dy = 0; dy < D1Dy; ++dy)
1525  {
1526  for (int dx = 0; dx < D1Dx; ++dx)
1527  {
1528  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1529  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1530  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1531  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1532  }
1533  }
1534  }
1535  } // loop qz
1536 
1537  osc += D1Dx * D1Dy * D1Dz;
1538  }
1539 
1540  // y component
1541  {
1542  const int D1Dz = D1D;
1543  const int D1Dy = D1D - 1;
1544  const int D1Dx = D1D;
1545 
1546  for (int qz = 0; qz < Q1D; ++qz)
1547  {
1548  double gradXY02[MAX_D1D][MAX_D1D];
1549  double gradXY20[MAX_D1D][MAX_D1D];
1550 
1551  for (int dy = 0; dy < D1Dy; ++dy)
1552  {
1553  for (int dx = 0; dx < D1Dx; ++dx)
1554  {
1555  gradXY02[dy][dx] = 0.0;
1556  gradXY20[dy][dx] = 0.0;
1557  }
1558  }
1559  for (int qx = 0; qx < Q1D; ++qx)
1560  {
1561  double massY[MAX_D1D][2];
1562  for (int dy = 0; dy < D1Dy; ++dy)
1563  {
1564  massY[dy][0] = 0.0;
1565  massY[dy][1] = 0.0;
1566  }
1567  for (int qy = 0; qy < Q1D; ++qy)
1568  {
1569  for (int dy = 0; dy < D1Dy; ++dy)
1570  {
1571  const double wy = Bot(dy,qy);
1572 
1573  massY[dy][0] += wy * curl[qz][qy][qx][2];
1574  massY[dy][1] += wy * curl[qz][qy][qx][0];
1575  }
1576  }
1577  for (int dx = 0; dx < D1Dx; ++dx)
1578  {
1579  const double wx = Bct(dx,qx);
1580  const double wDx = Gct(dx,qx);
1581 
1582  for (int dy = 0; dy < D1Dy; ++dy)
1583  {
1584  gradXY02[dy][dx] += massY[dy][0] * wDx;
1585  gradXY20[dy][dx] += massY[dy][1] * wx;
1586  }
1587  }
1588  }
1589 
1590  for (int dz = 0; dz < D1Dz; ++dz)
1591  {
1592  const double wz = Bct(dz,qz);
1593  const double wDz = Gct(dz,qz);
1594  for (int dy = 0; dy < D1Dy; ++dy)
1595  {
1596  for (int dx = 0; dx < D1Dx; ++dx)
1597  {
1598  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1599  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1600  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1601  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1602  }
1603  }
1604  }
1605  } // loop qz
1606 
1607  osc += D1Dx * D1Dy * D1Dz;
1608  }
1609 
1610  // z component
1611  {
1612  const int D1Dz = D1D - 1;
1613  const int D1Dy = D1D;
1614  const int D1Dx = D1D;
1615 
1616  for (int qx = 0; qx < Q1D; ++qx)
1617  {
1618  double gradYZ01[MAX_D1D][MAX_D1D];
1619  double gradYZ10[MAX_D1D][MAX_D1D];
1620 
1621  for (int dy = 0; dy < D1Dy; ++dy)
1622  {
1623  for (int dz = 0; dz < D1Dz; ++dz)
1624  {
1625  gradYZ01[dz][dy] = 0.0;
1626  gradYZ10[dz][dy] = 0.0;
1627  }
1628  }
1629  for (int qy = 0; qy < Q1D; ++qy)
1630  {
1631  double massZ[MAX_D1D][2];
1632  for (int dz = 0; dz < D1Dz; ++dz)
1633  {
1634  for (int n = 0; n < 2; ++n)
1635  {
1636  massZ[dz][n] = 0.0;
1637  }
1638  }
1639  for (int qz = 0; qz < Q1D; ++qz)
1640  {
1641  for (int dz = 0; dz < D1Dz; ++dz)
1642  {
1643  const double wz = Bot(dz,qz);
1644 
1645  massZ[dz][0] += wz * curl[qz][qy][qx][0];
1646  massZ[dz][1] += wz * curl[qz][qy][qx][1];
1647  }
1648  }
1649  for (int dy = 0; dy < D1Dy; ++dy)
1650  {
1651  const double wy = Bct(dy,qy);
1652  const double wDy = Gct(dy,qy);
1653 
1654  for (int dz = 0; dz < D1Dz; ++dz)
1655  {
1656  gradYZ01[dz][dy] += wy * massZ[dz][1];
1657  gradYZ10[dz][dy] += wDy * massZ[dz][0];
1658  }
1659  }
1660  }
1661 
1662  for (int dx = 0; dx < D1Dx; ++dx)
1663  {
1664  const double wx = Bct(dx,qx);
1665  const double wDx = Gct(dx,qx);
1666 
1667  for (int dy = 0; dy < D1Dy; ++dy)
1668  {
1669  for (int dz = 0; dz < D1Dz; ++dz)
1670  {
1671  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1672  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1673  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1674  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1675  }
1676  }
1677  }
1678  } // loop qx
1679  }
1680  }); // end of element loop
1681 }
1682 
1683 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1684 static void SmemPACurlCurlApply3D(const int D1D,
1685  const int Q1D,
1686  const bool symmetric,
1687  const int NE,
1688  const Array<double> &bo,
1689  const Array<double> &bc,
1690  const Array<double> &bot,
1691  const Array<double> &bct,
1692  const Array<double> &gc,
1693  const Array<double> &gct,
1694  const Vector &pa_data,
1695  const Vector &x,
1696  Vector &y)
1697 {
1698  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1699  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1700  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1701  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1702  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1703  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1704  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1705 
1706  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1707  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1708  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1709  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1710  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1711  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1712 
1713  const int s = symmetric ? 6 : 9;
1714 
1715  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1716  {
1717  constexpr int VDIM = 3;
1718 
1719  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1720  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1721  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1722 
1723  double ope[9];
1724  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1725  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1726 
1727  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1728 
1729  MFEM_FOREACH_THREAD(qx,x,Q1D)
1730  {
1731  MFEM_FOREACH_THREAD(qy,y,Q1D)
1732  {
1733  MFEM_FOREACH_THREAD(qz,z,Q1D)
1734  {
1735  for (int i=0; i<s; ++i)
1736  {
1737  ope[i] = op(qx,qy,qz,i,e);
1738  }
1739  }
1740  }
1741  }
1742 
1743  const int tidx = MFEM_THREAD_ID(x);
1744  const int tidy = MFEM_THREAD_ID(y);
1745  const int tidz = MFEM_THREAD_ID(z);
1746 
1747  if (tidz == 0)
1748  {
1749  MFEM_FOREACH_THREAD(d,y,D1D)
1750  {
1751  MFEM_FOREACH_THREAD(q,x,Q1D)
1752  {
1753  sBc[d][q] = Bc(q,d);
1754  sGc[d][q] = Gc(q,d);
1755  if (d < D1D-1)
1756  {
1757  sBo[d][q] = Bo(q,d);
1758  }
1759  }
1760  }
1761  }
1762  MFEM_SYNC_THREAD;
1763 
1764  for (int qz=0; qz < Q1D; ++qz)
1765  {
1766  if (tidz == qz)
1767  {
1768  MFEM_FOREACH_THREAD(qy,y,Q1D)
1769  {
1770  MFEM_FOREACH_THREAD(qx,x,Q1D)
1771  {
1772  for (int i=0; i<3; ++i)
1773  {
1774  curl[qy][qx][i] = 0.0;
1775  }
1776  }
1777  }
1778  }
1779 
1780  int osc = 0;
1781  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1782  {
1783  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1784  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1785  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1786 
1787  MFEM_FOREACH_THREAD(dz,z,D1Dz)
1788  {
1789  MFEM_FOREACH_THREAD(dy,y,D1Dy)
1790  {
1791  MFEM_FOREACH_THREAD(dx,x,D1Dx)
1792  {
1793  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1794  }
1795  }
1796  }
1797  MFEM_SYNC_THREAD;
1798 
1799  if (tidz == qz)
1800  {
1801  if (c == 0)
1802  {
1803  for (int i=0; i<s; ++i)
1804  {
1805  sop[i][tidx][tidy] = ope[i];
1806  }
1807  }
1808 
1809  MFEM_FOREACH_THREAD(qy,y,Q1D)
1810  {
1811  MFEM_FOREACH_THREAD(qx,x,Q1D)
1812  {
1813  double u = 0.0;
1814  double v = 0.0;
1815 
1816  // We treat x, y, z components separately for optimization specific to each.
1817  if (c == 0) // x component
1818  {
1819  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1820 
1821  for (int dz = 0; dz < D1Dz; ++dz)
1822  {
1823  const double wz = sBc[dz][qz];
1824  const double wDz = sGc[dz][qz];
1825 
1826  for (int dy = 0; dy < D1Dy; ++dy)
1827  {
1828  const double wy = sBc[dy][qy];
1829  const double wDy = sGc[dy][qy];
1830 
1831  for (int dx = 0; dx < D1Dx; ++dx)
1832  {
1833  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1834  u += wx * wDy * wz;
1835  v += wx * wy * wDz;
1836  }
1837  }
1838  }
1839 
1840  curl[qy][qx][1] += v; // (u_0)_{x_2}
1841  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1842  }
1843  else if (c == 1) // y component
1844  {
1845  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1846 
1847  for (int dz = 0; dz < D1Dz; ++dz)
1848  {
1849  const double wz = sBc[dz][qz];
1850  const double wDz = sGc[dz][qz];
1851 
1852  for (int dy = 0; dy < D1Dy; ++dy)
1853  {
1854  const double wy = sBo[dy][qy];
1855 
1856  for (int dx = 0; dx < D1Dx; ++dx)
1857  {
1858  const double t = sX[dz][dy][dx];
1859  const double wx = t * sBc[dx][qx];
1860  const double wDx = t * sGc[dx][qx];
1861 
1862  u += wDx * wy * wz;
1863  v += wx * wy * wDz;
1864  }
1865  }
1866  }
1867 
1868  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1869  curl[qy][qx][2] += u; // (u_1)_{x_0}
1870  }
1871  else // z component
1872  {
1873  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1874 
1875  for (int dz = 0; dz < D1Dz; ++dz)
1876  {
1877  const double wz = sBo[dz][qz];
1878 
1879  for (int dy = 0; dy < D1Dy; ++dy)
1880  {
1881  const double wy = sBc[dy][qy];
1882  const double wDy = sGc[dy][qy];
1883 
1884  for (int dx = 0; dx < D1Dx; ++dx)
1885  {
1886  const double t = sX[dz][dy][dx];
1887  const double wx = t * sBc[dx][qx];
1888  const double wDx = t * sGc[dx][qx];
1889 
1890  u += wDx * wy * wz;
1891  v += wx * wDy * wz;
1892  }
1893  }
1894  }
1895 
1896  curl[qy][qx][0] += v; // (u_2)_{x_1}
1897  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1898  }
1899  } // qx
1900  } // qy
1901  } // tidz == qz
1902 
1903  osc += D1Dx * D1Dy * D1Dz;
1904  MFEM_SYNC_THREAD;
1905  } // c
1906 
1907  double dxyz1 = 0.0;
1908  double dxyz2 = 0.0;
1909  double dxyz3 = 0.0;
1910 
1911  MFEM_FOREACH_THREAD(dz,z,D1D)
1912  {
1913  const double wcz = sBc[dz][qz];
1914  const double wcDz = sGc[dz][qz];
1915  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1916 
1917  MFEM_FOREACH_THREAD(dy,y,D1D)
1918  {
1919  MFEM_FOREACH_THREAD(dx,x,D1D)
1920  {
1921  for (int qy = 0; qy < Q1D; ++qy)
1922  {
1923  const double wcy = sBc[dy][qy];
1924  const double wcDy = sGc[dy][qy];
1925  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1926 
1927  for (int qx = 0; qx < Q1D; ++qx)
1928  {
1929  const double O11 = sop[0][qx][qy];
1930  const double O12 = sop[1][qx][qy];
1931  const double O13 = sop[2][qx][qy];
1932  const double O21 = symmetric ? O12 : sop[3][qx][qy];
1933  const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1934  const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1935  const double O31 = symmetric ? O13 : sop[6][qx][qy];
1936  const double O32 = symmetric ? O23 : sop[7][qx][qy];
1937  const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1938 
1939  const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1940  (O13 * curl[qy][qx][2]);
1941  const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1942  (O23 * curl[qy][qx][2]);
1943  const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1944  (O33 * curl[qy][qx][2]);
1945 
1946  const double wcx = sBc[dx][qx];
1947  const double wDx = sGc[dx][qx];
1948 
1949  if (dx < D1D-1)
1950  {
1951  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1952  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1953  const double wx = sBo[dx][qx];
1954  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1955  }
1956 
1957  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1958  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1959  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1960 
1961  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1962  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1963  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1964  } // qx
1965  } // qy
1966  } // dx
1967  } // dy
1968  } // dz
1969 
1970  MFEM_SYNC_THREAD;
1971 
1972  MFEM_FOREACH_THREAD(dz,z,D1D)
1973  {
1974  MFEM_FOREACH_THREAD(dy,y,D1D)
1975  {
1976  MFEM_FOREACH_THREAD(dx,x,D1D)
1977  {
1978  if (dx < D1D-1)
1979  {
1980  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1981  }
1982  if (dy < D1D-1)
1983  {
1984  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1985  }
1986  if (dz < D1D-1)
1987  {
1988  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1989  }
1990  }
1991  }
1992  }
1993  } // qz
1994  }); // end of element loop
1995 }
1996 
1997 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
1998 {
1999  if (dim == 3)
2000  {
2001  if (Device::Allows(Backend::DEVICE_MASK))
2002  {
2003  const int ID = (dofs1D << 4) | quad1D;
2004  switch (ID)
2005  {
2006  case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2007  mapsO->B, mapsC->B, mapsO->Bt,
2008  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2009  case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2010  mapsO->B, mapsC->B, mapsO->Bt,
2011  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2012  case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2013  mapsO->B,
2014  mapsC->B, mapsO->Bt,
2015  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2016  case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2017  mapsO->B, mapsC->B, mapsO->Bt,
2018  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2019  default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2020  mapsC->B, mapsO->Bt, mapsC->Bt,
2021  mapsC->G, mapsC->Gt, pa_data, x, y);
2022  }
2023  }
2024  else
2025  PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2026  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2027  }
2028  else if (dim == 2)
2029  {
2030  PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2031  mapsC->G, mapsC->Gt, pa_data, x, y);
2032  }
2033  else
2034  {
2035  MFEM_ABORT("Unsupported dimension!");
2036  }
2037 }
2038 
2039 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2040  const int Q1D,
2041  const int NE,
2042  const Array<double> &bo,
2043  const Array<double> &gc,
2044  const Vector &pa_data,
2045  Vector &diag)
2046 {
2047  constexpr static int VDIM = 2;
2048  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2049 
2050  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2051  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2052  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2053  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2054 
2055  MFEM_FORALL(e, NE,
2056  {
2057  int osc = 0;
2058 
2059  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2060  {
2061  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2062  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2063 
2064  double t[MAX_Q1D];
2065 
2066  for (int dy = 0; dy < D1Dy; ++dy)
2067  {
2068  for (int qx = 0; qx < Q1D; ++qx)
2069  {
2070  t[qx] = 0.0;
2071  for (int qy = 0; qy < Q1D; ++qy)
2072  {
2073  const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2074  t[qx] += wy * wy * op(qx,qy,e);
2075  }
2076  }
2077 
2078  for (int dx = 0; dx < D1Dx; ++dx)
2079  {
2080  for (int qx = 0; qx < Q1D; ++qx)
2081  {
2082  const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2083  D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2084  }
2085  }
2086  }
2087 
2088  osc += D1Dx * D1Dy;
2089  } // loop c
2090  }); // end of element loop
2091 }
2092 
2093 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2094 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2095  const int Q1D,
2096  const bool symmetric,
2097  const int NE,
2098  const Array<double> &bo,
2099  const Array<double> &bc,
2100  const Array<double> &go,
2101  const Array<double> &gc,
2102  const Vector &pa_data,
2103  Vector &diag)
2104 {
2105  constexpr static int VDIM = 3;
2106  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2107  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2108 
2109  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2110  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2111  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2112  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2113  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2114  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2115 
2116  const int s = symmetric ? 6 : 9;
2117  const int i11 = 0;
2118  const int i12 = 1;
2119  const int i13 = 2;
2120  const int i21 = symmetric ? i12 : 3;
2121  const int i22 = symmetric ? 3 : 4;
2122  const int i23 = symmetric ? 4 : 5;
2123  const int i31 = symmetric ? i13 : 6;
2124  const int i32 = symmetric ? i23 : 7;
2125  const int i33 = symmetric ? 5 : 8;
2126 
2127  MFEM_FORALL(e, NE,
2128  {
2129  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2130  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2131  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2132  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2133  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2134 
2135  // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2136  // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2137 
2138  int osc = 0;
2139 
2140  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2141  {
2142  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2143  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2144  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2145 
2146  double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2147 
2148  // z contraction
2149  for (int qx = 0; qx < Q1D; ++qx)
2150  {
2151  for (int qy = 0; qy < Q1D; ++qy)
2152  {
2153  for (int dz = 0; dz < D1Dz; ++dz)
2154  {
2155  for (int i=0; i<s; ++i)
2156  {
2157  for (int d=0; d<3; ++d)
2158  {
2159  zt[qx][qy][dz][i][d] = 0.0;
2160  }
2161  }
2162 
2163  for (int qz = 0; qz < Q1D; ++qz)
2164  {
2165  const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2166  const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2167 
2168  for (int i=0; i<s; ++i)
2169  {
2170  zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2171  zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2172  zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2173  }
2174  }
2175  }
2176  }
2177  } // end of z contraction
2178 
2179  double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2180 
2181  // y contraction
2182  for (int qx = 0; qx < Q1D; ++qx)
2183  {
2184  for (int dz = 0; dz < D1Dz; ++dz)
2185  {
2186  for (int dy = 0; dy < D1Dy; ++dy)
2187  {
2188  for (int i=0; i<s; ++i)
2189  {
2190  for (int d=0; d<3; ++d)
2191  for (int j=0; j<3; ++j)
2192  {
2193  yt[qx][dy][dz][i][d][j] = 0.0;
2194  }
2195  }
2196 
2197  for (int qy = 0; qy < Q1D; ++qy)
2198  {
2199  const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2200  const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2201 
2202  for (int i=0; i<s; ++i)
2203  {
2204  for (int d=0; d<3; ++d)
2205  {
2206  yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2207  yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2208  yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2209  }
2210  }
2211  }
2212  }
2213  }
2214  } // end of y contraction
2215 
2216  // x contraction
2217  for (int dz = 0; dz < D1Dz; ++dz)
2218  {
2219  for (int dy = 0; dy < D1Dy; ++dy)
2220  {
2221  for (int dx = 0; dx < D1Dx; ++dx)
2222  {
2223  for (int qx = 0; qx < Q1D; ++qx)
2224  {
2225  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2226  const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2227 
2228  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2229  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2230  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2231  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2232  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2233 
2234  /*
2235  const double O11 = op(q,0,e);
2236  const double O12 = op(q,1,e);
2237  const double O13 = op(q,2,e);
2238  const double O22 = op(q,3,e);
2239  const double O23 = op(q,4,e);
2240  const double O33 = op(q,5,e);
2241  */
2242 
2243  if (c == 0)
2244  {
2245  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2246  const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2247  - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2248 
2249  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2250  }
2251  else if (c == 1)
2252  {
2253  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2254  const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2255  - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2256  + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2257 
2258  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2259  }
2260  else
2261  {
2262  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2263  const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2264  - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2265  + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2266 
2267  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2268  }
2269  }
2270  }
2271  }
2272  } // end of x contraction
2273 
2274  osc += D1Dx * D1Dy * D1Dz;
2275  } // loop c
2276  }); // end of element loop
2277 }
2278 
2279 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2280 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2281  const int Q1D,
2282  const bool symmetric,
2283  const int NE,
2284  const Array<double> &bo,
2285  const Array<double> &bc,
2286  const Array<double> &go,
2287  const Array<double> &gc,
2288  const Vector &pa_data,
2289  Vector &diag)
2290 {
2291  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2292  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2293 
2294  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2295  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2296  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2297  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2298  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2299  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2300 
2301  const int s = symmetric ? 6 : 9;
2302  const int i11 = 0;
2303  const int i12 = 1;
2304  const int i13 = 2;
2305  const int i21 = symmetric ? i12 : 3;
2306  const int i22 = symmetric ? 3 : 4;
2307  const int i23 = symmetric ? 4 : 5;
2308  const int i31 = symmetric ? i13 : 6;
2309  const int i32 = symmetric ? i23 : 7;
2310  const int i33 = symmetric ? 5 : 8;
2311 
2312  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2313  {
2314  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2315  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2316  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2317  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2318  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2319 
2320  constexpr int VDIM = 3;
2321 
2322  MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2323  MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2324  MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2325  MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2326 
2327  double ope[9];
2328  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2329 
2330  MFEM_FOREACH_THREAD(qx,x,Q1D)
2331  {
2332  MFEM_FOREACH_THREAD(qy,y,Q1D)
2333  {
2334  MFEM_FOREACH_THREAD(qz,z,Q1D)
2335  {
2336  for (int i=0; i<s; ++i)
2337  {
2338  ope[i] = op(qx,qy,qz,i,e);
2339  }
2340  }
2341  }
2342  }
2343 
2344  const int tidx = MFEM_THREAD_ID(x);
2345  const int tidy = MFEM_THREAD_ID(y);
2346  const int tidz = MFEM_THREAD_ID(z);
2347 
2348  if (tidz == 0)
2349  {
2350  MFEM_FOREACH_THREAD(d,y,D1D)
2351  {
2352  MFEM_FOREACH_THREAD(q,x,Q1D)
2353  {
2354  sBc[q][d] = Bc(q,d);
2355  sGc[q][d] = Gc(q,d);
2356  if (d < D1D-1)
2357  {
2358  sBo[q][d] = Bo(q,d);
2359  sGo[q][d] = Go(q,d);
2360  }
2361  }
2362  }
2363  }
2364  MFEM_SYNC_THREAD;
2365 
2366  int osc = 0;
2367  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2368  {
2369  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2370  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2371  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2372 
2373  double dxyz = 0.0;
2374 
2375  for (int qz=0; qz < Q1D; ++qz)
2376  {
2377  if (tidz == qz)
2378  {
2379  for (int i=0; i<s; ++i)
2380  {
2381  sop[i][tidx][tidy] = ope[i];
2382  }
2383  }
2384 
2385  MFEM_SYNC_THREAD;
2386 
2387  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2388  {
2389  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2390  const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2391 
2392  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2393  {
2394  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2395  {
2396  for (int qy = 0; qy < Q1D; ++qy)
2397  {
2398  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2399  const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2400 
2401  for (int qx = 0; qx < Q1D; ++qx)
2402  {
2403  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2404  const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2405 
2406  if (c == 0)
2407  {
2408  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2409 
2410  // (u_0)_{x_2} O22 (u_0)_{x_2}
2411  dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2412 
2413  // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2414  dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2415 
2416  // (u_0)_{x_1} O33 (u_0)_{x_1}
2417  dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2418  }
2419  else if (c == 1)
2420  {
2421  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2422 
2423  // (u_1)_{x_2} O11 (u_1)_{x_2}
2424  dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2425 
2426  // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2427  dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2428 
2429  // (u_1)_{x_0} O33 (u_1)_{x_0})
2430  dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2431  }
2432  else
2433  {
2434  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2435 
2436  // (u_2)_{x_1} O11 (u_2)_{x_1}
2437  dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2438 
2439  // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2440  dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2441 
2442  // (u_2)_{x_0} O22 (u_2)_{x_0}
2443  dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2444  }
2445  }
2446  }
2447  }
2448  }
2449  }
2450 
2451  MFEM_SYNC_THREAD;
2452  } // qz loop
2453 
2454  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2455  {
2456  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2457  {
2458  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2459  {
2460  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2461  }
2462  }
2463  }
2464 
2465  osc += D1Dx * D1Dy * D1Dz;
2466  } // c loop
2467  }); // end of element loop
2468 }
2469 
2470 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2471 {
2472  if (dim == 3)
2473  {
2474  if (Device::Allows(Backend::DEVICE_MASK))
2475  {
2476  const int ID = (dofs1D << 4) | quad1D;
2477  switch (ID)
2478  {
2479  case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2480  symmetric, ne,
2481  mapsO->B, mapsC->B,
2482  mapsO->G, mapsC->G,
2483  pa_data, diag);
2484  case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2485  symmetric, ne,
2486  mapsO->B, mapsC->B,
2487  mapsO->G, mapsC->G,
2488  pa_data, diag);
2489  case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2490  symmetric, ne,
2491  mapsO->B, mapsC->B,
2492  mapsO->G, mapsC->G,
2493  pa_data, diag);
2494  case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2495  symmetric, ne,
2496  mapsO->B, mapsC->B,
2497  mapsO->G, mapsC->G,
2498  pa_data, diag);
2499  default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2500  mapsO->B, mapsC->B,
2501  mapsO->G, mapsC->G,
2502  pa_data, diag);
2503  }
2504  }
2505  else
2506  PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2507  mapsO->B, mapsC->B,
2508  mapsO->G, mapsC->G,
2509  pa_data, diag);
2510  }
2511  else if (dim == 2)
2512  {
2513  PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2514  mapsO->B, mapsC->G, pa_data, diag);
2515  }
2516  else
2517  {
2518  MFEM_ABORT("Unsupported dimension!");
2519  }
2520 }
2521 
2522 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2523 // integrated against H(curl) test functions corresponding to y.
2524 void PAHcurlH1Apply3D(const int D1D,
2525  const int Q1D,
2526  const int NE,
2527  const Array<double> &bc,
2528  const Array<double> &gc,
2529  const Array<double> &bot,
2530  const Array<double> &bct,
2531  const Vector &pa_data,
2532  const Vector &x,
2533  Vector &y)
2534 {
2535  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2536  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2537 
2538  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2539  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2540 
2541  constexpr static int VDIM = 3;
2542 
2543  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2544  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2545  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2546  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2547  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2548  auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2549  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2550 
2551  MFEM_FORALL(e, NE,
2552  {
2553  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2554 
2555  for (int qz = 0; qz < Q1D; ++qz)
2556  {
2557  for (int qy = 0; qy < Q1D; ++qy)
2558  {
2559  for (int qx = 0; qx < Q1D; ++qx)
2560  {
2561  for (int c = 0; c < VDIM; ++c)
2562  {
2563  mass[qz][qy][qx][c] = 0.0;
2564  }
2565  }
2566  }
2567  }
2568 
2569  for (int dz = 0; dz < D1D; ++dz)
2570  {
2571  double gradXY[MAX_Q1D][MAX_Q1D][3];
2572  for (int qy = 0; qy < Q1D; ++qy)
2573  {
2574  for (int qx = 0; qx < Q1D; ++qx)
2575  {
2576  gradXY[qy][qx][0] = 0.0;
2577  gradXY[qy][qx][1] = 0.0;
2578  gradXY[qy][qx][2] = 0.0;
2579  }
2580  }
2581  for (int dy = 0; dy < D1D; ++dy)
2582  {
2583  double gradX[MAX_Q1D][2];
2584  for (int qx = 0; qx < Q1D; ++qx)
2585  {
2586  gradX[qx][0] = 0.0;
2587  gradX[qx][1] = 0.0;
2588  }
2589  for (int dx = 0; dx < D1D; ++dx)
2590  {
2591  const double s = X(dx,dy,dz,e);
2592  for (int qx = 0; qx < Q1D; ++qx)
2593  {
2594  gradX[qx][0] += s * Bc(qx,dx);
2595  gradX[qx][1] += s * Gc(qx,dx);
2596  }
2597  }
2598  for (int qy = 0; qy < Q1D; ++qy)
2599  {
2600  const double wy = Bc(qy,dy);
2601  const double wDy = Gc(qy,dy);
2602  for (int qx = 0; qx < Q1D; ++qx)
2603  {
2604  const double wx = gradX[qx][0];
2605  const double wDx = gradX[qx][1];
2606  gradXY[qy][qx][0] += wDx * wy;
2607  gradXY[qy][qx][1] += wx * wDy;
2608  gradXY[qy][qx][2] += wx * wy;
2609  }
2610  }
2611  }
2612  for (int qz = 0; qz < Q1D; ++qz)
2613  {
2614  const double wz = Bc(qz,dz);
2615  const double wDz = Gc(qz,dz);
2616  for (int qy = 0; qy < Q1D; ++qy)
2617  {
2618  for (int qx = 0; qx < Q1D; ++qx)
2619  {
2620  mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2621  mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2622  mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2623  }
2624  }
2625  }
2626  }
2627 
2628  // Apply D operator.
2629  for (int qz = 0; qz < Q1D; ++qz)
2630  {
2631  for (int qy = 0; qy < Q1D; ++qy)
2632  {
2633  for (int qx = 0; qx < Q1D; ++qx)
2634  {
2635  const double O11 = op(qx,qy,qz,0,e);
2636  const double O12 = op(qx,qy,qz,1,e);
2637  const double O13 = op(qx,qy,qz,2,e);
2638  const double O22 = op(qx,qy,qz,3,e);
2639  const double O23 = op(qx,qy,qz,4,e);
2640  const double O33 = op(qx,qy,qz,5,e);
2641  const double massX = mass[qz][qy][qx][0];
2642  const double massY = mass[qz][qy][qx][1];
2643  const double massZ = mass[qz][qy][qx][2];
2644  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2645  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2646  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2647  }
2648  }
2649  }
2650 
2651  for (int qz = 0; qz < Q1D; ++qz)
2652  {
2653  double massXY[MAX_D1D][MAX_D1D];
2654 
2655  int osc = 0;
2656 
2657  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2658  {
2659  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2660  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2661  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2662 
2663  for (int dy = 0; dy < D1Dy; ++dy)
2664  {
2665  for (int dx = 0; dx < D1Dx; ++dx)
2666  {
2667  massXY[dy][dx] = 0.0;
2668  }
2669  }
2670  for (int qy = 0; qy < Q1D; ++qy)
2671  {
2672  double massX[MAX_D1D];
2673  for (int dx = 0; dx < D1Dx; ++dx)
2674  {
2675  massX[dx] = 0;
2676  }
2677  for (int qx = 0; qx < Q1D; ++qx)
2678  {
2679  for (int dx = 0; dx < D1Dx; ++dx)
2680  {
2681  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2682  }
2683  }
2684  for (int dy = 0; dy < D1Dy; ++dy)
2685  {
2686  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2687  for (int dx = 0; dx < D1Dx; ++dx)
2688  {
2689  massXY[dy][dx] += massX[dx] * wy;
2690  }
2691  }
2692  }
2693 
2694  for (int dz = 0; dz < D1Dz; ++dz)
2695  {
2696  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2697  for (int dy = 0; dy < D1Dy; ++dy)
2698  {
2699  for (int dx = 0; dx < D1Dx; ++dx)
2700  {
2701  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2702  }
2703  }
2704  }
2705 
2706  osc += D1Dx * D1Dy * D1Dz;
2707  } // loop c
2708  } // loop qz
2709  }); // end of element loop
2710 }
2711 
2712 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2713 // integrated against H(curl) test functions corresponding to y.
2714 void PAHcurlH1Apply2D(const int D1D,
2715  const int Q1D,
2716  const int NE,
2717  const Array<double> &bc,
2718  const Array<double> &gc,
2719  const Array<double> &bot,
2720  const Array<double> &bct,
2721  const Vector &pa_data,
2722  const Vector &x,
2723  Vector &y)
2724 {
2725  constexpr static int VDIM = 2;
2726  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2727  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2728 
2729  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2730  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2731  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2732  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2733  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
2734  auto X = Reshape(x.Read(), D1D, D1D, NE);
2735  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2736 
2737  MFEM_FORALL(e, NE,
2738  {
2739  double mass[MAX_Q1D][MAX_Q1D][VDIM];
2740 
2741  for (int qy = 0; qy < Q1D; ++qy)
2742  {
2743  for (int qx = 0; qx < Q1D; ++qx)
2744  {
2745  for (int c = 0; c < VDIM; ++c)
2746  {
2747  mass[qy][qx][c] = 0.0;
2748  }
2749  }
2750  }
2751 
2752  for (int dy = 0; dy < D1D; ++dy)
2753  {
2754  double gradX[MAX_Q1D][2];
2755  for (int qx = 0; qx < Q1D; ++qx)
2756  {
2757  gradX[qx][0] = 0.0;
2758  gradX[qx][1] = 0.0;
2759  }
2760  for (int dx = 0; dx < D1D; ++dx)
2761  {
2762  const double s = X(dx,dy,e);
2763  for (int qx = 0; qx < Q1D; ++qx)
2764  {
2765  gradX[qx][0] += s * Bc(qx,dx);
2766  gradX[qx][1] += s * Gc(qx,dx);
2767  }
2768  }
2769  for (int qy = 0; qy < Q1D; ++qy)
2770  {
2771  const double wy = Bc(qy,dy);
2772  const double wDy = Gc(qy,dy);
2773  for (int qx = 0; qx < Q1D; ++qx)
2774  {
2775  const double wx = gradX[qx][0];
2776  const double wDx = gradX[qx][1];
2777  mass[qy][qx][0] += wDx * wy;
2778  mass[qy][qx][1] += wx * wDy;
2779  }
2780  }
2781  }
2782 
2783  // Apply D operator.
2784  for (int qy = 0; qy < Q1D; ++qy)
2785  {
2786  for (int qx = 0; qx < Q1D; ++qx)
2787  {
2788  const double O11 = op(qx,qy,0,e);
2789  const double O12 = op(qx,qy,1,e);
2790  const double O22 = op(qx,qy,2,e);
2791  const double massX = mass[qy][qx][0];
2792  const double massY = mass[qy][qx][1];
2793  mass[qy][qx][0] = (O11*massX)+(O12*massY);
2794  mass[qy][qx][1] = (O12*massX)+(O22*massY);
2795  }
2796  }
2797 
2798  for (int qy = 0; qy < Q1D; ++qy)
2799  {
2800  int osc = 0;
2801 
2802  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2803  {
2804  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2805  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2806 
2807  double massX[MAX_D1D];
2808  for (int dx = 0; dx < D1Dx; ++dx)
2809  {
2810  massX[dx] = 0.0;
2811  }
2812  for (int qx = 0; qx < Q1D; ++qx)
2813  {
2814  for (int dx = 0; dx < D1Dx; ++dx)
2815  {
2816  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2817  }
2818  }
2819 
2820  for (int dy = 0; dy < D1Dy; ++dy)
2821  {
2822  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2823 
2824  for (int dx = 0; dx < D1Dx; ++dx)
2825  {
2826  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
2827  }
2828  }
2829 
2830  osc += D1Dx * D1Dy;
2831  } // loop c
2832  }
2833  }); // end of element loop
2834 }
2835 
2836 // PA H(curl) assemble kernel
2837 void PAHcurlL2Setup(const int NQ,
2838  const int coeffDim,
2839  const int NE,
2840  const Array<double> &w,
2841  Vector &coeff,
2842  Vector &op)
2843 {
2844  auto W = w.Read();
2845  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
2846  auto y = Reshape(op.Write(), coeffDim, NQ, NE);
2847 
2848  MFEM_FORALL(e, NE,
2849  {
2850  for (int q = 0; q < NQ; ++q)
2851  {
2852  for (int c=0; c<coeffDim; ++c)
2853  {
2854  y(c,q,e) = W[q] * C(c,q,e);
2855  }
2856  }
2857  });
2858 }
2859 
2860 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
2861  const FiniteElementSpace &test_fes)
2862 {
2863  // Assumes tensor-product elements, with vector test and trial spaces.
2864  Mesh *mesh = trial_fes.GetMesh();
2865  const FiniteElement *trial_fel = trial_fes.GetFE(0);
2866  const FiniteElement *test_fel = test_fes.GetFE(0);
2867 
2868  const VectorTensorFiniteElement *trial_el =
2869  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
2870  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
2871 
2872  const VectorTensorFiniteElement *test_el =
2873  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
2874  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
2875 
2876  const IntegrationRule *ir
2877  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
2878  *mesh->GetElementTransformation(0));
2879  const int dims = trial_el->GetDim();
2880  MFEM_VERIFY(dims == 3, "");
2881 
2882  const int nq = ir->GetNPoints();
2883  dim = mesh->Dimension();
2884  MFEM_VERIFY(dim == 3, "");
2885 
2886  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
2887 
2888  ne = trial_fes.GetNE();
2889  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
2890  mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2891  mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2892  mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2893  mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2894  dofs1D = mapsC->ndof;
2895  quad1D = mapsC->nqpt;
2896  dofs1Dtest = mapsCtest->ndof;
2897 
2898  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
2899 
2900  testType = test_el->GetDerivType();
2901  trialType = trial_el->GetDerivType();
2902 
2903  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
2904  coeffDim = (DQ ? 3 : 1);
2905 
2906  pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType());
2907 
2908  Vector coeff(coeffDim * nq * ne);
2909  coeff = 1.0;
2910  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
2911  if (Q || DQ)
2912  {
2913  Vector V(coeffDim);
2914  if (DQ)
2915  {
2916  MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
2917  }
2918 
2919  for (int e=0; e<ne; ++e)
2920  {
2921  ElementTransformation *tr = mesh->GetElementTransformation(e);
2922 
2923  for (int p=0; p<nq; ++p)
2924  {
2925  if (DQ)
2926  {
2927  DQ->Eval(V, *tr, ir->IntPoint(p));
2928  for (int i=0; i<coeffDim; ++i)
2929  {
2930  coeffh(i, p, e) = V[i];
2931  }
2932  }
2933  else
2934  {
2935  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
2936  }
2937  }
2938  }
2939  }
2940 
2941  if (testType == mfem::FiniteElement::CURL &&
2942  trialType == mfem::FiniteElement::CURL && dim == 3)
2943  {
2944  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
2945  }
2946  else if (testType == mfem::FiniteElement::DIV &&
2947  trialType == mfem::FiniteElement::CURL && dim == 3 &&
2948  test_fel->GetOrder() == trial_fel->GetOrder())
2949  {
2950  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
2951  pa_data);
2952  }
2953  else
2954  {
2955  MFEM_ABORT("Unknown kernel.");
2956  }
2957 }
2958 
2959 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
2960 // integrated against H(curl) test functions corresponding to y.
2961 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2962 static void PAHcurlL2Apply3D(const int D1D,
2963  const int Q1D,
2964  const int coeffDim,
2965  const int NE,
2966  const Array<double> &bo,
2967  const Array<double> &bc,
2968  const Array<double> &bot,
2969  const Array<double> &bct,
2970  const Array<double> &gc,
2971  const Vector &pa_data,
2972  const Vector &x,
2973  Vector &y)
2974 {
2975  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2976  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2977  // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2978  // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
2979  // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
2980  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2981  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2982  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2983 
2984  constexpr static int VDIM = 3;
2985 
2986  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2987  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2988  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2989  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2990  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2991  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
2992  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2993  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2994 
2995  MFEM_FORALL(e, NE,
2996  {
2997  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2998  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
2999 
3000  for (int qz = 0; qz < Q1D; ++qz)
3001  {
3002  for (int qy = 0; qy < Q1D; ++qy)
3003  {
3004  for (int qx = 0; qx < Q1D; ++qx)
3005  {
3006  for (int c = 0; c < VDIM; ++c)
3007  {
3008  curl[qz][qy][qx][c] = 0.0;
3009  }
3010  }
3011  }
3012  }
3013 
3014  // We treat x, y, z components separately for optimization specific to each.
3015 
3016  int osc = 0;
3017 
3018  {
3019  // x component
3020  const int D1Dz = D1D;
3021  const int D1Dy = D1D;
3022  const int D1Dx = D1D - 1;
3023 
3024  for (int dz = 0; dz < D1Dz; ++dz)
3025  {
3026  double gradXY[MAX_Q1D][MAX_Q1D][2];
3027  for (int qy = 0; qy < Q1D; ++qy)
3028  {
3029  for (int qx = 0; qx < Q1D; ++qx)
3030  {
3031  for (int d = 0; d < 2; ++d)
3032  {
3033  gradXY[qy][qx][d] = 0.0;
3034  }
3035  }
3036  }
3037 
3038  for (int dy = 0; dy < D1Dy; ++dy)
3039  {
3040  double massX[MAX_Q1D];
3041  for (int qx = 0; qx < Q1D; ++qx)
3042  {
3043  massX[qx] = 0.0;
3044  }
3045 
3046  for (int dx = 0; dx < D1Dx; ++dx)
3047  {
3048  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3049  for (int qx = 0; qx < Q1D; ++qx)
3050  {
3051  massX[qx] += t * Bo(qx,dx);
3052  }
3053  }
3054 
3055  for (int qy = 0; qy < Q1D; ++qy)
3056  {
3057  const double wy = Bc(qy,dy);
3058  const double wDy = Gc(qy,dy);
3059  for (int qx = 0; qx < Q1D; ++qx)
3060  {
3061  const double wx = massX[qx];
3062  gradXY[qy][qx][0] += wx * wDy;
3063  gradXY[qy][qx][1] += wx * wy;
3064  }
3065  }
3066  }
3067 
3068  for (int qz = 0; qz < Q1D; ++qz)
3069  {
3070  const double wz = Bc(qz,dz);
3071  const double wDz = Gc(qz,dz);
3072  for (int qy = 0; qy < Q1D; ++qy)
3073  {
3074  for (int qx = 0; qx < Q1D; ++qx)
3075  {
3076  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3077  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3078  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3079  }
3080  }
3081  }
3082  }
3083 
3084  osc += D1Dx * D1Dy * D1Dz;
3085  }
3086 
3087  {
3088  // y component
3089  const int D1Dz = D1D;
3090  const int D1Dy = D1D - 1;
3091  const int D1Dx = D1D;
3092 
3093  for (int dz = 0; dz < D1Dz; ++dz)
3094  {
3095  double gradXY[MAX_Q1D][MAX_Q1D][2];
3096  for (int qy = 0; qy < Q1D; ++qy)
3097  {
3098  for (int qx = 0; qx < Q1D; ++qx)
3099  {
3100  for (int d = 0; d < 2; ++d)
3101  {
3102  gradXY[qy][qx][d] = 0.0;
3103  }
3104  }
3105  }
3106 
3107  for (int dx = 0; dx < D1Dx; ++dx)
3108  {
3109  double massY[MAX_Q1D];
3110  for (int qy = 0; qy < Q1D; ++qy)
3111  {
3112  massY[qy] = 0.0;
3113  }
3114 
3115  for (int dy = 0; dy < D1Dy; ++dy)
3116  {
3117  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3118  for (int qy = 0; qy < Q1D; ++qy)
3119  {
3120  massY[qy] += t * Bo(qy,dy);
3121  }
3122  }
3123 
3124  for (int qx = 0; qx < Q1D; ++qx)
3125  {
3126  const double wx = Bc(qx,dx);
3127  const double wDx = Gc(qx,dx);
3128  for (int qy = 0; qy < Q1D; ++qy)
3129  {
3130  const double wy = massY[qy];
3131  gradXY[qy][qx][0] += wDx * wy;
3132  gradXY[qy][qx][1] += wx * wy;
3133  }
3134  }
3135  }
3136 
3137  for (int qz = 0; qz < Q1D; ++qz)
3138  {
3139  const double wz = Bc(qz,dz);
3140  const double wDz = Gc(qz,dz);
3141  for (int qy = 0; qy < Q1D; ++qy)
3142  {
3143  for (int qx = 0; qx < Q1D; ++qx)
3144  {
3145  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3146  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3147  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3148  }
3149  }
3150  }
3151  }
3152 
3153  osc += D1Dx * D1Dy * D1Dz;
3154  }
3155 
3156  {
3157  // z component
3158  const int D1Dz = D1D - 1;
3159  const int D1Dy = D1D;
3160  const int D1Dx = D1D;
3161 
3162  for (int dx = 0; dx < D1Dx; ++dx)
3163  {
3164  double gradYZ[MAX_Q1D][MAX_Q1D][2];
3165  for (int qz = 0; qz < Q1D; ++qz)
3166  {
3167  for (int qy = 0; qy < Q1D; ++qy)
3168  {
3169  for (int d = 0; d < 2; ++d)
3170  {
3171  gradYZ[qz][qy][d] = 0.0;
3172  }
3173  }
3174  }
3175 
3176  for (int dy = 0; dy < D1Dy; ++dy)
3177  {
3178  double massZ[MAX_Q1D];
3179  for (int qz = 0; qz < Q1D; ++qz)
3180  {
3181  massZ[qz] = 0.0;
3182  }
3183 
3184  for (int dz = 0; dz < D1Dz; ++dz)
3185  {
3186  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3187  for (int qz = 0; qz < Q1D; ++qz)
3188  {
3189  massZ[qz] += t * Bo(qz,dz);
3190  }
3191  }
3192 
3193  for (int qy = 0; qy < Q1D; ++qy)
3194  {
3195  const double wy = Bc(qy,dy);
3196  const double wDy = Gc(qy,dy);
3197  for (int qz = 0; qz < Q1D; ++qz)
3198  {
3199  const double wz = massZ[qz];
3200  gradYZ[qz][qy][0] += wz * wy;
3201  gradYZ[qz][qy][1] += wz * wDy;
3202  }
3203  }
3204  }
3205 
3206  for (int qx = 0; qx < Q1D; ++qx)
3207  {
3208  const double wx = Bc(qx,dx);
3209  const double wDx = Gc(qx,dx);
3210 
3211  for (int qy = 0; qy < Q1D; ++qy)
3212  {
3213  for (int qz = 0; qz < Q1D; ++qz)
3214  {
3215  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3216  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3217  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3218  }
3219  }
3220  }
3221  }
3222  }
3223 
3224  // Apply D operator.
3225  for (int qz = 0; qz < Q1D; ++qz)
3226  {
3227  for (int qy = 0; qy < Q1D; ++qy)
3228  {
3229  for (int qx = 0; qx < Q1D; ++qx)
3230  {
3231  for (int c = 0; c < VDIM; ++c)
3232  {
3233  curl[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
3234  }
3235  }
3236  }
3237  }
3238 
3239  for (int qz = 0; qz < Q1D; ++qz)
3240  {
3241  double massXY[MAX_D1D][MAX_D1D];
3242 
3243  osc = 0;
3244 
3245  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3246  {
3247  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3248  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3249  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3250 
3251  for (int dy = 0; dy < D1Dy; ++dy)
3252  {
3253  for (int dx = 0; dx < D1Dx; ++dx)
3254  {
3255  massXY[dy][dx] = 0.0;
3256  }
3257  }
3258  for (int qy = 0; qy < Q1D; ++qy)
3259  {
3260  double massX[MAX_D1D];
3261  for (int dx = 0; dx < D1Dx; ++dx)
3262  {
3263  massX[dx] = 0.0;
3264  }
3265  for (int qx = 0; qx < Q1D; ++qx)
3266  {
3267  for (int dx = 0; dx < D1Dx; ++dx)
3268  {
3269  massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3270  }
3271  }
3272  for (int dy = 0; dy < D1Dy; ++dy)
3273  {
3274  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3275  for (int dx = 0; dx < D1Dx; ++dx)
3276  {
3277  massXY[dy][dx] += massX[dx] * wy;
3278  }
3279  }
3280  }
3281 
3282  for (int dz = 0; dz < D1Dz; ++dz)
3283  {
3284  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3285  for (int dy = 0; dy < D1Dy; ++dy)
3286  {
3287  for (int dx = 0; dx < D1Dx; ++dx)
3288  {
3289  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3290  }
3291  }
3292  }
3293 
3294  osc += D1Dx * D1Dy * D1Dz;
3295  } // loop c
3296  } // loop qz
3297  }); // end of element loop
3298 }
3299 
3300 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3301 // integrated against H(curl) test functions corresponding to y.
3302 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3303 static void SmemPAHcurlL2Apply3D(const int D1D,
3304  const int Q1D,
3305  const int coeffDim,
3306  const int NE,
3307  const Array<double> &bo,
3308  const Array<double> &bc,
3309  const Array<double> &gc,
3310  const Vector &pa_data,
3311  const Vector &x,
3312  Vector &y)
3313 {
3314  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3315  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3316 
3317  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3318  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3319  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3320  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3321  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3322  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3323 
3324  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
3325  {
3326  constexpr int VDIM = 3;
3327  constexpr int maxCoeffDim = 3;
3328 
3329  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3330  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3331  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3332 
3333  double opc[maxCoeffDim];
3334  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3335  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3336 
3337  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3338 
3339  MFEM_FOREACH_THREAD(qx,x,Q1D)
3340  {
3341  MFEM_FOREACH_THREAD(qy,y,Q1D)
3342  {
3343  MFEM_FOREACH_THREAD(qz,z,Q1D)
3344  {
3345  for (int i=0; i<coeffDim; ++i)
3346  {
3347  opc[i] = op(i,qx,qy,qz,e);
3348  }
3349  }
3350  }
3351  }
3352 
3353  const int tidx = MFEM_THREAD_ID(x);
3354  const int tidy = MFEM_THREAD_ID(y);
3355  const int tidz = MFEM_THREAD_ID(z);
3356 
3357  if (tidz == 0)
3358  {
3359  MFEM_FOREACH_THREAD(d,y,D1D)
3360  {
3361  MFEM_FOREACH_THREAD(q,x,Q1D)
3362  {
3363  sBc[d][q] = Bc(q,d);
3364  sGc[d][q] = Gc(q,d);
3365  if (d < D1D-1)
3366  {
3367  sBo[d][q] = Bo(q,d);
3368  }
3369  }
3370  }
3371  }
3372  MFEM_SYNC_THREAD;
3373 
3374  for (int qz=0; qz < Q1D; ++qz)
3375  {
3376  if (tidz == qz)
3377  {
3378  MFEM_FOREACH_THREAD(qy,y,Q1D)
3379  {
3380  MFEM_FOREACH_THREAD(qx,x,Q1D)
3381  {
3382  for (int i=0; i<3; ++i)
3383  {
3384  curl[qy][qx][i] = 0.0;
3385  }
3386  }
3387  }
3388  }
3389 
3390  int osc = 0;
3391  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3392  {
3393  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3394  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3395  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3396 
3397  MFEM_FOREACH_THREAD(dz,z,D1Dz)
3398  {
3399  MFEM_FOREACH_THREAD(dy,y,D1Dy)
3400  {
3401  MFEM_FOREACH_THREAD(dx,x,D1Dx)
3402  {
3403  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3404  }
3405  }
3406  }
3407  MFEM_SYNC_THREAD;
3408 
3409  if (tidz == qz)
3410  {
3411  if (c == 0)
3412  {
3413  for (int i=0; i<coeffDim; ++i)
3414  {
3415  sop[i][tidx][tidy] = opc[i];
3416  }
3417  }
3418 
3419  MFEM_FOREACH_THREAD(qy,y,Q1D)
3420  {
3421  MFEM_FOREACH_THREAD(qx,x,Q1D)
3422  {
3423  double u = 0.0;
3424  double v = 0.0;
3425 
3426  // We treat x, y, z components separately for optimization specific to each.
3427  if (c == 0) // x component
3428  {
3429  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3430 
3431  for (int dz = 0; dz < D1Dz; ++dz)
3432  {
3433  const double wz = sBc[dz][qz];
3434  const double wDz = sGc[dz][qz];
3435 
3436  for (int dy = 0; dy < D1Dy; ++dy)
3437  {
3438  const double wy = sBc[dy][qy];
3439  const double wDy = sGc[dy][qy];
3440 
3441  for (int dx = 0; dx < D1Dx; ++dx)
3442  {
3443  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
3444  u += wx * wDy * wz;
3445  v += wx * wy * wDz;
3446  }
3447  }
3448  }
3449 
3450  curl[qy][qx][1] += v; // (u_0)_{x_2}
3451  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
3452  }
3453  else if (c == 1) // y component
3454  {
3455  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3456 
3457  for (int dz = 0; dz < D1Dz; ++dz)
3458  {
3459  const double wz = sBc[dz][qz];
3460  const double wDz = sGc[dz][qz];
3461 
3462  for (int dy = 0; dy < D1Dy; ++dy)
3463  {
3464  const double wy = sBo[dy][qy];
3465 
3466  for (int dx = 0; dx < D1Dx; ++dx)
3467  {
3468  const double t = sX[dz][dy][dx];
3469  const double wx = t * sBc[dx][qx];
3470  const double wDx = t * sGc[dx][qx];
3471 
3472  u += wDx * wy * wz;
3473  v += wx * wy * wDz;
3474  }
3475  }
3476  }
3477 
3478  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
3479  curl[qy][qx][2] += u; // (u_1)_{x_0}
3480  }
3481  else // z component
3482  {
3483  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3484 
3485  for (int dz = 0; dz < D1Dz; ++dz)
3486  {
3487  const double wz = sBo[dz][qz];
3488 
3489  for (int dy = 0; dy < D1Dy; ++dy)
3490  {
3491  const double wy = sBc[dy][qy];
3492  const double wDy = sGc[dy][qy];
3493 
3494  for (int dx = 0; dx < D1Dx; ++dx)
3495  {
3496  const double t = sX[dz][dy][dx];
3497  const double wx = t * sBc[dx][qx];
3498  const double wDx = t * sGc[dx][qx];
3499 
3500  u += wDx * wy * wz;
3501  v += wx * wDy * wz;
3502  }
3503  }
3504  }
3505 
3506  curl[qy][qx][0] += v; // (u_2)_{x_1}
3507  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
3508  }
3509  } // qx
3510  } // qy
3511  } // tidz == qz
3512 
3513  osc += D1Dx * D1Dy * D1Dz;
3514  MFEM_SYNC_THREAD;
3515  } // c
3516 
3517  double dxyz1 = 0.0;
3518  double dxyz2 = 0.0;
3519  double dxyz3 = 0.0;
3520 
3521  MFEM_FOREACH_THREAD(dz,z,D1D)
3522  {
3523  const double wcz = sBc[dz][qz];
3524  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
3525 
3526  MFEM_FOREACH_THREAD(dy,y,D1D)
3527  {
3528  MFEM_FOREACH_THREAD(dx,x,D1D)
3529  {
3530  for (int qy = 0; qy < Q1D; ++qy)
3531  {
3532  const double wcy = sBc[dy][qy];
3533  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
3534 
3535  for (int qx = 0; qx < Q1D; ++qx)
3536  {
3537  const double O1 = sop[0][qx][qy];
3538  const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
3539  const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
3540 
3541  const double c1 = O1 * curl[qy][qx][0];
3542  const double c2 = O2 * curl[qy][qx][1];
3543  const double c3 = O3 * curl[qy][qx][2];
3544 
3545  const double wcx = sBc[dx][qx];
3546 
3547  if (dx < D1D-1)
3548  {
3549  const double wx = sBo[dx][qx];
3550  dxyz1 += c1 * wx * wcy * wcz;
3551  }
3552 
3553  dxyz2 += c2 * wcx * wy * wcz;
3554  dxyz3 += c3 * wcx * wcy * wz;
3555  } // qx
3556  } // qy
3557  } // dx
3558  } // dy
3559  } // dz
3560 
3561  MFEM_SYNC_THREAD;
3562 
3563  MFEM_FOREACH_THREAD(dz,z,D1D)
3564  {
3565  MFEM_FOREACH_THREAD(dy,y,D1D)
3566  {
3567  MFEM_FOREACH_THREAD(dx,x,D1D)
3568  {
3569  if (dx < D1D-1)
3570  {
3571  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
3572  }
3573  if (dy < D1D-1)
3574  {
3575  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
3576  }
3577  if (dz < D1D-1)
3578  {
3579  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
3580  }
3581  }
3582  }
3583  }
3584  } // qz
3585  }); // end of element loop
3586 }
3587 
3588 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3589 // integrated against H(div) test functions corresponding to y.
3590 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3591 static void PAHcurlHdivApply3D(const int D1D,
3592  const int D1Dtest,
3593  const int Q1D,
3594  const int NE,
3595  const Array<double> &bo,
3596  const Array<double> &bc,
3597  const Array<double> &bot,
3598  const Array<double> &bct,
3599  const Array<double> &gc,
3600  const Vector &pa_data,
3601  const Vector &x,
3602  Vector &y)
3603 {
3604  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3605  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3606  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
3607  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
3608  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
3609  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3610  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3611  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3612 
3613  constexpr static int VDIM = 3;
3614 
3615  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3616  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3617  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
3618  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
3619  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3620  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
3621  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3622  auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
3623 
3624  MFEM_FORALL(e, NE,
3625  {
3626  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3627  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3628 
3629  for (int qz = 0; qz < Q1D; ++qz)
3630  {
3631  for (int qy = 0; qy < Q1D; ++qy)
3632  {
3633  for (int qx = 0; qx < Q1D; ++qx)
3634  {
3635  for (int c = 0; c < VDIM; ++c)
3636  {
3637  curl[qz][qy][qx][c] = 0.0;
3638  }
3639  }
3640  }
3641  }
3642 
3643  // We treat x, y, z components separately for optimization specific to each.
3644 
3645  int osc = 0;
3646 
3647  {
3648  // x component
3649  const int D1Dz = D1D;
3650  const int D1Dy = D1D;
3651  const int D1Dx = D1D - 1;
3652 
3653  for (int dz = 0; dz < D1Dz; ++dz)
3654  {
3655  double gradXY[MAX_Q1D][MAX_Q1D][2];
3656  for (int qy = 0; qy < Q1D; ++qy)
3657  {
3658  for (int qx = 0; qx < Q1D; ++qx)
3659  {
3660  for (int d = 0; d < 2; ++d)
3661  {
3662  gradXY[qy][qx][d] = 0.0;
3663  }
3664  }
3665  }
3666 
3667  for (int dy = 0; dy < D1Dy; ++dy)
3668  {
3669  double massX[MAX_Q1D];
3670  for (int qx = 0; qx < Q1D; ++qx)
3671  {
3672  massX[qx] = 0.0;
3673  }
3674 
3675  for (int dx = 0; dx < D1Dx; ++dx)
3676  {
3677  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3678  for (int qx = 0; qx < Q1D; ++qx)
3679  {
3680  massX[qx] += t * Bo(qx,dx);
3681  }
3682  }
3683 
3684  for (int qy = 0; qy < Q1D; ++qy)
3685  {
3686  const double wy = Bc(qy,dy);
3687  const double wDy = Gc(qy,dy);
3688  for (int qx = 0; qx < Q1D; ++qx)
3689  {
3690  const double wx = massX[qx];
3691  gradXY[qy][qx][0] += wx * wDy;
3692  gradXY[qy][qx][1] += wx * wy;
3693  }
3694  }
3695  }
3696 
3697  for (int qz = 0; qz < Q1D; ++qz)
3698  {
3699  const double wz = Bc(qz,dz);
3700  const double wDz = Gc(qz,dz);
3701  for (int qy = 0; qy < Q1D; ++qy)
3702  {
3703  for (int qx = 0; qx < Q1D; ++qx)
3704  {
3705  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3706  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3707  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3708  }
3709  }
3710  }
3711  }
3712 
3713  osc += D1Dx * D1Dy * D1Dz;
3714  }
3715 
3716  {
3717  // y component
3718  const int D1Dz = D1D;
3719  const int D1Dy = D1D - 1;
3720  const int D1Dx = D1D;
3721 
3722  for (int dz = 0; dz < D1Dz; ++dz)
3723  {
3724  double gradXY[MAX_Q1D][MAX_Q1D][2];
3725  for (int qy = 0; qy < Q1D; ++qy)
3726  {
3727  for (int qx = 0; qx < Q1D; ++qx)
3728  {
3729  for (int d = 0; d < 2; ++d)
3730  {
3731  gradXY[qy][qx][d] = 0.0;
3732  }
3733  }
3734  }
3735 
3736  for (int dx = 0; dx < D1Dx; ++dx)
3737  {
3738  double massY[MAX_Q1D];
3739  for (int qy = 0; qy < Q1D; ++qy)
3740  {
3741  massY[qy] = 0.0;
3742  }
3743 
3744  for (int dy = 0; dy < D1Dy; ++dy)
3745  {
3746  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3747  for (int qy = 0; qy < Q1D; ++qy)
3748  {
3749  massY[qy] += t * Bo(qy,dy);
3750  }
3751  }
3752 
3753  for (int qx = 0; qx < Q1D; ++qx)
3754  {
3755  const double wx = Bc(qx,dx);
3756  const double wDx = Gc(qx,dx);
3757  for (int qy = 0; qy < Q1D; ++qy)
3758  {
3759  const double wy = massY[qy];
3760  gradXY[qy][qx][0] += wDx * wy;
3761  gradXY[qy][qx][1] += wx * wy;
3762  }
3763  }
3764  }
3765 
3766  for (int qz = 0; qz < Q1D; ++qz)
3767  {
3768  const double wz = Bc(qz,dz);
3769  const double wDz = Gc(qz,dz);
3770  for (int qy = 0; qy < Q1D; ++qy)
3771  {
3772  for (int qx = 0; qx < Q1D; ++qx)
3773  {
3774  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3775  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3776  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3777  }
3778  }
3779  }
3780  }
3781 
3782  osc += D1Dx * D1Dy * D1Dz;
3783  }
3784 
3785  {
3786  // z component
3787  const int D1Dz = D1D - 1;
3788  const int D1Dy = D1D;
3789  const int D1Dx = D1D;
3790 
3791  for (int dx = 0; dx < D1Dx; ++dx)
3792  {
3793  double gradYZ[MAX_Q1D][MAX_Q1D][2];
3794  for (int qz = 0; qz < Q1D; ++qz)
3795  {
3796  for (int qy = 0; qy < Q1D; ++qy)
3797  {
3798  for (int d = 0; d < 2; ++d)
3799  {
3800  gradYZ[qz][qy][d] = 0.0;
3801  }
3802  }
3803  }
3804 
3805  for (int dy = 0; dy < D1Dy; ++dy)
3806  {
3807  double massZ[MAX_Q1D];
3808  for (int qz = 0; qz < Q1D; ++qz)
3809  {
3810  massZ[qz] = 0.0;
3811  }
3812 
3813  for (int dz = 0; dz < D1Dz; ++dz)
3814  {
3815  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3816  for (int qz = 0; qz < Q1D; ++qz)
3817  {
3818  massZ[qz] += t * Bo(qz,dz);
3819  }
3820  }
3821 
3822  for (int qy = 0; qy < Q1D; ++qy)
3823  {
3824  const double wy = Bc(qy,dy);
3825  const double wDy = Gc(qy,dy);
3826  for (int qz = 0; qz < Q1D; ++qz)
3827  {
3828  const double wz = massZ[qz];
3829  gradYZ[qz][qy][0] += wz * wy;
3830  gradYZ[qz][qy][1] += wz * wDy;
3831  }
3832  }
3833  }
3834 
3835  for (int qx = 0; qx < Q1D; ++qx)
3836  {
3837  const double wx = Bc(qx,dx);
3838  const double wDx = Gc(qx,dx);
3839 
3840  for (int qy = 0; qy < Q1D; ++qy)
3841  {
3842  for (int qz = 0; qz < Q1D; ++qz)
3843  {
3844  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3845  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3846  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3847  }
3848  }
3849  }
3850  }
3851  }
3852 
3853  // Apply D operator.
3854  for (int qz = 0; qz < Q1D; ++qz)
3855  {
3856  for (int qy = 0; qy < Q1D; ++qy)
3857  {
3858  for (int qx = 0; qx < Q1D; ++qx)
3859  {
3860  const double O11 = op(qx,qy,qz,0,e);
3861  const double O12 = op(qx,qy,qz,1,e);
3862  const double O13 = op(qx,qy,qz,2,e);
3863  const double O22 = op(qx,qy,qz,3,e);
3864  const double O23 = op(qx,qy,qz,4,e);
3865  const double O33 = op(qx,qy,qz,5,e);
3866 
3867  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
3868  (O13 * curl[qz][qy][qx][2]);
3869  const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
3870  (O23 * curl[qz][qy][qx][2]);
3871  const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
3872  (O33 * curl[qz][qy][qx][2]);
3873 
3874  curl[qz][qy][qx][0] = c1;
3875  curl[qz][qy][qx][1] = c2;
3876  curl[qz][qy][qx][2] = c3;
3877  }
3878  }
3879  }
3880 
3881  for (int qz = 0; qz < Q1D; ++qz)
3882  {
3883  double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
3884 
3885  osc = 0;
3886 
3887  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3888  {
3889  const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
3890  const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
3891  const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
3892 
3893  for (int dy = 0; dy < D1Dy; ++dy)
3894  {
3895  for (int dx = 0; dx < D1Dx; ++dx)
3896  {
3897  massXY[dy][dx] = 0.0;
3898  }
3899  }
3900  for (int qy = 0; qy < Q1D; ++qy)
3901  {
3902  double massX[HCURL_MAX_D1D];
3903  for (int dx = 0; dx < D1Dx; ++dx)
3904  {
3905  massX[dx] = 0.0;
3906  }
3907  for (int qx = 0; qx < Q1D; ++qx)
3908  {
3909  for (int dx = 0; dx < D1Dx; ++dx)
3910  {
3911  massX[dx] += curl[qz][qy][qx][c] *
3912  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
3913  }
3914  }
3915  for (int dy = 0; dy < D1Dy; ++dy)
3916  {
3917  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
3918  for (int dx = 0; dx < D1Dx; ++dx)
3919  {
3920  massXY[dy][dx] += massX[dx] * wy;
3921  }
3922  }
3923  }
3924 
3925  for (int dz = 0; dz < D1Dz; ++dz)
3926  {
3927  const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
3928  for (int dy = 0; dy < D1Dy; ++dy)
3929  {
3930  for (int dx = 0; dx < D1Dx; ++dx)
3931  {
3932  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
3933  massXY[dy][dx] * wz;
3934  }
3935  }
3936  }
3937 
3938  osc += D1Dx * D1Dy * D1Dz;
3939  } // loop c
3940  } // loop qz
3941  }); // end of element loop
3942 }
3943 
3944 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3945 {
3946  if (testType == mfem::FiniteElement::CURL &&
3947  trialType == mfem::FiniteElement::CURL && dim == 3)
3948  {
3949  if (Device::Allows(Backend::DEVICE_MASK))
3950  {
3951  const int ID = (dofs1D << 4) | quad1D;
3952  switch (ID)
3953  {
3954  case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, coeffDim, ne,
3955  mapsO->B, mapsC->B,
3956  mapsC->G, pa_data, x, y);
3957  case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, coeffDim, ne,
3958  mapsO->B, mapsC->B,
3959  mapsC->G, pa_data, x, y);
3960  case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, coeffDim, ne,
3961  mapsO->B, mapsC->B,
3962  mapsC->G, pa_data, x, y);
3963  case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, coeffDim, ne,
3964  mapsO->B, mapsC->B,
3965  mapsC->G, pa_data, x, y);
3966  default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B,
3967  mapsC->B,
3968  mapsC->G, pa_data, x, y);
3969  }
3970  }
3971  else
3972  PAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
3973  mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
3974  }
3975  else if (testType == mfem::FiniteElement::DIV &&
3976  trialType == mfem::FiniteElement::CURL && dim == 3)
3977  PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
3978  mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
3979  pa_data, x, y);
3980  else
3981  {
3982  MFEM_ABORT("Unsupported dimension or space!");
3983  }
3984 }
3985 
3986 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
3987  &trial_fes,
3988  const FiniteElementSpace &test_fes)
3989 {
3990  // Assumes tensor-product elements, with vector test and trial spaces.
3991  Mesh *mesh = trial_fes.GetMesh();
3992  const FiniteElement *trial_fel = trial_fes.GetFE(0);
3993  const FiniteElement *test_fel = test_fes.GetFE(0);
3994 
3995  const VectorTensorFiniteElement *trial_el =
3996  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3997  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3998 
3999  const VectorTensorFiniteElement *test_el =
4000  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
4001  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
4002 
4003  const IntegrationRule *ir
4004  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4005  *mesh->GetElementTransformation(0));
4006  const int dims = trial_el->GetDim();
4007  MFEM_VERIFY(dims == 3, "");
4008 
4009  const int nq = ir->GetNPoints();
4010  dim = mesh->Dimension();
4011  MFEM_VERIFY(dim == 3, "");
4012 
4013  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4014 
4015  ne = trial_fes.GetNE();
4016  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4017  mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4018  mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4019  dofs1D = mapsC->ndof;
4020  quad1D = mapsC->nqpt;
4021 
4022  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4023 
4024  coeffDim = DQ ? 3 : 1;
4025 
4026  pa_data.SetSize(coeffDim * nq * ne, Device::GetMemoryType());
4027 
4028  Vector coeff(coeffDim * nq * ne);
4029  coeff = 1.0;
4030  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4031  if (Q || DQ)
4032  {
4033  Vector V(coeffDim);
4034  if (DQ)
4035  {
4036  MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4037  }
4038 
4039  for (int e=0; e<ne; ++e)
4040  {
4041  ElementTransformation *tr = mesh->GetElementTransformation(e);
4042 
4043  for (int p=0; p<nq; ++p)
4044  {
4045  if (DQ)
4046  {
4047  DQ->Eval(V, *tr, ir->IntPoint(p));
4048  for (int i=0; i<coeffDim; ++i)
4049  {
4050  coeffh(i, p, e) = V[i];
4051  }
4052  }
4053  else
4054  {
4055  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4056  }
4057  }
4058  }
4059  }
4060 
4061  testType = test_el->GetDerivType();
4062  trialType = trial_el->GetDerivType();
4063 
4064  if (trialType == mfem::FiniteElement::CURL && dim == 3)
4065  {
4066  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4067  }
4068  else
4069  {
4070  MFEM_ABORT("Unknown kernel.");
4071  }
4072 }
4073 
4074 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4075 // of H(curl) test functions corresponding to y.
4076 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4077 static void PAHcurlL2Apply3DTranspose(const int D1D,
4078  const int Q1D,
4079  const int coeffDim,
4080  const int NE,
4081  const Array<double> &bo,
4082  const Array<double> &bc,
4083  const Array<double> &bot,
4084  const Array<double> &bct,
4085  const Array<double> &gct,
4086  const Vector &pa_data,
4087  const Vector &x,
4088  Vector &y)
4089 {
4090  // See PAHcurlL2Apply3D for comments.
4091 
4092  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4093  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4094 
4095  constexpr static int VDIM = 3;
4096 
4097  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4098  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4099  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4100  auto Bct = Reshape(bct.Read(), D1D, Q1D);
4101  auto Gct = Reshape(gct.Read(), D1D, Q1D);
4102  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4103  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4104  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4105 
4106  MFEM_FORALL(e, NE,
4107  {
4108  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4109 
4110  for (int qz = 0; qz < Q1D; ++qz)
4111  {
4112  for (int qy = 0; qy < Q1D; ++qy)
4113  {
4114  for (int qx = 0; qx < Q1D; ++qx)
4115  {
4116  for (int c = 0; c < VDIM; ++c)
4117  {
4118  mass[qz][qy][qx][c] = 0.0;
4119  }
4120  }
4121  }
4122  }
4123 
4124  int osc = 0;
4125 
4126  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4127  {
4128  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4129  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4130  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4131 
4132  for (int dz = 0; dz < D1Dz; ++dz)
4133  {
4134  double massXY[MAX_Q1D][MAX_Q1D];
4135  for (int qy = 0; qy < Q1D; ++qy)
4136  {
4137  for (int qx = 0; qx < Q1D; ++qx)
4138  {
4139  massXY[qy][qx] = 0.0;
4140  }
4141  }
4142 
4143  for (int dy = 0; dy < D1Dy; ++dy)
4144  {
4145  double massX[MAX_Q1D];
4146  for (int qx = 0; qx < Q1D; ++qx)
4147  {
4148  massX[qx] = 0.0;
4149  }
4150 
4151  for (int dx = 0; dx < D1Dx; ++dx)
4152  {
4153  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4154  for (int qx = 0; qx < Q1D; ++qx)
4155  {
4156  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4157  }
4158  }
4159 
4160  for (int qy = 0; qy < Q1D; ++qy)
4161  {
4162  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4163  for (int qx = 0; qx < Q1D; ++qx)
4164  {
4165  const double wx = massX[qx];
4166  massXY[qy][qx] += wx * wy;
4167  }
4168  }
4169  }
4170 
4171  for (int qz = 0; qz < Q1D; ++qz)
4172  {
4173  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4174  for (int qy = 0; qy < Q1D; ++qy)
4175  {
4176  for (int qx = 0; qx < Q1D; ++qx)
4177  {
4178  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4179  }
4180  }
4181  }
4182  }
4183 
4184  osc += D1Dx * D1Dy * D1Dz;
4185  } // loop (c) over components
4186 
4187  // Apply D operator.
4188  for (int qz = 0; qz < Q1D; ++qz)
4189  {
4190  for (int qy = 0; qy < Q1D; ++qy)
4191  {
4192  for (int qx = 0; qx < Q1D; ++qx)
4193  {
4194  for (int c=0; c<VDIM; ++c)
4195  {
4196  mass[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
4197  }
4198  }
4199  }
4200  }
4201 
4202  // x component
4203  osc = 0;
4204  {
4205  const int D1Dz = D1D;
4206  const int D1Dy = D1D;
4207  const int D1Dx = D1D - 1;
4208 
4209  for (int qz = 0; qz < Q1D; ++qz)
4210  {
4211  double gradXY12[MAX_D1D][MAX_D1D];
4212  double gradXY21[MAX_D1D][MAX_D1D];
4213 
4214  for (int dy = 0; dy < D1Dy; ++dy)
4215  {
4216  for (int dx = 0; dx < D1Dx; ++dx)
4217  {
4218  gradXY12[dy][dx] = 0.0;
4219  gradXY21[dy][dx] = 0.0;
4220  }
4221  }
4222  for (int qy = 0; qy < Q1D; ++qy)
4223  {
4224  double massX[MAX_D1D][2];
4225  for (int dx = 0; dx < D1Dx; ++dx)
4226  {
4227  for (int n = 0; n < 2; ++n)
4228  {
4229  massX[dx][n] = 0.0;
4230  }
4231  }
4232  for (int qx = 0; qx < Q1D; ++qx)
4233  {
4234  for (int dx = 0; dx < D1Dx; ++dx)
4235  {
4236  const double wx = Bot(dx,qx);
4237 
4238  massX[dx][0] += wx * mass[qz][qy][qx][1];
4239  massX[dx][1] += wx * mass[qz][qy][qx][2];
4240  }
4241  }
4242  for (int dy = 0; dy < D1Dy; ++dy)
4243  {
4244  const double wy = Bct(dy,qy);
4245  const double wDy = Gct(dy,qy);
4246 
4247  for (int dx = 0; dx < D1Dx; ++dx)
4248  {
4249  gradXY21[dy][dx] += massX[dx][0] * wy;
4250  gradXY12[dy][dx] += massX[dx][1] * wDy;
4251  }
4252  }
4253  }
4254 
4255  for (int dz = 0; dz < D1Dz; ++dz)
4256  {
4257  const double wz = Bct(dz,qz);
4258  const double wDz = Gct(dz,qz);
4259  for (int dy = 0; dy < D1Dy; ++dy)
4260  {
4261  for (int dx = 0; dx < D1Dx; ++dx)
4262  {
4263  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4264  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4265  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4266  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
4267  }
4268  }
4269  }
4270  } // loop qz
4271 
4272  osc += D1Dx * D1Dy * D1Dz;
4273  }
4274 
4275  // y component
4276  {
4277  const int D1Dz = D1D;
4278  const int D1Dy = D1D - 1;
4279  const int D1Dx = D1D;
4280 
4281  for (int qz = 0; qz < Q1D; ++qz)
4282  {
4283  double gradXY02[MAX_D1D][MAX_D1D];
4284  double gradXY20[MAX_D1D][MAX_D1D];
4285 
4286  for (int dy = 0; dy < D1Dy; ++dy)
4287  {
4288  for (int dx = 0; dx < D1Dx; ++dx)
4289  {
4290  gradXY02[dy][dx] = 0.0;
4291  gradXY20[dy][dx] = 0.0;
4292  }
4293  }
4294  for (int qx = 0; qx < Q1D; ++qx)
4295  {
4296  double massY[MAX_D1D][2];
4297  for (int dy = 0; dy < D1Dy; ++dy)
4298  {
4299  massY[dy][0] = 0.0;
4300  massY[dy][1] = 0.0;
4301  }
4302  for (int qy = 0; qy < Q1D; ++qy)
4303  {
4304  for (int dy = 0; dy < D1Dy; ++dy)
4305  {
4306  const double wy = Bot(dy,qy);
4307 
4308  massY[dy][0] += wy * mass[qz][qy][qx][2];
4309  massY[dy][1] += wy * mass[qz][qy][qx][0];
4310  }
4311  }
4312  for (int dx = 0; dx < D1Dx; ++dx)
4313  {
4314  const double wx = Bct(dx,qx);
4315  const double wDx = Gct(dx,qx);
4316 
4317  for (int dy = 0; dy < D1Dy; ++dy)
4318  {
4319  gradXY02[dy][dx] += massY[dy][0] * wDx;
4320  gradXY20[dy][dx] += massY[dy][1] * wx;
4321  }
4322  }
4323  }
4324 
4325  for (int dz = 0; dz < D1Dz; ++dz)
4326  {
4327  const double wz = Bct(dz,qz);
4328  const double wDz = Gct(dz,qz);
4329  for (int dy = 0; dy < D1Dy; ++dy)
4330  {
4331  for (int dx = 0; dx < D1Dx; ++dx)
4332  {
4333  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4334  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4335  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4336  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
4337  }
4338  }
4339  }
4340  } // loop qz
4341 
4342  osc += D1Dx * D1Dy * D1Dz;
4343  }
4344 
4345  // z component
4346  {
4347  const int D1Dz = D1D - 1;
4348  const int D1Dy = D1D;
4349  const int D1Dx = D1D;
4350 
4351  for (int qx = 0; qx < Q1D; ++qx)
4352  {
4353  double gradYZ01[MAX_D1D][MAX_D1D];
4354  double gradYZ10[MAX_D1D][MAX_D1D];
4355 
4356  for (int dy = 0; dy < D1Dy; ++dy)
4357  {
4358  for (int dz = 0; dz < D1Dz; ++dz)
4359  {
4360  gradYZ01[dz][dy] = 0.0;
4361  gradYZ10[dz][dy] = 0.0;
4362  }
4363  }
4364  for (int qy = 0; qy < Q1D; ++qy)
4365  {
4366  double massZ[MAX_D1D][2];
4367  for (int dz = 0; dz < D1Dz; ++dz)
4368  {
4369  for (int n = 0; n < 2; ++n)
4370  {
4371  massZ[dz][n] = 0.0;
4372  }
4373  }
4374  for (int qz = 0; qz < Q1D; ++qz)
4375  {
4376  for (int dz = 0; dz < D1Dz; ++dz)
4377  {
4378  const double wz = Bot(dz,qz);
4379 
4380  massZ[dz][0] += wz * mass[qz][qy][qx][0];
4381  massZ[dz][1] += wz * mass[qz][qy][qx][1];
4382  }
4383  }
4384  for (int dy = 0; dy < D1Dy; ++dy)
4385  {
4386  const double wy = Bct(dy,qy);
4387  const double wDy = Gct(dy,qy);
4388 
4389  for (int dz = 0; dz < D1Dz; ++dz)
4390  {
4391  gradYZ01[dz][dy] += wy * massZ[dz][1];
4392  gradYZ10[dz][dy] += wDy * massZ[dz][0];
4393  }
4394  }
4395  }
4396 
4397  for (int dx = 0; dx < D1Dx; ++dx)
4398  {
4399  const double wx = Bct(dx,qx);
4400  const double wDx = Gct(dx,qx);
4401 
4402  for (int dy = 0; dy < D1Dy; ++dy)
4403  {
4404  for (int dz = 0; dz < D1Dz; ++dz)
4405  {
4406  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4407  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4408  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4409  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
4410  }
4411  }
4412  }
4413  } // loop qx
4414  }
4415  });
4416 }
4417 
4418 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4419 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
4420  const int Q1D,
4421  const int coeffDim,
4422  const int NE,
4423  const Array<double> &bo,
4424  const Array<double> &bc,
4425  const Array<double> &gc,
4426  const Vector &pa_data,
4427  const Vector &x,
4428  Vector &y)
4429 {
4430  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4431  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4432 
4433  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4434  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4435  auto Gc = Reshape(gc.Read(), Q1D, D1D);
4436  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4437  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4438  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4439 
4440  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
4441  {
4442  constexpr int VDIM = 3;
4443  constexpr int maxCoeffDim = 3;
4444 
4445  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4446  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4447  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4448 
4449  double opc[maxCoeffDim];
4450  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4451  MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
4452 
4453  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4454 
4455  MFEM_FOREACH_THREAD(qx,x,Q1D)
4456  {
4457  MFEM_FOREACH_THREAD(qy,y,Q1D)
4458  {
4459  MFEM_FOREACH_THREAD(qz,z,Q1D)
4460  {
4461  for (int i=0; i<coeffDim; ++i)
4462  {
4463  opc[i] = op(i,qx,qy,qz,e);
4464  }
4465  }
4466  }
4467  }
4468 
4469  const int tidx = MFEM_THREAD_ID(x);
4470  const int tidy = MFEM_THREAD_ID(y);
4471  const int tidz = MFEM_THREAD_ID(z);
4472 
4473  if (tidz == 0)
4474  {
4475  MFEM_FOREACH_THREAD(d,y,D1D)
4476  {
4477  MFEM_FOREACH_THREAD(q,x,Q1D)
4478  {
4479  sBc[d][q] = Bc(q,d);
4480  sGc[d][q] = Gc(q,d);
4481  if (d < D1D-1)
4482  {
4483  sBo[d][q] = Bo(q,d);
4484  }
4485  }
4486  }
4487  }
4488  MFEM_SYNC_THREAD;
4489 
4490  for (int qz=0; qz < Q1D; ++qz)
4491  {
4492  if (tidz == qz)
4493  {
4494  MFEM_FOREACH_THREAD(qy,y,Q1D)
4495  {
4496  MFEM_FOREACH_THREAD(qx,x,Q1D)
4497  {
4498  for (int i=0; i<3; ++i)
4499  {
4500  mass[qy][qx][i] = 0.0;
4501  }
4502  }
4503  }
4504  }
4505 
4506  int osc = 0;
4507  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4508  {
4509  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4510  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4511  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4512 
4513  MFEM_FOREACH_THREAD(dz,z,D1Dz)
4514  {
4515  MFEM_FOREACH_THREAD(dy,y,D1Dy)
4516  {
4517  MFEM_FOREACH_THREAD(dx,x,D1Dx)
4518  {
4519  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4520  }
4521  }
4522  }
4523  MFEM_SYNC_THREAD;
4524 
4525  if (tidz == qz)
4526  {
4527  if (c == 0)
4528  {
4529  for (int i=0; i<coeffDim; ++i)
4530  {
4531  sop[i][tidx][tidy] = opc[i];
4532  }
4533  }
4534 
4535  MFEM_FOREACH_THREAD(qy,y,Q1D)
4536  {
4537  MFEM_FOREACH_THREAD(qx,x,Q1D)
4538  {
4539  double u = 0.0;
4540 
4541  for (int dz = 0; dz < D1Dz; ++dz)
4542  {
4543  const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
4544 
4545  for (int dy = 0; dy < D1Dy; ++dy)
4546  {
4547  const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
4548 
4549  for (int dx = 0; dx < D1Dx; ++dx)
4550  {
4551  const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
4552  u += wx * wy * wz;
4553  }
4554  }
4555  }
4556 
4557  mass[qy][qx][c] += u;
4558  } // qx
4559  } // qy
4560  } // tidz == qz
4561 
4562  osc += D1Dx * D1Dy * D1Dz;
4563  MFEM_SYNC_THREAD;
4564  } // c
4565 
4566  double dxyz1 = 0.0;
4567  double dxyz2 = 0.0;
4568  double dxyz3 = 0.0;
4569 
4570  MFEM_FOREACH_THREAD(dz,z,D1D)
4571  {
4572  const double wcz = sBc[dz][qz];
4573  const double wcDz = sGc[dz][qz];
4574  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4575 
4576  MFEM_FOREACH_THREAD(dy,y,D1D)
4577  {
4578  MFEM_FOREACH_THREAD(dx,x,D1D)
4579  {
4580  for (int qy = 0; qy < Q1D; ++qy)
4581  {
4582  const double wcy = sBc[dy][qy];
4583  const double wcDy = sGc[dy][qy];
4584  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4585 
4586  for (int qx = 0; qx < Q1D; ++qx)
4587  {
4588  const double O1 = sop[0][qx][qy];
4589  const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
4590  const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
4591 
4592  const double c1 = O1 * mass[qy][qx][0];
4593  const double c2 = O2 * mass[qy][qx][1];
4594  const double c3 = O3 * mass[qy][qx][2];
4595 
4596  const double wcx = sBc[dx][qx];
4597  const double wDx = sGc[dx][qx];
4598 
4599  if (dx < D1D-1)
4600  {
4601  const double wx = sBo[dx][qx];
4602  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
4603  }
4604 
4605  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
4606 
4607  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
4608  } // qx
4609  } // qy
4610  } // dx
4611  } // dy
4612  } // dz
4613 
4614  MFEM_SYNC_THREAD;
4615 
4616  MFEM_FOREACH_THREAD(dz,z,D1D)
4617  {
4618  MFEM_FOREACH_THREAD(dy,y,D1D)
4619  {
4620  MFEM_FOREACH_THREAD(dx,x,D1D)
4621  {
4622  if (dx < D1D-1)
4623  {
4624  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4625  }
4626  if (dy < D1D-1)
4627  {
4628  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4629  }
4630  if (dz < D1D-1)
4631  {
4632  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4633  }
4634  }
4635  }
4636  }
4637  } // qz
4638  }); // end of element loop
4639 }
4640 
4641 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4642 {
4643  if (testType == mfem::FiniteElement::CURL &&
4644  trialType == mfem::FiniteElement::CURL && dim == 3)
4645  {
4646  if (Device::Allows(Backend::DEVICE_MASK))
4647  {
4648  const int ID = (dofs1D << 4) | quad1D;
4649  switch (ID)
4650  {
4651  case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, coeffDim,
4652  ne, mapsO->B, mapsC->B,
4653  mapsC->G, pa_data, x, y);
4654  case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, coeffDim,
4655  ne, mapsO->B, mapsC->B,
4656  mapsC->G, pa_data, x, y);
4657  case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, coeffDim,
4658  ne, mapsO->B, mapsC->B,
4659  mapsC->G, pa_data, x, y);
4660  case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, coeffDim,
4661  ne, mapsO->B, mapsC->B,
4662  mapsC->G, pa_data, x, y);
4663  default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne,
4664  mapsO->B, mapsC->B,
4665  mapsC->G, pa_data, x, y);
4666  }
4667  }
4668  else
4669  PAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
4670  mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
4671  }
4672  else
4673  {
4674  MFEM_ABORT("Unsupported dimension or space!");
4675  }
4676 }
4677 
4678 template void SmemPAHcurlMassAssembleDiagonal3D<0,0>(const int D1D,
4679  const int Q1D,
4680  const int NE,
4681  const bool symmetric,
4682  const Array<double> &bo,
4683  const Array<double> &bc,
4684  const Vector &pa_data,
4685  Vector &diag);
4686 
4687 template void SmemPAHcurlMassAssembleDiagonal3D<2,3>(const int D1D,
4688  const int Q1D,
4689  const int NE,
4690  const bool symmetric,
4691  const Array<double> &bo,
4692  const Array<double> &bc,
4693  const Vector &pa_data,
4694  Vector &diag);
4695 
4696 template void SmemPAHcurlMassAssembleDiagonal3D<3,4>(const int D1D,
4697  const int Q1D,
4698  const int NE,
4699  const bool symmetric,
4700  const Array<double> &bo,
4701  const Array<double> &bc,
4702  const Vector &pa_data,
4703  Vector &diag);
4704 
4705 template void SmemPAHcurlMassAssembleDiagonal3D<4,5>(const int D1D,
4706  const int Q1D,
4707  const int NE,
4708  const bool symmetric,
4709  const Array<double> &bo,
4710  const Array<double> &bc,
4711  const Vector &pa_data,
4712  Vector &diag);
4713 
4714 template void SmemPAHcurlMassAssembleDiagonal3D<5,6>(const int D1D,
4715  const int Q1D,
4716  const int NE,
4717  const bool symmetric,
4718  const Array<double> &bo,
4719  const Array<double> &bc,
4720  const Vector &pa_data,
4721  Vector &diag);
4722 
4723 template void SmemPAHcurlMassApply3D<0,0>(const int D1D,
4724  const int Q1D,
4725  const int NE,
4726  const bool symmetric,
4727  const Array<double> &bo,
4728  const Array<double> &bc,
4729  const Array<double> &bot,
4730  const Array<double> &bct,
4731  const Vector &pa_data,
4732  const Vector &x,
4733  Vector &y);
4734 
4735 template void SmemPAHcurlMassApply3D<2,3>(const int D1D,
4736  const int Q1D,
4737  const int NE,
4738  const bool symmetric,
4739  const Array<double> &bo,
4740  const Array<double> &bc,
4741  const Array<double> &bot,
4742  const Array<double> &bct,
4743  const Vector &pa_data,
4744  const Vector &x,
4745  Vector &y);
4746 
4747 template void SmemPAHcurlMassApply3D<3,4>(const int D1D,
4748  const int Q1D,
4749  const int NE,
4750  const bool symmetric,
4751  const Array<double> &bo,
4752  const Array<double> &bc,
4753  const Array<double> &bot,
4754  const Array<double> &bct,
4755  const Vector &pa_data,
4756  const Vector &x,
4757  Vector &y);
4758 
4759 template void SmemPAHcurlMassApply3D<4,5>(const int D1D,
4760  const int Q1D,
4761  const int NE,
4762  const bool symmetric,
4763  const Array<double> &bo,
4764  const Array<double> &bc,
4765  const Array<double> &bot,
4766  const Array<double> &bct,
4767  const Vector &pa_data,
4768  const Vector &x,
4769  Vector &y);
4770 
4771 template void SmemPAHcurlMassApply3D<5,6>(const int D1D,
4772  const int Q1D,
4773  const int NE,
4774  const bool symmetric,
4775  const Array<double> &bo,
4776  const Array<double> &bc,
4777  const Array<double> &bot,
4778  const Array<double> &bct,
4779  const Vector &pa_data,
4780  const Vector &x,
4781  Vector &y);
4782 
4783 } // namespace mfem
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:388
void PAHcurlMassAssembleDiagonal2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Vector &pa_data, Vector &diag)
const int MAX_Q1D
Definition: forall.hpp:27
constexpr int HCURL_MAX_D1D
Definition: bilininteg.hpp:24
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:290
string space
void PAHcurlMassAssembleDiagonal3D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Vector &pa_data, Vector &diag)
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:372
const int MAX_D1D
Definition: forall.hpp:26
void PAHcurlMassApply2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Array< double > &bot, const Array< double > &bct, const Vector &pa_data, const Vector &x, Vector &y)
Vector data type.
Definition: vector.hpp:51
constexpr int HCURL_MAX_Q1D
Definition: bilininteg.hpp:25