MFEM  v4.5.2
Finite element discretization library
bilininteg_hcurl.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qspace.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 void PAHcurlHdivSetup3D(const int Q1D,
23  const int coeffDim,
24  const int NE,
25  const bool transpose,
26  const Array<double> &w_,
27  const Vector &j,
28  Vector &coeff_,
29  Vector &op);
30 
31 void PAHcurlMassApply2D(const int D1D,
32  const int Q1D,
33  const int NE,
34  const bool symmetric,
35  const Array<double> &bo,
36  const Array<double> &bc,
37  const Array<double> &bot,
38  const Array<double> &bct,
39  const Vector &pa_data,
40  const Vector &x,
41  Vector &y)
42 {
43  constexpr static int VDIM = 2;
44  constexpr static int MAX_D1D = HCURL_MAX_D1D;
45  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
46 
47  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
48  auto Bc = Reshape(bc.Read(), Q1D, D1D);
49  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
50  auto Bct = Reshape(bct.Read(), D1D, Q1D);
51  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
52  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
53  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
54 
55  MFEM_FORALL(e, NE,
56  {
57  double mass[MAX_Q1D][MAX_Q1D][VDIM];
58 
59  for (int qy = 0; qy < Q1D; ++qy)
60  {
61  for (int qx = 0; qx < Q1D; ++qx)
62  {
63  for (int c = 0; c < VDIM; ++c)
64  {
65  mass[qy][qx][c] = 0.0;
66  }
67  }
68  }
69 
70  int osc = 0;
71 
72  for (int c = 0; c < VDIM; ++c) // loop over x, y components
73  {
74  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
75  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
76 
77  for (int dy = 0; dy < D1Dy; ++dy)
78  {
79  double massX[MAX_Q1D];
80  for (int qx = 0; qx < Q1D; ++qx)
81  {
82  massX[qx] = 0.0;
83  }
84 
85  for (int dx = 0; dx < D1Dx; ++dx)
86  {
87  const double t = X(dx + (dy * D1Dx) + osc, e);
88  for (int qx = 0; qx < Q1D; ++qx)
89  {
90  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
91  }
92  }
93 
94  for (int qy = 0; qy < Q1D; ++qy)
95  {
96  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
97  for (int qx = 0; qx < Q1D; ++qx)
98  {
99  mass[qy][qx][c] += massX[qx] * wy;
100  }
101  }
102  }
103 
104  osc += D1Dx * D1Dy;
105  } // loop (c) over components
106 
107  // Apply D operator.
108  for (int qy = 0; qy < Q1D; ++qy)
109  {
110  for (int qx = 0; qx < Q1D; ++qx)
111  {
112  const double O11 = op(qx,qy,0,e);
113  const double O21 = op(qx,qy,1,e);
114  const double O12 = symmetric ? O21 : op(qx,qy,2,e);
115  const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
116  const double massX = mass[qy][qx][0];
117  const double massY = mass[qy][qx][1];
118  mass[qy][qx][0] = (O11*massX)+(O12*massY);
119  mass[qy][qx][1] = (O21*massX)+(O22*massY);
120  }
121  }
122 
123  for (int qy = 0; qy < Q1D; ++qy)
124  {
125  osc = 0;
126 
127  for (int c = 0; c < VDIM; ++c) // loop over x, y components
128  {
129  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
130  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
131 
132  double massX[MAX_D1D];
133  for (int dx = 0; dx < D1Dx; ++dx)
134  {
135  massX[dx] = 0.0;
136  }
137  for (int qx = 0; qx < Q1D; ++qx)
138  {
139  for (int dx = 0; dx < D1Dx; ++dx)
140  {
141  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
142  }
143  }
144 
145  for (int dy = 0; dy < D1Dy; ++dy)
146  {
147  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
148 
149  for (int dx = 0; dx < D1Dx; ++dx)
150  {
151  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
152  }
153  }
154 
155  osc += D1Dx * D1Dy;
156  } // loop c
157  } // loop qy
158  }); // end of element loop
159 }
160 
161 void PAHcurlMassAssembleDiagonal2D(const int D1D,
162  const int Q1D,
163  const int NE,
164  const bool symmetric,
165  const Array<double> &bo,
166  const Array<double> &bc,
167  const Vector &pa_data,
168  Vector &diag)
169 {
170  constexpr static int VDIM = 2;
171  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
172 
173  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
174  auto Bc = Reshape(bc.Read(), Q1D, D1D);
175  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
176  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
177 
178  MFEM_FORALL(e, NE,
179  {
180  int osc = 0;
181 
182  for (int c = 0; c < VDIM; ++c) // loop over x, y components
183  {
184  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
185  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
186 
187  double mass[MAX_Q1D];
188 
189  for (int dy = 0; dy < D1Dy; ++dy)
190  {
191  for (int qx = 0; qx < Q1D; ++qx)
192  {
193  mass[qx] = 0.0;
194  for (int qy = 0; qy < Q1D; ++qy)
195  {
196  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
197 
198  mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
199  op(qx,qy,symmetric ? 2 : 3, e));
200  }
201  }
202 
203  for (int dx = 0; dx < D1Dx; ++dx)
204  {
205  for (int qx = 0; qx < Q1D; ++qx)
206  {
207  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
208  D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
209  }
210  }
211  }
212 
213  osc += D1Dx * D1Dy;
214  } // loop c
215  }); // end of element loop
216 }
217 
218 void PAHcurlMassAssembleDiagonal3D(const int D1D,
219  const int Q1D,
220  const int NE,
221  const bool symmetric,
222  const Array<double> &bo,
223  const Array<double> &bc,
224  const Vector &pa_data,
225  Vector &diag)
226 {
227  constexpr static int MAX_D1D = HCURL_MAX_D1D;
228  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
229 
230  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
231  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
232  constexpr static int VDIM = 3;
233 
234  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
235  auto Bc = Reshape(bc.Read(), Q1D, D1D);
236  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
237  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
238 
239  MFEM_FORALL(e, NE,
240  {
241  int osc = 0;
242 
243  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
244  {
245  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
246  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
247  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
248 
249  const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
250  (symmetric ? 5 : 8));
251 
252  double mass[MAX_Q1D];
253 
254  for (int dz = 0; dz < D1Dz; ++dz)
255  {
256  for (int dy = 0; dy < D1Dy; ++dy)
257  {
258  for (int qx = 0; qx < Q1D; ++qx)
259  {
260  mass[qx] = 0.0;
261  for (int qy = 0; qy < Q1D; ++qy)
262  {
263  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
264 
265  for (int qz = 0; qz < Q1D; ++qz)
266  {
267  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
268 
269  mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
270  }
271  }
272  }
273 
274  for (int dx = 0; dx < D1Dx; ++dx)
275  {
276  for (int qx = 0; qx < Q1D; ++qx)
277  {
278  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
279  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
280  }
281  }
282  }
283  }
284 
285  osc += D1Dx * D1Dy * D1Dz;
286  } // loop c
287  }); // end of element loop
288 }
289 
290 template<int T_D1D, int T_Q1D>
291 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
292  const int Q1D,
293  const int NE,
294  const bool symmetric,
295  const Array<double> &bo,
296  const Array<double> &bc,
297  const Vector &pa_data,
298  Vector &diag)
299 {
300  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
301  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
302 
303  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
304  auto Bc = Reshape(bc.Read(), Q1D, D1D);
305  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
306  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
307 
308  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
309  {
310  constexpr int VDIM = 3;
311  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
312  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
313 
314  MFEM_SHARED double sBo[tQ1D][tD1D];
315  MFEM_SHARED double sBc[tQ1D][tD1D];
316 
317  double op3[3];
318  MFEM_SHARED double sop[3][tQ1D][tQ1D];
319 
320  MFEM_FOREACH_THREAD(qx,x,Q1D)
321  {
322  MFEM_FOREACH_THREAD(qy,y,Q1D)
323  {
324  MFEM_FOREACH_THREAD(qz,z,Q1D)
325  {
326  op3[0] = op(qx,qy,qz,0,e);
327  op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
328  op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
329  }
330  }
331  }
332 
333  const int tidx = MFEM_THREAD_ID(x);
334  const int tidy = MFEM_THREAD_ID(y);
335  const int tidz = MFEM_THREAD_ID(z);
336 
337  if (tidz == 0)
338  {
339  MFEM_FOREACH_THREAD(d,y,D1D)
340  {
341  MFEM_FOREACH_THREAD(q,x,Q1D)
342  {
343  sBc[q][d] = Bc(q,d);
344  if (d < D1D-1)
345  {
346  sBo[q][d] = Bo(q,d);
347  }
348  }
349  }
350  }
351  MFEM_SYNC_THREAD;
352 
353  int osc = 0;
354  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
355  {
356  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
357  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
358  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
359 
360  double dxyz = 0.0;
361 
362  for (int qz=0; qz < Q1D; ++qz)
363  {
364  if (tidz == qz)
365  {
366  for (int i=0; i<3; ++i)
367  {
368  sop[i][tidx][tidy] = op3[i];
369  }
370  }
371 
372  MFEM_SYNC_THREAD;
373 
374  MFEM_FOREACH_THREAD(dz,z,D1Dz)
375  {
376  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
377 
378  MFEM_FOREACH_THREAD(dy,y,D1Dy)
379  {
380  MFEM_FOREACH_THREAD(dx,x,D1Dx)
381  {
382  for (int qy = 0; qy < Q1D; ++qy)
383  {
384  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
385 
386  for (int qx = 0; qx < Q1D; ++qx)
387  {
388  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
389  dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
390  }
391  }
392  }
393  }
394  }
395 
396  MFEM_SYNC_THREAD;
397  } // qz loop
398 
399  MFEM_FOREACH_THREAD(dz,z,D1Dz)
400  {
401  MFEM_FOREACH_THREAD(dy,y,D1Dy)
402  {
403  MFEM_FOREACH_THREAD(dx,x,D1Dx)
404  {
405  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
406  }
407  }
408  }
409 
410  osc += D1Dx * D1Dy * D1Dz;
411  } // c loop
412  }); // end of element loop
413 }
414 
415 void PAHcurlMassApply3D(const int D1D,
416  const int Q1D,
417  const int NE,
418  const bool symmetric,
419  const Array<double> &bo,
420  const Array<double> &bc,
421  const Array<double> &bot,
422  const Array<double> &bct,
423  const Vector &pa_data,
424  const Vector &x,
425  Vector &y)
426 {
427  constexpr static int MAX_D1D = HCURL_MAX_D1D;
428  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
429 
430  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
431  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
432  constexpr static int VDIM = 3;
433 
434  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
435  auto Bc = Reshape(bc.Read(), Q1D, D1D);
436  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
437  auto Bct = Reshape(bct.Read(), D1D, Q1D);
438  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
439  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
440  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
441 
442  MFEM_FORALL(e, NE,
443  {
444  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
445 
446  for (int qz = 0; qz < Q1D; ++qz)
447  {
448  for (int qy = 0; qy < Q1D; ++qy)
449  {
450  for (int qx = 0; qx < Q1D; ++qx)
451  {
452  for (int c = 0; c < VDIM; ++c)
453  {
454  mass[qz][qy][qx][c] = 0.0;
455  }
456  }
457  }
458  }
459 
460  int osc = 0;
461 
462  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
463  {
464  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
465  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
466  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
467 
468  for (int dz = 0; dz < D1Dz; ++dz)
469  {
470  double massXY[MAX_Q1D][MAX_Q1D];
471  for (int qy = 0; qy < Q1D; ++qy)
472  {
473  for (int qx = 0; qx < Q1D; ++qx)
474  {
475  massXY[qy][qx] = 0.0;
476  }
477  }
478 
479  for (int dy = 0; dy < D1Dy; ++dy)
480  {
481  double massX[MAX_Q1D];
482  for (int qx = 0; qx < Q1D; ++qx)
483  {
484  massX[qx] = 0.0;
485  }
486 
487  for (int dx = 0; dx < D1Dx; ++dx)
488  {
489  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
490  for (int qx = 0; qx < Q1D; ++qx)
491  {
492  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
493  }
494  }
495 
496  for (int qy = 0; qy < Q1D; ++qy)
497  {
498  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
499  for (int qx = 0; qx < Q1D; ++qx)
500  {
501  const double wx = massX[qx];
502  massXY[qy][qx] += wx * wy;
503  }
504  }
505  }
506 
507  for (int qz = 0; qz < Q1D; ++qz)
508  {
509  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
510  for (int qy = 0; qy < Q1D; ++qy)
511  {
512  for (int qx = 0; qx < Q1D; ++qx)
513  {
514  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
515  }
516  }
517  }
518  }
519 
520  osc += D1Dx * D1Dy * D1Dz;
521  } // loop (c) over components
522 
523  // Apply D operator.
524  for (int qz = 0; qz < Q1D; ++qz)
525  {
526  for (int qy = 0; qy < Q1D; ++qy)
527  {
528  for (int qx = 0; qx < Q1D; ++qx)
529  {
530  const double O11 = op(qx,qy,qz,0,e);
531  const double O12 = op(qx,qy,qz,1,e);
532  const double O13 = op(qx,qy,qz,2,e);
533  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
534  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
535  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
536  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
537  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
538  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
539  const double massX = mass[qz][qy][qx][0];
540  const double massY = mass[qz][qy][qx][1];
541  const double massZ = mass[qz][qy][qx][2];
542  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
543  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
544  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
545  }
546  }
547  }
548 
549  for (int qz = 0; qz < Q1D; ++qz)
550  {
551  double massXY[MAX_D1D][MAX_D1D];
552 
553  osc = 0;
554 
555  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
556  {
557  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
558  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
559  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
560 
561  for (int dy = 0; dy < D1Dy; ++dy)
562  {
563  for (int dx = 0; dx < D1Dx; ++dx)
564  {
565  massXY[dy][dx] = 0.0;
566  }
567  }
568  for (int qy = 0; qy < Q1D; ++qy)
569  {
570  double massX[MAX_D1D];
571  for (int dx = 0; dx < D1Dx; ++dx)
572  {
573  massX[dx] = 0;
574  }
575  for (int qx = 0; qx < Q1D; ++qx)
576  {
577  for (int dx = 0; dx < D1Dx; ++dx)
578  {
579  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
580  }
581  }
582  for (int dy = 0; dy < D1Dy; ++dy)
583  {
584  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
585  for (int dx = 0; dx < D1Dx; ++dx)
586  {
587  massXY[dy][dx] += massX[dx] * wy;
588  }
589  }
590  }
591 
592  for (int dz = 0; dz < D1Dz; ++dz)
593  {
594  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
595  for (int dy = 0; dy < D1Dy; ++dy)
596  {
597  for (int dx = 0; dx < D1Dx; ++dx)
598  {
599  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
600  }
601  }
602  }
603 
604  osc += D1Dx * D1Dy * D1Dz;
605  } // loop c
606  } // loop qz
607  }); // end of element loop
608 }
609 
610 template<int T_D1D, int T_Q1D>
611 void SmemPAHcurlMassApply3D(const int D1D,
612  const int Q1D,
613  const int NE,
614  const bool symmetric,
615  const Array<double> &bo,
616  const Array<double> &bc,
617  const Array<double> &bot,
618  const Array<double> &bct,
619  const Vector &pa_data,
620  const Vector &x,
621  Vector &y)
622 {
623  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
624  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
625 
626  const int dataSize = symmetric ? 6 : 9;
627 
628  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
629  auto Bc = Reshape(bc.Read(), Q1D, D1D);
630  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
631  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
632  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
633 
634  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
635  {
636  constexpr int VDIM = 3;
637  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
638  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
639 
640  MFEM_SHARED double sBo[tQ1D][tD1D];
641  MFEM_SHARED double sBc[tQ1D][tD1D];
642 
643  double op9[9];
644  MFEM_SHARED double sop[9*tQ1D*tQ1D];
645  MFEM_SHARED double mass[tQ1D][tQ1D][3];
646 
647  MFEM_SHARED double sX[tD1D][tD1D][tD1D];
648 
649  MFEM_FOREACH_THREAD(qx,x,Q1D)
650  {
651  MFEM_FOREACH_THREAD(qy,y,Q1D)
652  {
653  MFEM_FOREACH_THREAD(qz,z,Q1D)
654  {
655  for (int i=0; i<dataSize; ++i)
656  {
657  op9[i] = op(qx,qy,qz,i,e);
658  }
659  }
660  }
661  }
662 
663  const int tidx = MFEM_THREAD_ID(x);
664  const int tidy = MFEM_THREAD_ID(y);
665  const int tidz = MFEM_THREAD_ID(z);
666 
667  if (tidz == 0)
668  {
669  MFEM_FOREACH_THREAD(d,y,D1D)
670  {
671  MFEM_FOREACH_THREAD(q,x,Q1D)
672  {
673  sBc[q][d] = Bc(q,d);
674  if (d < D1D-1)
675  {
676  sBo[q][d] = Bo(q,d);
677  }
678  }
679  }
680  }
681  MFEM_SYNC_THREAD;
682 
683  for (int qz=0; qz < Q1D; ++qz)
684  {
685  int osc = 0;
686  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
687  {
688  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
689  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
690  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
691 
692  MFEM_FOREACH_THREAD(dz,z,D1Dz)
693  {
694  MFEM_FOREACH_THREAD(dy,y,D1Dy)
695  {
696  MFEM_FOREACH_THREAD(dx,x,D1Dx)
697  {
698  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
699  }
700  }
701  }
702  MFEM_SYNC_THREAD;
703 
704  if (tidz == qz)
705  {
706  for (int i=0; i<dataSize; ++i)
707  {
708  sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
709  }
710 
711  MFEM_FOREACH_THREAD(qy,y,Q1D)
712  {
713  MFEM_FOREACH_THREAD(qx,x,Q1D)
714  {
715  double u = 0.0;
716 
717  for (int dz = 0; dz < D1Dz; ++dz)
718  {
719  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
720  for (int dy = 0; dy < D1Dy; ++dy)
721  {
722  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
723  for (int dx = 0; dx < D1Dx; ++dx)
724  {
725  const double t = sX[dz][dy][dx];
726  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
727  u += t * wx * wy * wz;
728  }
729  }
730  }
731 
732  mass[qy][qx][c] = u;
733  } // qx
734  } // qy
735  } // tidz == qz
736 
737  osc += D1Dx * D1Dy * D1Dz;
738  MFEM_SYNC_THREAD;
739  } // c
740 
741  MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
742 
743  osc = 0;
744  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
745  {
746  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
747  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
748  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
749 
750  double dxyz = 0.0;
751 
752  MFEM_FOREACH_THREAD(dz,z,D1Dz)
753  {
754  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
755 
756  MFEM_FOREACH_THREAD(dy,y,D1Dy)
757  {
758  MFEM_FOREACH_THREAD(dx,x,D1Dx)
759  {
760  for (int qy = 0; qy < Q1D; ++qy)
761  {
762  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
763  for (int qx = 0; qx < Q1D; ++qx)
764  {
765  const int os = (dataSize*qx) + (dataSize*Q1D*qy);
766  const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
767  (symmetric ? 2 : 6))); // O11, O21, O31
768  const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
769  (symmetric ? 4 : 7))); // O12, O22, O32
770  const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
771  (symmetric ? 5 : 8))); // O13, O23, O33
772 
773  const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
774  (sop[id3] * mass[qy][qx][2]);
775 
776  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
777  dxyz += m_c * wx * wy * wz;
778  }
779  }
780  }
781  }
782  }
783 
784  MFEM_SYNC_THREAD;
785 
786  MFEM_FOREACH_THREAD(dz,z,D1Dz)
787  {
788  MFEM_FOREACH_THREAD(dy,y,D1Dy)
789  {
790  MFEM_FOREACH_THREAD(dx,x,D1Dx)
791  {
792  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
793  }
794  }
795  }
796 
797  osc += D1Dx * D1Dy * D1Dz;
798  } // c loop
799  } // qz
800  }); // end of element loop
801 }
802 
803 // PA H(curl) curl-curl assemble 2D kernel
804 static void PACurlCurlSetup2D(const int Q1D,
805  const int NE,
806  const Array<double> &w,
807  const Vector &j,
808  Vector &coeff,
809  Vector &op)
810 {
811  const int NQ = Q1D*Q1D;
812  auto W = w.Read();
813  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
814  auto C = Reshape(coeff.Read(), NQ, NE);
815  auto y = Reshape(op.Write(), NQ, NE);
816  MFEM_FORALL(e, NE,
817  {
818  for (int q = 0; q < NQ; ++q)
819  {
820  const double J11 = J(q,0,0,e);
821  const double J21 = J(q,1,0,e);
822  const double J12 = J(q,0,1,e);
823  const double J22 = J(q,1,1,e);
824  const double detJ = (J11*J22)-(J21*J12);
825  y(q,e) = W[q] * C(q,e) / detJ;
826  }
827  });
828 }
829 
830 // PA H(curl) curl-curl assemble 3D kernel
831 static void PACurlCurlSetup3D(const int Q1D,
832  const int coeffDim,
833  const int NE,
834  const Array<double> &w,
835  const Vector &j,
836  Vector &coeff,
837  Vector &op)
838 {
839  const int NQ = Q1D*Q1D*Q1D;
840  const bool symmetric = (coeffDim != 9);
841  auto W = w.Read();
842  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
843  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
844  auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
845 
846  MFEM_FORALL(e, NE,
847  {
848  for (int q = 0; q < NQ; ++q)
849  {
850  const double J11 = J(q,0,0,e);
851  const double J21 = J(q,1,0,e);
852  const double J31 = J(q,2,0,e);
853  const double J12 = J(q,0,1,e);
854  const double J22 = J(q,1,1,e);
855  const double J32 = J(q,2,1,e);
856  const double J13 = J(q,0,2,e);
857  const double J23 = J(q,1,2,e);
858  const double J33 = J(q,2,2,e);
859  const double detJ = J11 * (J22 * J33 - J32 * J23) -
860  /* */ J21 * (J12 * J33 - J32 * J13) +
861  /* */ J31 * (J12 * J23 - J22 * J13);
862 
863  const double c_detJ = W[q] / detJ;
864 
865  if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
866  {
867  // Set y to the 6 or 9 entries of J^T M J / det
868  const double M11 = C(0, q, e);
869  const double M12 = C(1, q, e);
870  const double M13 = C(2, q, e);
871  const double M21 = (!symmetric) ? C(3, q, e) : M12;
872  const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
873  const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
874  const double M31 = (!symmetric) ? C(6, q, e) : M13;
875  const double M32 = (!symmetric) ? C(7, q, e) : M23;
876  const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
877 
878  // First compute R = MJ
879  const double R11 = M11*J11 + M12*J21 + M13*J31;
880  const double R12 = M11*J12 + M12*J22 + M13*J32;
881  const double R13 = M11*J13 + M12*J23 + M13*J33;
882  const double R21 = M21*J11 + M22*J21 + M23*J31;
883  const double R22 = M21*J12 + M22*J22 + M23*J32;
884  const double R23 = M21*J13 + M22*J23 + M23*J33;
885  const double R31 = M31*J11 + M32*J21 + M33*J31;
886  const double R32 = M31*J12 + M32*J22 + M33*J32;
887  const double R33 = M31*J13 + M32*J23 + M33*J33;
888 
889  // Now set y to J^T R / det
890  y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
891  const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
892  y(q,1,e) = Y12; // 1,2
893  y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
894 
895  const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
896  const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
897  const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
898 
899  const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
900 
901  y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
902  y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
903  y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
904 
905  if (!symmetric)
906  {
907  y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
908  y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
909  y(q,8,e) = Y33; // 3,3
910  }
911  }
912  else // Vector or scalar coefficient version
913  {
914  // Set y to the 6 entries of J^T D J / det^2
915  const double D1 = C(0, q, e);
916  const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
917  const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
918 
919  y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
920  y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
921  y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
922  y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
923  y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
924  y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
925  }
926  }
927  });
928 }
929 
930 // PA H(curl)-L2 assemble 2D kernel
931 static void PACurlL2Setup2D(const int Q1D,
932  const int NE,
933  const Array<double> &w,
934  Vector &coeff,
935  Vector &op)
936 {
937  const int NQ = Q1D*Q1D;
938  auto W = w.Read();
939  auto C = Reshape(coeff.Read(), NQ, NE);
940  auto y = Reshape(op.Write(), NQ, NE);
941  MFEM_FORALL(e, NE,
942  {
943  for (int q = 0; q < NQ; ++q)
944  {
945  y(q,e) = W[q] * C(q,e);
946  }
947  });
948 }
949 
950 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
951 {
952  // Assumes tensor-product elements
953  Mesh *mesh = fes.GetMesh();
954  const FiniteElement *fel = fes.GetFE(0);
955 
956  const VectorTensorFiniteElement *el =
957  dynamic_cast<const VectorTensorFiniteElement*>(fel);
958  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
959 
960  const IntegrationRule *ir
961  = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
962  *mesh->GetElementTransformation(0));
963 
964  const int dims = el->GetDim();
965  MFEM_VERIFY(dims == 2 || dims == 3, "");
966 
967  nq = ir->GetNPoints();
968  dim = mesh->Dimension();
969  MFEM_VERIFY(dim == 2 || dim == 3, "");
970 
971  ne = fes.GetNE();
972  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
973  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
974  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
975  dofs1D = mapsC->ndof;
976  quad1D = mapsC->nqpt;
977 
978  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
979 
980  QuadratureSpace qs(*mesh, *ir);
981  CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC);
982  if (Q) { coeff.Project(*Q); }
983  else if (MQ) { coeff.ProjectTranspose(*MQ); }
984  else if (DQ) { coeff.Project(*DQ); }
985  else { coeff.SetConstant(1.0); }
986 
987  const int coeff_dim = coeff.GetVDim();
988  symmetric = (coeff_dim != dim*dim);
989  const int sym_dims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
990  const int ndata = (dim == 2) ? 1 : (symmetric ? sym_dims : dim*dim);
991  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
992 
993  if (el->GetDerivType() != mfem::FiniteElement::CURL)
994  {
995  MFEM_ABORT("Unknown kernel.");
996  }
997 
998  if (dim == 3)
999  {
1000  PACurlCurlSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, coeff,
1001  pa_data);
1002  }
1003  else
1004  {
1005  PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1006  }
1007 }
1008 
1009 static void PACurlCurlApply2D(const int D1D,
1010  const int Q1D,
1011  const int NE,
1012  const Array<double> &bo,
1013  const Array<double> &bot,
1014  const Array<double> &gc,
1015  const Array<double> &gct,
1016  const Vector &pa_data,
1017  const Vector &x,
1018  Vector &y)
1019 {
1020  constexpr static int VDIM = 2;
1021  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1022  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1023 
1024  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1025  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1026  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1027  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1028  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1029  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1030  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1031 
1032  MFEM_FORALL(e, NE,
1033  {
1034  double curl[MAX_Q1D][MAX_Q1D];
1035 
1036  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1037 
1038  for (int qy = 0; qy < Q1D; ++qy)
1039  {
1040  for (int qx = 0; qx < Q1D; ++qx)
1041  {
1042  curl[qy][qx] = 0.0;
1043  }
1044  }
1045 
1046  int osc = 0;
1047 
1048  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1049  {
1050  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1051  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1052 
1053  for (int dy = 0; dy < D1Dy; ++dy)
1054  {
1055  double gradX[MAX_Q1D];
1056  for (int qx = 0; qx < Q1D; ++qx)
1057  {
1058  gradX[qx] = 0;
1059  }
1060 
1061  for (int dx = 0; dx < D1Dx; ++dx)
1062  {
1063  const double t = X(dx + (dy * D1Dx) + osc, e);
1064  for (int qx = 0; qx < Q1D; ++qx)
1065  {
1066  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1067  }
1068  }
1069 
1070  for (int qy = 0; qy < Q1D; ++qy)
1071  {
1072  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1073  for (int qx = 0; qx < Q1D; ++qx)
1074  {
1075  curl[qy][qx] += gradX[qx] * wy;
1076  }
1077  }
1078  }
1079 
1080  osc += D1Dx * D1Dy;
1081  } // loop (c) over components
1082 
1083  // Apply D operator.
1084  for (int qy = 0; qy < Q1D; ++qy)
1085  {
1086  for (int qx = 0; qx < Q1D; ++qx)
1087  {
1088  curl[qy][qx] *= op(qx,qy,e);
1089  }
1090  }
1091 
1092  for (int qy = 0; qy < Q1D; ++qy)
1093  {
1094  osc = 0;
1095 
1096  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1097  {
1098  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1099  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1100 
1101  double gradX[MAX_D1D];
1102  for (int dx = 0; dx < D1Dx; ++dx)
1103  {
1104  gradX[dx] = 0.0;
1105  }
1106  for (int qx = 0; qx < Q1D; ++qx)
1107  {
1108  for (int dx = 0; dx < D1Dx; ++dx)
1109  {
1110  gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1111  }
1112  }
1113  for (int dy = 0; dy < D1Dy; ++dy)
1114  {
1115  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1116 
1117  for (int dx = 0; dx < D1Dx; ++dx)
1118  {
1119  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1120  }
1121  }
1122 
1123  osc += D1Dx * D1Dy;
1124  } // loop c
1125  } // loop qy
1126  }); // end of element loop
1127 }
1128 
1129 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1130 static void PACurlCurlApply3D(const int D1D,
1131  const int Q1D,
1132  const bool symmetric,
1133  const int NE,
1134  const Array<double> &bo,
1135  const Array<double> &bc,
1136  const Array<double> &bot,
1137  const Array<double> &bct,
1138  const Array<double> &gc,
1139  const Array<double> &gct,
1140  const Vector &pa_data,
1141  const Vector &x,
1142  Vector &y)
1143 {
1144  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1145  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1146  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1147  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1148  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1149  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1150  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1151 
1152  constexpr static int VDIM = 3;
1153 
1154  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1155  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1156  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1157  auto Bct = Reshape(bct.Read(), D1D, Q1D);
1158  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1159  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1160  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1161  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1162  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1163 
1164  MFEM_FORALL(e, NE,
1165  {
1166  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1167  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1168 
1169  for (int qz = 0; qz < Q1D; ++qz)
1170  {
1171  for (int qy = 0; qy < Q1D; ++qy)
1172  {
1173  for (int qx = 0; qx < Q1D; ++qx)
1174  {
1175  for (int c = 0; c < VDIM; ++c)
1176  {
1177  curl[qz][qy][qx][c] = 0.0;
1178  }
1179  }
1180  }
1181  }
1182 
1183  // We treat x, y, z components separately for optimization specific to each.
1184 
1185  int osc = 0;
1186 
1187  {
1188  // x component
1189  const int D1Dz = D1D;
1190  const int D1Dy = D1D;
1191  const int D1Dx = D1D - 1;
1192 
1193  for (int dz = 0; dz < D1Dz; ++dz)
1194  {
1195  double gradXY[MAX_Q1D][MAX_Q1D][2];
1196  for (int qy = 0; qy < Q1D; ++qy)
1197  {
1198  for (int qx = 0; qx < Q1D; ++qx)
1199  {
1200  for (int d = 0; d < 2; ++d)
1201  {
1202  gradXY[qy][qx][d] = 0.0;
1203  }
1204  }
1205  }
1206 
1207  for (int dy = 0; dy < D1Dy; ++dy)
1208  {
1209  double massX[MAX_Q1D];
1210  for (int qx = 0; qx < Q1D; ++qx)
1211  {
1212  massX[qx] = 0.0;
1213  }
1214 
1215  for (int dx = 0; dx < D1Dx; ++dx)
1216  {
1217  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1218  for (int qx = 0; qx < Q1D; ++qx)
1219  {
1220  massX[qx] += t * Bo(qx,dx);
1221  }
1222  }
1223 
1224  for (int qy = 0; qy < Q1D; ++qy)
1225  {
1226  const double wy = Bc(qy,dy);
1227  const double wDy = Gc(qy,dy);
1228  for (int qx = 0; qx < Q1D; ++qx)
1229  {
1230  const double wx = massX[qx];
1231  gradXY[qy][qx][0] += wx * wDy;
1232  gradXY[qy][qx][1] += wx * wy;
1233  }
1234  }
1235  }
1236 
1237  for (int qz = 0; qz < Q1D; ++qz)
1238  {
1239  const double wz = Bc(qz,dz);
1240  const double wDz = Gc(qz,dz);
1241  for (int qy = 0; qy < Q1D; ++qy)
1242  {
1243  for (int qx = 0; qx < Q1D; ++qx)
1244  {
1245  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1246  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1247  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1248  }
1249  }
1250  }
1251  }
1252 
1253  osc += D1Dx * D1Dy * D1Dz;
1254  }
1255 
1256  {
1257  // y component
1258  const int D1Dz = D1D;
1259  const int D1Dy = D1D - 1;
1260  const int D1Dx = D1D;
1261 
1262  for (int dz = 0; dz < D1Dz; ++dz)
1263  {
1264  double gradXY[MAX_Q1D][MAX_Q1D][2];
1265  for (int qy = 0; qy < Q1D; ++qy)
1266  {
1267  for (int qx = 0; qx < Q1D; ++qx)
1268  {
1269  for (int d = 0; d < 2; ++d)
1270  {
1271  gradXY[qy][qx][d] = 0.0;
1272  }
1273  }
1274  }
1275 
1276  for (int dx = 0; dx < D1Dx; ++dx)
1277  {
1278  double massY[MAX_Q1D];
1279  for (int qy = 0; qy < Q1D; ++qy)
1280  {
1281  massY[qy] = 0.0;
1282  }
1283 
1284  for (int dy = 0; dy < D1Dy; ++dy)
1285  {
1286  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1287  for (int qy = 0; qy < Q1D; ++qy)
1288  {
1289  massY[qy] += t * Bo(qy,dy);
1290  }
1291  }
1292 
1293  for (int qx = 0; qx < Q1D; ++qx)
1294  {
1295  const double wx = Bc(qx,dx);
1296  const double wDx = Gc(qx,dx);
1297  for (int qy = 0; qy < Q1D; ++qy)
1298  {
1299  const double wy = massY[qy];
1300  gradXY[qy][qx][0] += wDx * wy;
1301  gradXY[qy][qx][1] += wx * wy;
1302  }
1303  }
1304  }
1305 
1306  for (int qz = 0; qz < Q1D; ++qz)
1307  {
1308  const double wz = Bc(qz,dz);
1309  const double wDz = Gc(qz,dz);
1310  for (int qy = 0; qy < Q1D; ++qy)
1311  {
1312  for (int qx = 0; qx < Q1D; ++qx)
1313  {
1314  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1315  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1316  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1317  }
1318  }
1319  }
1320  }
1321 
1322  osc += D1Dx * D1Dy * D1Dz;
1323  }
1324 
1325  {
1326  // z component
1327  const int D1Dz = D1D - 1;
1328  const int D1Dy = D1D;
1329  const int D1Dx = D1D;
1330 
1331  for (int dx = 0; dx < D1Dx; ++dx)
1332  {
1333  double gradYZ[MAX_Q1D][MAX_Q1D][2];
1334  for (int qz = 0; qz < Q1D; ++qz)
1335  {
1336  for (int qy = 0; qy < Q1D; ++qy)
1337  {
1338  for (int d = 0; d < 2; ++d)
1339  {
1340  gradYZ[qz][qy][d] = 0.0;
1341  }
1342  }
1343  }
1344 
1345  for (int dy = 0; dy < D1Dy; ++dy)
1346  {
1347  double massZ[MAX_Q1D];
1348  for (int qz = 0; qz < Q1D; ++qz)
1349  {
1350  massZ[qz] = 0.0;
1351  }
1352 
1353  for (int dz = 0; dz < D1Dz; ++dz)
1354  {
1355  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1356  for (int qz = 0; qz < Q1D; ++qz)
1357  {
1358  massZ[qz] += t * Bo(qz,dz);
1359  }
1360  }
1361 
1362  for (int qy = 0; qy < Q1D; ++qy)
1363  {
1364  const double wy = Bc(qy,dy);
1365  const double wDy = Gc(qy,dy);
1366  for (int qz = 0; qz < Q1D; ++qz)
1367  {
1368  const double wz = massZ[qz];
1369  gradYZ[qz][qy][0] += wz * wy;
1370  gradYZ[qz][qy][1] += wz * wDy;
1371  }
1372  }
1373  }
1374 
1375  for (int qx = 0; qx < Q1D; ++qx)
1376  {
1377  const double wx = Bc(qx,dx);
1378  const double wDx = Gc(qx,dx);
1379 
1380  for (int qy = 0; qy < Q1D; ++qy)
1381  {
1382  for (int qz = 0; qz < Q1D; ++qz)
1383  {
1384  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1385  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1386  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1387  }
1388  }
1389  }
1390  }
1391  }
1392 
1393  // Apply D operator.
1394  for (int qz = 0; qz < Q1D; ++qz)
1395  {
1396  for (int qy = 0; qy < Q1D; ++qy)
1397  {
1398  for (int qx = 0; qx < Q1D; ++qx)
1399  {
1400  const double O11 = op(qx,qy,qz,0,e);
1401  const double O12 = op(qx,qy,qz,1,e);
1402  const double O13 = op(qx,qy,qz,2,e);
1403  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1404  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1405  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1406  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1407  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1408  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1409 
1410  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1411  (O13 * curl[qz][qy][qx][2]);
1412  const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1413  (O23 * curl[qz][qy][qx][2]);
1414  const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1415  (O33 * curl[qz][qy][qx][2]);
1416 
1417  curl[qz][qy][qx][0] = c1;
1418  curl[qz][qy][qx][1] = c2;
1419  curl[qz][qy][qx][2] = c3;
1420  }
1421  }
1422  }
1423 
1424  // x component
1425  osc = 0;
1426  {
1427  const int D1Dz = D1D;
1428  const int D1Dy = D1D;
1429  const int D1Dx = D1D - 1;
1430 
1431  for (int qz = 0; qz < Q1D; ++qz)
1432  {
1433  double gradXY12[MAX_D1D][MAX_D1D];
1434  double gradXY21[MAX_D1D][MAX_D1D];
1435 
1436  for (int dy = 0; dy < D1Dy; ++dy)
1437  {
1438  for (int dx = 0; dx < D1Dx; ++dx)
1439  {
1440  gradXY12[dy][dx] = 0.0;
1441  gradXY21[dy][dx] = 0.0;
1442  }
1443  }
1444  for (int qy = 0; qy < Q1D; ++qy)
1445  {
1446  double massX[MAX_D1D][2];
1447  for (int dx = 0; dx < D1Dx; ++dx)
1448  {
1449  for (int n = 0; n < 2; ++n)
1450  {
1451  massX[dx][n] = 0.0;
1452  }
1453  }
1454  for (int qx = 0; qx < Q1D; ++qx)
1455  {
1456  for (int dx = 0; dx < D1Dx; ++dx)
1457  {
1458  const double wx = Bot(dx,qx);
1459 
1460  massX[dx][0] += wx * curl[qz][qy][qx][1];
1461  massX[dx][1] += wx * curl[qz][qy][qx][2];
1462  }
1463  }
1464  for (int dy = 0; dy < D1Dy; ++dy)
1465  {
1466  const double wy = Bct(dy,qy);
1467  const double wDy = Gct(dy,qy);
1468 
1469  for (int dx = 0; dx < D1Dx; ++dx)
1470  {
1471  gradXY21[dy][dx] += massX[dx][0] * wy;
1472  gradXY12[dy][dx] += massX[dx][1] * wDy;
1473  }
1474  }
1475  }
1476 
1477  for (int dz = 0; dz < D1Dz; ++dz)
1478  {
1479  const double wz = Bct(dz,qz);
1480  const double wDz = Gct(dz,qz);
1481  for (int dy = 0; dy < D1Dy; ++dy)
1482  {
1483  for (int dx = 0; dx < D1Dx; ++dx)
1484  {
1485  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1486  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1487  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1488  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1489  }
1490  }
1491  }
1492  } // loop qz
1493 
1494  osc += D1Dx * D1Dy * D1Dz;
1495  }
1496 
1497  // y component
1498  {
1499  const int D1Dz = D1D;
1500  const int D1Dy = D1D - 1;
1501  const int D1Dx = D1D;
1502 
1503  for (int qz = 0; qz < Q1D; ++qz)
1504  {
1505  double gradXY02[MAX_D1D][MAX_D1D];
1506  double gradXY20[MAX_D1D][MAX_D1D];
1507 
1508  for (int dy = 0; dy < D1Dy; ++dy)
1509  {
1510  for (int dx = 0; dx < D1Dx; ++dx)
1511  {
1512  gradXY02[dy][dx] = 0.0;
1513  gradXY20[dy][dx] = 0.0;
1514  }
1515  }
1516  for (int qx = 0; qx < Q1D; ++qx)
1517  {
1518  double massY[MAX_D1D][2];
1519  for (int dy = 0; dy < D1Dy; ++dy)
1520  {
1521  massY[dy][0] = 0.0;
1522  massY[dy][1] = 0.0;
1523  }
1524  for (int qy = 0; qy < Q1D; ++qy)
1525  {
1526  for (int dy = 0; dy < D1Dy; ++dy)
1527  {
1528  const double wy = Bot(dy,qy);
1529 
1530  massY[dy][0] += wy * curl[qz][qy][qx][2];
1531  massY[dy][1] += wy * curl[qz][qy][qx][0];
1532  }
1533  }
1534  for (int dx = 0; dx < D1Dx; ++dx)
1535  {
1536  const double wx = Bct(dx,qx);
1537  const double wDx = Gct(dx,qx);
1538 
1539  for (int dy = 0; dy < D1Dy; ++dy)
1540  {
1541  gradXY02[dy][dx] += massY[dy][0] * wDx;
1542  gradXY20[dy][dx] += massY[dy][1] * wx;
1543  }
1544  }
1545  }
1546 
1547  for (int dz = 0; dz < D1Dz; ++dz)
1548  {
1549  const double wz = Bct(dz,qz);
1550  const double wDz = Gct(dz,qz);
1551  for (int dy = 0; dy < D1Dy; ++dy)
1552  {
1553  for (int dx = 0; dx < D1Dx; ++dx)
1554  {
1555  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1556  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1557  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1558  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1559  }
1560  }
1561  }
1562  } // loop qz
1563 
1564  osc += D1Dx * D1Dy * D1Dz;
1565  }
1566 
1567  // z component
1568  {
1569  const int D1Dz = D1D - 1;
1570  const int D1Dy = D1D;
1571  const int D1Dx = D1D;
1572 
1573  for (int qx = 0; qx < Q1D; ++qx)
1574  {
1575  double gradYZ01[MAX_D1D][MAX_D1D];
1576  double gradYZ10[MAX_D1D][MAX_D1D];
1577 
1578  for (int dy = 0; dy < D1Dy; ++dy)
1579  {
1580  for (int dz = 0; dz < D1Dz; ++dz)
1581  {
1582  gradYZ01[dz][dy] = 0.0;
1583  gradYZ10[dz][dy] = 0.0;
1584  }
1585  }
1586  for (int qy = 0; qy < Q1D; ++qy)
1587  {
1588  double massZ[MAX_D1D][2];
1589  for (int dz = 0; dz < D1Dz; ++dz)
1590  {
1591  for (int n = 0; n < 2; ++n)
1592  {
1593  massZ[dz][n] = 0.0;
1594  }
1595  }
1596  for (int qz = 0; qz < Q1D; ++qz)
1597  {
1598  for (int dz = 0; dz < D1Dz; ++dz)
1599  {
1600  const double wz = Bot(dz,qz);
1601 
1602  massZ[dz][0] += wz * curl[qz][qy][qx][0];
1603  massZ[dz][1] += wz * curl[qz][qy][qx][1];
1604  }
1605  }
1606  for (int dy = 0; dy < D1Dy; ++dy)
1607  {
1608  const double wy = Bct(dy,qy);
1609  const double wDy = Gct(dy,qy);
1610 
1611  for (int dz = 0; dz < D1Dz; ++dz)
1612  {
1613  gradYZ01[dz][dy] += wy * massZ[dz][1];
1614  gradYZ10[dz][dy] += wDy * massZ[dz][0];
1615  }
1616  }
1617  }
1618 
1619  for (int dx = 0; dx < D1Dx; ++dx)
1620  {
1621  const double wx = Bct(dx,qx);
1622  const double wDx = Gct(dx,qx);
1623 
1624  for (int dy = 0; dy < D1Dy; ++dy)
1625  {
1626  for (int dz = 0; dz < D1Dz; ++dz)
1627  {
1628  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1629  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1630  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1631  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1632  }
1633  }
1634  }
1635  } // loop qx
1636  }
1637  }); // end of element loop
1638 }
1639 
1640 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1641 static void SmemPACurlCurlApply3D(const int D1D,
1642  const int Q1D,
1643  const bool symmetric,
1644  const int NE,
1645  const Array<double> &bo,
1646  const Array<double> &bc,
1647  const Array<double> &bot,
1648  const Array<double> &bct,
1649  const Array<double> &gc,
1650  const Array<double> &gct,
1651  const Vector &pa_data,
1652  const Vector &x,
1653  Vector &y)
1654 {
1655  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1656  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1657  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1658  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1659  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1660  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1661  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1662 
1663  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1664  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1665  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1666  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1667  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1668  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1669 
1670  const int s = symmetric ? 6 : 9;
1671 
1672  auto device_kernel = [=] MFEM_DEVICE (int e)
1673  {
1674  constexpr int VDIM = 3;
1675 
1676  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1677  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1678  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1679 
1680  double ope[9];
1681  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1682  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1683 
1684  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1685 
1686  MFEM_FOREACH_THREAD(qx,x,Q1D)
1687  {
1688  MFEM_FOREACH_THREAD(qy,y,Q1D)
1689  {
1690  MFEM_FOREACH_THREAD(qz,z,Q1D)
1691  {
1692  for (int i=0; i<s; ++i)
1693  {
1694  ope[i] = op(qx,qy,qz,i,e);
1695  }
1696  }
1697  }
1698  }
1699 
1700  const int tidx = MFEM_THREAD_ID(x);
1701  const int tidy = MFEM_THREAD_ID(y);
1702  const int tidz = MFEM_THREAD_ID(z);
1703 
1704  if (tidz == 0)
1705  {
1706  MFEM_FOREACH_THREAD(d,y,D1D)
1707  {
1708  MFEM_FOREACH_THREAD(q,x,Q1D)
1709  {
1710  sBc[d][q] = Bc(q,d);
1711  sGc[d][q] = Gc(q,d);
1712  if (d < D1D-1)
1713  {
1714  sBo[d][q] = Bo(q,d);
1715  }
1716  }
1717  }
1718  }
1719  MFEM_SYNC_THREAD;
1720 
1721  for (int qz=0; qz < Q1D; ++qz)
1722  {
1723  if (tidz == qz)
1724  {
1725  MFEM_FOREACH_THREAD(qy,y,Q1D)
1726  {
1727  MFEM_FOREACH_THREAD(qx,x,Q1D)
1728  {
1729  for (int i=0; i<3; ++i)
1730  {
1731  curl[qy][qx][i] = 0.0;
1732  }
1733  }
1734  }
1735  }
1736 
1737  int osc = 0;
1738  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1739  {
1740  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1741  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1742  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1743 
1744  MFEM_FOREACH_THREAD(dz,z,D1Dz)
1745  {
1746  MFEM_FOREACH_THREAD(dy,y,D1Dy)
1747  {
1748  MFEM_FOREACH_THREAD(dx,x,D1Dx)
1749  {
1750  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1751  }
1752  }
1753  }
1754  MFEM_SYNC_THREAD;
1755 
1756  if (tidz == qz)
1757  {
1758  if (c == 0)
1759  {
1760  for (int i=0; i<s; ++i)
1761  {
1762  sop[i][tidx][tidy] = ope[i];
1763  }
1764  }
1765 
1766  MFEM_FOREACH_THREAD(qy,y,Q1D)
1767  {
1768  MFEM_FOREACH_THREAD(qx,x,Q1D)
1769  {
1770  double u = 0.0;
1771  double v = 0.0;
1772 
1773  // We treat x, y, z components separately for optimization specific to each.
1774  if (c == 0) // x component
1775  {
1776  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1777 
1778  for (int dz = 0; dz < D1Dz; ++dz)
1779  {
1780  const double wz = sBc[dz][qz];
1781  const double wDz = sGc[dz][qz];
1782 
1783  for (int dy = 0; dy < D1Dy; ++dy)
1784  {
1785  const double wy = sBc[dy][qy];
1786  const double wDy = sGc[dy][qy];
1787 
1788  for (int dx = 0; dx < D1Dx; ++dx)
1789  {
1790  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1791  u += wx * wDy * wz;
1792  v += wx * wy * wDz;
1793  }
1794  }
1795  }
1796 
1797  curl[qy][qx][1] += v; // (u_0)_{x_2}
1798  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1799  }
1800  else if (c == 1) // y component
1801  {
1802  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1803 
1804  for (int dz = 0; dz < D1Dz; ++dz)
1805  {
1806  const double wz = sBc[dz][qz];
1807  const double wDz = sGc[dz][qz];
1808 
1809  for (int dy = 0; dy < D1Dy; ++dy)
1810  {
1811  const double wy = sBo[dy][qy];
1812 
1813  for (int dx = 0; dx < D1Dx; ++dx)
1814  {
1815  const double t = sX[dz][dy][dx];
1816  const double wx = t * sBc[dx][qx];
1817  const double wDx = t * sGc[dx][qx];
1818 
1819  u += wDx * wy * wz;
1820  v += wx * wy * wDz;
1821  }
1822  }
1823  }
1824 
1825  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1826  curl[qy][qx][2] += u; // (u_1)_{x_0}
1827  }
1828  else // z component
1829  {
1830  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1831 
1832  for (int dz = 0; dz < D1Dz; ++dz)
1833  {
1834  const double wz = sBo[dz][qz];
1835 
1836  for (int dy = 0; dy < D1Dy; ++dy)
1837  {
1838  const double wy = sBc[dy][qy];
1839  const double wDy = sGc[dy][qy];
1840 
1841  for (int dx = 0; dx < D1Dx; ++dx)
1842  {
1843  const double t = sX[dz][dy][dx];
1844  const double wx = t * sBc[dx][qx];
1845  const double wDx = t * sGc[dx][qx];
1846 
1847  u += wDx * wy * wz;
1848  v += wx * wDy * wz;
1849  }
1850  }
1851  }
1852 
1853  curl[qy][qx][0] += v; // (u_2)_{x_1}
1854  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1855  }
1856  } // qx
1857  } // qy
1858  } // tidz == qz
1859 
1860  osc += D1Dx * D1Dy * D1Dz;
1861  MFEM_SYNC_THREAD;
1862  } // c
1863 
1864  double dxyz1 = 0.0;
1865  double dxyz2 = 0.0;
1866  double dxyz3 = 0.0;
1867 
1868  MFEM_FOREACH_THREAD(dz,z,D1D)
1869  {
1870  const double wcz = sBc[dz][qz];
1871  const double wcDz = sGc[dz][qz];
1872  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1873 
1874  MFEM_FOREACH_THREAD(dy,y,D1D)
1875  {
1876  MFEM_FOREACH_THREAD(dx,x,D1D)
1877  {
1878  for (int qy = 0; qy < Q1D; ++qy)
1879  {
1880  const double wcy = sBc[dy][qy];
1881  const double wcDy = sGc[dy][qy];
1882  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1883 
1884  for (int qx = 0; qx < Q1D; ++qx)
1885  {
1886  const double O11 = sop[0][qx][qy];
1887  const double O12 = sop[1][qx][qy];
1888  const double O13 = sop[2][qx][qy];
1889  const double O21 = symmetric ? O12 : sop[3][qx][qy];
1890  const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1891  const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1892  const double O31 = symmetric ? O13 : sop[6][qx][qy];
1893  const double O32 = symmetric ? O23 : sop[7][qx][qy];
1894  const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1895 
1896  const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1897  (O13 * curl[qy][qx][2]);
1898  const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1899  (O23 * curl[qy][qx][2]);
1900  const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1901  (O33 * curl[qy][qx][2]);
1902 
1903  const double wcx = sBc[dx][qx];
1904  const double wDx = sGc[dx][qx];
1905 
1906  if (dx < D1D-1)
1907  {
1908  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1909  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1910  const double wx = sBo[dx][qx];
1911  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1912  }
1913 
1914  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1915  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1916  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1917 
1918  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1919  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1920  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1921  } // qx
1922  } // qy
1923  } // dx
1924  } // dy
1925  } // dz
1926 
1927  MFEM_SYNC_THREAD;
1928 
1929  MFEM_FOREACH_THREAD(dz,z,D1D)
1930  {
1931  MFEM_FOREACH_THREAD(dy,y,D1D)
1932  {
1933  MFEM_FOREACH_THREAD(dx,x,D1D)
1934  {
1935  if (dx < D1D-1)
1936  {
1937  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1938  }
1939  if (dy < D1D-1)
1940  {
1941  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1942  }
1943  if (dz < D1D-1)
1944  {
1945  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1946  }
1947  }
1948  }
1949  }
1950  } // qz
1951  }; // end of element loop
1952 
1953  auto host_kernel = [&] MFEM_LAMBDA (int)
1954  {
1955  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
1956  };
1957 
1958  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
1959 }
1960 
1961 static void PACurlL2Apply2D(const int D1D,
1962  const int D1Dtest,
1963  const int Q1D,
1964  const int NE,
1965  const Array<double> &bo,
1966  const Array<double> &bot,
1967  const Array<double> &bt,
1968  const Array<double> &gc,
1969  const Vector &pa_data,
1970  const Vector &x, // trial = H(curl)
1971  Vector &y) // test = L2 or H1
1972 {
1973  constexpr static int VDIM = 2;
1974  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1975  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1976  const int H1 = (D1Dtest == D1D);
1977 
1978  MFEM_VERIFY(y.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension");
1979 
1980  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1981  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1982  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1983  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1984  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1985  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1986  auto Y = Reshape(y.ReadWrite(), D1Dtest, D1Dtest, NE);
1987 
1988  MFEM_FORALL(e, NE,
1989  {
1990  double curl[MAX_Q1D][MAX_Q1D];
1991 
1992  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1993 
1994  for (int qy = 0; qy < Q1D; ++qy)
1995  {
1996  for (int qx = 0; qx < Q1D; ++qx)
1997  {
1998  curl[qy][qx] = 0.0;
1999  }
2000  }
2001 
2002  int osc = 0;
2003 
2004  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2005  {
2006  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2007  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2008 
2009  for (int dy = 0; dy < D1Dy; ++dy)
2010  {
2011  double gradX[MAX_Q1D];
2012  for (int qx = 0; qx < Q1D; ++qx)
2013  {
2014  gradX[qx] = 0;
2015  }
2016 
2017  for (int dx = 0; dx < D1Dx; ++dx)
2018  {
2019  const double t = X(dx + (dy * D1Dx) + osc, e);
2020  for (int qx = 0; qx < Q1D; ++qx)
2021  {
2022  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2023  }
2024  }
2025 
2026  for (int qy = 0; qy < Q1D; ++qy)
2027  {
2028  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
2029  for (int qx = 0; qx < Q1D; ++qx)
2030  {
2031  curl[qy][qx] += gradX[qx] * wy;
2032  }
2033  }
2034  }
2035 
2036  osc += D1Dx * D1Dy;
2037  } // loop (c) over components
2038 
2039  // Apply D operator.
2040  for (int qy = 0; qy < Q1D; ++qy)
2041  {
2042  for (int qx = 0; qx < Q1D; ++qx)
2043  {
2044  curl[qy][qx] *= op(qx,qy,e);
2045  }
2046  }
2047 
2048  for (int qy = 0; qy < Q1D; ++qy)
2049  {
2050  double sol_x[MAX_D1D];
2051  for (int dx = 0; dx < D1Dtest; ++dx)
2052  {
2053  sol_x[dx] = 0.0;
2054  }
2055  for (int qx = 0; qx < Q1D; ++qx)
2056  {
2057  const double s = curl[qy][qx];
2058  for (int dx = 0; dx < D1Dtest; ++dx)
2059  {
2060  sol_x[dx] += s * ((H1 == 1) ? Bt(dx,qx) : Bot(dx,qx));
2061  }
2062  }
2063  for (int dy = 0; dy < D1Dtest; ++dy)
2064  {
2065  const double wy = (H1 == 1) ? Bt(dy,qy) : Bot(dy,qy);
2066 
2067  for (int dx = 0; dx < D1Dtest; ++dx)
2068  {
2069  Y(dx,dy,e) += sol_x[dx] * wy;
2070  }
2071  }
2072  } // loop qy
2073  }); // end of element loop
2074 }
2075 
2076 static void PACurlL2ApplyTranspose2D(const int D1D,
2077  const int D1Dtest,
2078  const int Q1D,
2079  const int NE,
2080  const Array<double> &bo,
2081  const Array<double> &bot,
2082  const Array<double> &b,
2083  const Array<double> &gct,
2084  const Vector &pa_data,
2085  const Vector &x, // trial = H(curl)
2086  Vector &y) // test = L2 or H1
2087 {
2088  constexpr static int VDIM = 2;
2089  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2090  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2091  const int H1 = (D1Dtest == D1D);
2092 
2093  MFEM_VERIFY(x.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension");
2094 
2095  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2096  auto B = Reshape(b.Read(), Q1D, D1D);
2097  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2098  auto Gct = Reshape(gct.Read(), D1D, Q1D);
2099  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2100  auto X = Reshape(x.Read(), D1Dtest, D1Dtest, NE);
2101  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2102 
2103  MFEM_FORALL(e, NE,
2104  {
2105  double mass[MAX_Q1D][MAX_Q1D];
2106 
2107  // Zero-order term in L2 or H1 test space
2108 
2109  for (int qy = 0; qy < Q1D; ++qy)
2110  {
2111  for (int qx = 0; qx < Q1D; ++qx)
2112  {
2113  mass[qy][qx] = 0.0;
2114  }
2115  }
2116 
2117  for (int dy = 0; dy < D1Dtest; ++dy)
2118  {
2119  double sol_x[MAX_Q1D];
2120  for (int qy = 0; qy < Q1D; ++qy)
2121  {
2122  sol_x[qy] = 0.0;
2123  }
2124  for (int dx = 0; dx < D1Dtest; ++dx)
2125  {
2126  const double s = X(dx,dy,e);
2127  for (int qx = 0; qx < Q1D; ++qx)
2128  {
2129  sol_x[qx] += s * ((H1 == 1) ? B(qx,dx) : Bo(qx,dx));
2130  }
2131  }
2132  for (int qy = 0; qy < Q1D; ++qy)
2133  {
2134  const double d2q = (H1 == 1) ? B(qy,dy) : Bo(qy,dy);
2135  for (int qx = 0; qx < Q1D; ++qx)
2136  {
2137  mass[qy][qx] += d2q * sol_x[qx];
2138  }
2139  }
2140  }
2141 
2142  // Apply D operator.
2143  for (int qy = 0; qy < Q1D; ++qy)
2144  {
2145  for (int qx = 0; qx < Q1D; ++qx)
2146  {
2147  mass[qy][qx] *= op(qx,qy,e);
2148  }
2149  }
2150 
2151  for (int qy = 0; qy < Q1D; ++qy)
2152  {
2153  int osc = 0;
2154 
2155  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2156  {
2157  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2158  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2159 
2160  double gradX[MAX_D1D];
2161  for (int dx = 0; dx < D1Dx; ++dx)
2162  {
2163  gradX[dx] = 0.0;
2164  }
2165  for (int qx = 0; qx < Q1D; ++qx)
2166  {
2167  for (int dx = 0; dx < D1Dx; ++dx)
2168  {
2169  gradX[dx] += mass[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
2170  }
2171  }
2172  for (int dy = 0; dy < D1Dy; ++dy)
2173  {
2174  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
2175 
2176  for (int dx = 0; dx < D1Dx; ++dx)
2177  {
2178  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
2179  }
2180  }
2181 
2182  osc += D1Dx * D1Dy;
2183  } // loop c
2184  } // loop qy
2185  }); // end of element loop
2186 }
2187 
2188 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
2189 {
2190  if (dim == 3)
2191  {
2192  if (Device::Allows(Backend::DEVICE_MASK))
2193  {
2194  const int ID = (dofs1D << 4) | quad1D;
2195  switch (ID)
2196  {
2197  case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2198  mapsO->B, mapsC->B, mapsO->Bt,
2199  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2200  case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2201  mapsO->B, mapsC->B, mapsO->Bt,
2202  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2203  case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2204  mapsO->B,
2205  mapsC->B, mapsO->Bt,
2206  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2207  case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2208  mapsO->B, mapsC->B, mapsO->Bt,
2209  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2210  default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2211  mapsC->B, mapsO->Bt, mapsC->Bt,
2212  mapsC->G, mapsC->Gt, pa_data, x, y);
2213  }
2214  }
2215  else
2216  PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2217  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2218  }
2219  else if (dim == 2)
2220  {
2221  PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2222  mapsC->G, mapsC->Gt, pa_data, x, y);
2223  }
2224  else
2225  {
2226  MFEM_ABORT("Unsupported dimension!");
2227  }
2228 }
2229 
2230 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2231  const int Q1D,
2232  const int NE,
2233  const Array<double> &bo,
2234  const Array<double> &gc,
2235  const Vector &pa_data,
2236  Vector &diag)
2237 {
2238  constexpr static int VDIM = 2;
2239  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2240 
2241  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2242  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2243  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2244  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2245 
2246  MFEM_FORALL(e, NE,
2247  {
2248  int osc = 0;
2249 
2250  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2251  {
2252  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2253  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2254 
2255  double t[MAX_Q1D];
2256 
2257  for (int dy = 0; dy < D1Dy; ++dy)
2258  {
2259  for (int qx = 0; qx < Q1D; ++qx)
2260  {
2261  t[qx] = 0.0;
2262  for (int qy = 0; qy < Q1D; ++qy)
2263  {
2264  const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2265  t[qx] += wy * wy * op(qx,qy,e);
2266  }
2267  }
2268 
2269  for (int dx = 0; dx < D1Dx; ++dx)
2270  {
2271  for (int qx = 0; qx < Q1D; ++qx)
2272  {
2273  const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2274  D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2275  }
2276  }
2277  }
2278 
2279  osc += D1Dx * D1Dy;
2280  } // loop c
2281  }); // end of element loop
2282 }
2283 
2284 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2285 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2286  const int Q1D,
2287  const bool symmetric,
2288  const int NE,
2289  const Array<double> &bo,
2290  const Array<double> &bc,
2291  const Array<double> &go,
2292  const Array<double> &gc,
2293  const Vector &pa_data,
2294  Vector &diag)
2295 {
2296  constexpr static int VDIM = 3;
2297  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2298  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2299 
2300  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2301  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2302  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2303  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2304  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2305  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2306 
2307  const int s = symmetric ? 6 : 9;
2308  const int i11 = 0;
2309  const int i12 = 1;
2310  const int i13 = 2;
2311  const int i21 = symmetric ? i12 : 3;
2312  const int i22 = symmetric ? 3 : 4;
2313  const int i23 = symmetric ? 4 : 5;
2314  const int i31 = symmetric ? i13 : 6;
2315  const int i32 = symmetric ? i23 : 7;
2316  const int i33 = symmetric ? 5 : 8;
2317 
2318  MFEM_FORALL(e, NE,
2319  {
2320  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2321  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2322  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2323  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2324  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2325 
2326  // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2327  // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2328 
2329  int osc = 0;
2330 
2331  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2332  {
2333  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2334  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2335  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2336 
2337  double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2338 
2339  // z contraction
2340  for (int qx = 0; qx < Q1D; ++qx)
2341  {
2342  for (int qy = 0; qy < Q1D; ++qy)
2343  {
2344  for (int dz = 0; dz < D1Dz; ++dz)
2345  {
2346  for (int i=0; i<s; ++i)
2347  {
2348  for (int d=0; d<3; ++d)
2349  {
2350  zt[qx][qy][dz][i][d] = 0.0;
2351  }
2352  }
2353 
2354  for (int qz = 0; qz < Q1D; ++qz)
2355  {
2356  const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2357  const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2358 
2359  for (int i=0; i<s; ++i)
2360  {
2361  zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2362  zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2363  zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2364  }
2365  }
2366  }
2367  }
2368  } // end of z contraction
2369 
2370  double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2371 
2372  // y contraction
2373  for (int qx = 0; qx < Q1D; ++qx)
2374  {
2375  for (int dz = 0; dz < D1Dz; ++dz)
2376  {
2377  for (int dy = 0; dy < D1Dy; ++dy)
2378  {
2379  for (int i=0; i<s; ++i)
2380  {
2381  for (int d=0; d<3; ++d)
2382  for (int j=0; j<3; ++j)
2383  {
2384  yt[qx][dy][dz][i][d][j] = 0.0;
2385  }
2386  }
2387 
2388  for (int qy = 0; qy < Q1D; ++qy)
2389  {
2390  const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2391  const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2392 
2393  for (int i=0; i<s; ++i)
2394  {
2395  for (int d=0; d<3; ++d)
2396  {
2397  yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2398  yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2399  yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2400  }
2401  }
2402  }
2403  }
2404  }
2405  } // end of y contraction
2406 
2407  // x contraction
2408  for (int dz = 0; dz < D1Dz; ++dz)
2409  {
2410  for (int dy = 0; dy < D1Dy; ++dy)
2411  {
2412  for (int dx = 0; dx < D1Dx; ++dx)
2413  {
2414  for (int qx = 0; qx < Q1D; ++qx)
2415  {
2416  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2417  const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2418 
2419  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2420  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2421  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2422  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2423  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2424 
2425  /*
2426  const double O11 = op(q,0,e);
2427  const double O12 = op(q,1,e);
2428  const double O13 = op(q,2,e);
2429  const double O22 = op(q,3,e);
2430  const double O23 = op(q,4,e);
2431  const double O33 = op(q,5,e);
2432  */
2433 
2434  if (c == 0)
2435  {
2436  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2437  const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2438  - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2439 
2440  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2441  }
2442  else if (c == 1)
2443  {
2444  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2445  const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2446  - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2447  + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2448 
2449  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2450  }
2451  else
2452  {
2453  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2454  const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2455  - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2456  + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2457 
2458  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2459  }
2460  }
2461  }
2462  }
2463  } // end of x contraction
2464 
2465  osc += D1Dx * D1Dy * D1Dz;
2466  } // loop c
2467  }); // end of element loop
2468 }
2469 
2470 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2471 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2472  const int Q1D,
2473  const bool symmetric,
2474  const int NE,
2475  const Array<double> &bo,
2476  const Array<double> &bc,
2477  const Array<double> &go,
2478  const Array<double> &gc,
2479  const Vector &pa_data,
2480  Vector &diag)
2481 {
2482  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2483  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2484 
2485  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2486  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2487  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2488  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2489  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2490  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2491 
2492  const int s = symmetric ? 6 : 9;
2493  const int i11 = 0;
2494  const int i12 = 1;
2495  const int i13 = 2;
2496  const int i21 = symmetric ? i12 : 3;
2497  const int i22 = symmetric ? 3 : 4;
2498  const int i23 = symmetric ? 4 : 5;
2499  const int i31 = symmetric ? i13 : 6;
2500  const int i32 = symmetric ? i23 : 7;
2501  const int i33 = symmetric ? 5 : 8;
2502 
2503  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2504  {
2505  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2506  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2507  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2508  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2509  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2510 
2511  constexpr int VDIM = 3;
2512 
2513  MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2514  MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2515  MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2516  MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2517 
2518  double ope[9];
2519  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2520 
2521  MFEM_FOREACH_THREAD(qx,x,Q1D)
2522  {
2523  MFEM_FOREACH_THREAD(qy,y,Q1D)
2524  {
2525  MFEM_FOREACH_THREAD(qz,z,Q1D)
2526  {
2527  for (int i=0; i<s; ++i)
2528  {
2529  ope[i] = op(qx,qy,qz,i,e);
2530  }
2531  }
2532  }
2533  }
2534 
2535  const int tidx = MFEM_THREAD_ID(x);
2536  const int tidy = MFEM_THREAD_ID(y);
2537  const int tidz = MFEM_THREAD_ID(z);
2538 
2539  if (tidz == 0)
2540  {
2541  MFEM_FOREACH_THREAD(d,y,D1D)
2542  {
2543  MFEM_FOREACH_THREAD(q,x,Q1D)
2544  {
2545  sBc[q][d] = Bc(q,d);
2546  sGc[q][d] = Gc(q,d);
2547  if (d < D1D-1)
2548  {
2549  sBo[q][d] = Bo(q,d);
2550  sGo[q][d] = Go(q,d);
2551  }
2552  }
2553  }
2554  }
2555  MFEM_SYNC_THREAD;
2556 
2557  int osc = 0;
2558  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2559  {
2560  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2561  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2562  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2563 
2564  double dxyz = 0.0;
2565 
2566  for (int qz=0; qz < Q1D; ++qz)
2567  {
2568  if (tidz == qz)
2569  {
2570  for (int i=0; i<s; ++i)
2571  {
2572  sop[i][tidx][tidy] = ope[i];
2573  }
2574  }
2575 
2576  MFEM_SYNC_THREAD;
2577 
2578  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2579  {
2580  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2581  const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2582 
2583  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2584  {
2585  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2586  {
2587  for (int qy = 0; qy < Q1D; ++qy)
2588  {
2589  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2590  const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2591 
2592  for (int qx = 0; qx < Q1D; ++qx)
2593  {
2594  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2595  const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2596 
2597  if (c == 0)
2598  {
2599  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2600 
2601  // (u_0)_{x_2} O22 (u_0)_{x_2}
2602  dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2603 
2604  // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2605  dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2606 
2607  // (u_0)_{x_1} O33 (u_0)_{x_1}
2608  dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2609  }
2610  else if (c == 1)
2611  {
2612  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2613 
2614  // (u_1)_{x_2} O11 (u_1)_{x_2}
2615  dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2616 
2617  // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2618  dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2619 
2620  // (u_1)_{x_0} O33 (u_1)_{x_0})
2621  dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2622  }
2623  else
2624  {
2625  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2626 
2627  // (u_2)_{x_1} O11 (u_2)_{x_1}
2628  dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2629 
2630  // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2631  dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2632 
2633  // (u_2)_{x_0} O22 (u_2)_{x_0}
2634  dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2635  }
2636  }
2637  }
2638  }
2639  }
2640  }
2641 
2642  MFEM_SYNC_THREAD;
2643  } // qz loop
2644 
2645  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2646  {
2647  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2648  {
2649  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2650  {
2651  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2652  }
2653  }
2654  }
2655 
2656  osc += D1Dx * D1Dy * D1Dz;
2657  } // c loop
2658  }); // end of element loop
2659 }
2660 
2661 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2662 {
2663  if (dim == 3)
2664  {
2665  if (Device::Allows(Backend::DEVICE_MASK))
2666  {
2667  const int ID = (dofs1D << 4) | quad1D;
2668  switch (ID)
2669  {
2670  case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2671  symmetric, ne,
2672  mapsO->B, mapsC->B,
2673  mapsO->G, mapsC->G,
2674  pa_data, diag);
2675  case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2676  symmetric, ne,
2677  mapsO->B, mapsC->B,
2678  mapsO->G, mapsC->G,
2679  pa_data, diag);
2680  case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2681  symmetric, ne,
2682  mapsO->B, mapsC->B,
2683  mapsO->G, mapsC->G,
2684  pa_data, diag);
2685  case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2686  symmetric, ne,
2687  mapsO->B, mapsC->B,
2688  mapsO->G, mapsC->G,
2689  pa_data, diag);
2690  default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2691  mapsO->B, mapsC->B,
2692  mapsO->G, mapsC->G,
2693  pa_data, diag);
2694  }
2695  }
2696  else
2697  PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2698  mapsO->B, mapsC->B,
2699  mapsO->G, mapsC->G,
2700  pa_data, diag);
2701  }
2702  else if (dim == 2)
2703  {
2704  PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2705  mapsO->B, mapsC->G, pa_data, diag);
2706  }
2707  else
2708  {
2709  MFEM_ABORT("Unsupported dimension!");
2710  }
2711 }
2712 
2713 // Apply to x corresponding to DOFs in H^1 (trial), whose gradients are
2714 // integrated against H(curl) test functions corresponding to y.
2715 void PAHcurlH1Apply3D(const int D1D,
2716  const int Q1D,
2717  const int NE,
2718  const Array<double> &bc,
2719  const Array<double> &gc,
2720  const Array<double> &bot,
2721  const Array<double> &bct,
2722  const Vector &pa_data,
2723  const Vector &x,
2724  Vector &y)
2725 {
2726  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2727  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2728 
2729  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2730  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2731 
2732  constexpr static int VDIM = 3;
2733 
2734  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2735  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2736  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2737  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2738  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2739  auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2740  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2741 
2742  MFEM_FORALL(e, NE,
2743  {
2744  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2745 
2746  for (int qz = 0; qz < Q1D; ++qz)
2747  {
2748  for (int qy = 0; qy < Q1D; ++qy)
2749  {
2750  for (int qx = 0; qx < Q1D; ++qx)
2751  {
2752  for (int c = 0; c < VDIM; ++c)
2753  {
2754  mass[qz][qy][qx][c] = 0.0;
2755  }
2756  }
2757  }
2758  }
2759 
2760  for (int dz = 0; dz < D1D; ++dz)
2761  {
2762  double gradXY[MAX_Q1D][MAX_Q1D][3];
2763  for (int qy = 0; qy < Q1D; ++qy)
2764  {
2765  for (int qx = 0; qx < Q1D; ++qx)
2766  {
2767  gradXY[qy][qx][0] = 0.0;
2768  gradXY[qy][qx][1] = 0.0;
2769  gradXY[qy][qx][2] = 0.0;
2770  }
2771  }
2772  for (int dy = 0; dy < D1D; ++dy)
2773  {
2774  double gradX[MAX_Q1D][2];
2775  for (int qx = 0; qx < Q1D; ++qx)
2776  {
2777  gradX[qx][0] = 0.0;
2778  gradX[qx][1] = 0.0;
2779  }
2780  for (int dx = 0; dx < D1D; ++dx)
2781  {
2782  const double s = X(dx,dy,dz,e);
2783  for (int qx = 0; qx < Q1D; ++qx)
2784  {
2785  gradX[qx][0] += s * Bc(qx,dx);
2786  gradX[qx][1] += s * Gc(qx,dx);
2787  }
2788  }
2789  for (int qy = 0; qy < Q1D; ++qy)
2790  {
2791  const double wy = Bc(qy,dy);
2792  const double wDy = Gc(qy,dy);
2793  for (int qx = 0; qx < Q1D; ++qx)
2794  {
2795  const double wx = gradX[qx][0];
2796  const double wDx = gradX[qx][1];
2797  gradXY[qy][qx][0] += wDx * wy;
2798  gradXY[qy][qx][1] += wx * wDy;
2799  gradXY[qy][qx][2] += wx * wy;
2800  }
2801  }
2802  }
2803  for (int qz = 0; qz < Q1D; ++qz)
2804  {
2805  const double wz = Bc(qz,dz);
2806  const double wDz = Gc(qz,dz);
2807  for (int qy = 0; qy < Q1D; ++qy)
2808  {
2809  for (int qx = 0; qx < Q1D; ++qx)
2810  {
2811  mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2812  mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2813  mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2814  }
2815  }
2816  }
2817  }
2818 
2819  // Apply D operator.
2820  for (int qz = 0; qz < Q1D; ++qz)
2821  {
2822  for (int qy = 0; qy < Q1D; ++qy)
2823  {
2824  for (int qx = 0; qx < Q1D; ++qx)
2825  {
2826  const double O11 = op(qx,qy,qz,0,e);
2827  const double O12 = op(qx,qy,qz,1,e);
2828  const double O13 = op(qx,qy,qz,2,e);
2829  const double O22 = op(qx,qy,qz,3,e);
2830  const double O23 = op(qx,qy,qz,4,e);
2831  const double O33 = op(qx,qy,qz,5,e);
2832  const double massX = mass[qz][qy][qx][0];
2833  const double massY = mass[qz][qy][qx][1];
2834  const double massZ = mass[qz][qy][qx][2];
2835  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2836  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2837  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2838  }
2839  }
2840  }
2841 
2842  for (int qz = 0; qz < Q1D; ++qz)
2843  {
2844  double massXY[MAX_D1D][MAX_D1D];
2845 
2846  int osc = 0;
2847 
2848  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2849  {
2850  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2851  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2852  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2853 
2854  for (int dy = 0; dy < D1Dy; ++dy)
2855  {
2856  for (int dx = 0; dx < D1Dx; ++dx)
2857  {
2858  massXY[dy][dx] = 0.0;
2859  }
2860  }
2861  for (int qy = 0; qy < Q1D; ++qy)
2862  {
2863  double massX[MAX_D1D];
2864  for (int dx = 0; dx < D1Dx; ++dx)
2865  {
2866  massX[dx] = 0;
2867  }
2868  for (int qx = 0; qx < Q1D; ++qx)
2869  {
2870  for (int dx = 0; dx < D1Dx; ++dx)
2871  {
2872  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2873  }
2874  }
2875  for (int dy = 0; dy < D1Dy; ++dy)
2876  {
2877  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2878  for (int dx = 0; dx < D1Dx; ++dx)
2879  {
2880  massXY[dy][dx] += massX[dx] * wy;
2881  }
2882  }
2883  }
2884 
2885  for (int dz = 0; dz < D1Dz; ++dz)
2886  {
2887  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2888  for (int dy = 0; dy < D1Dy; ++dy)
2889  {
2890  for (int dx = 0; dx < D1Dx; ++dx)
2891  {
2892  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2893  }
2894  }
2895  }
2896 
2897  osc += D1Dx * D1Dy * D1Dz;
2898  } // loop c
2899  } // loop qz
2900  }); // end of element loop
2901 }
2902 
2903 // Apply to x corresponding to DOFs in H(curl), integrated
2904 // against gradients of H^1 functions corresponding to y.
2905 void PAHcurlH1ApplyTranspose3D(const int D1D,
2906  const int Q1D,
2907  const int NE,
2908  const Array<double> &bc,
2909  const Array<double> &bo,
2910  const Array<double> &bct,
2911  const Array<double> &gct,
2912  const Vector &pa_data,
2913  const Vector &x,
2914  Vector &y)
2915 {
2916  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2917  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2918 
2919  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2920  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2921 
2922  constexpr static int VDIM = 3;
2923 
2924  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2925  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2926  auto Bt = Reshape(bct.Read(), D1D, Q1D);
2927  auto Gt = Reshape(gct.Read(), D1D, Q1D);
2928  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2929  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2930  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
2931 
2932  MFEM_FORALL(e, NE,
2933  {
2934  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2935 
2936  for (int qz = 0; qz < Q1D; ++qz)
2937  {
2938  for (int qy = 0; qy < Q1D; ++qy)
2939  {
2940  for (int qx = 0; qx < Q1D; ++qx)
2941  {
2942  for (int c = 0; c < VDIM; ++c)
2943  {
2944  mass[qz][qy][qx][c] = 0.0;
2945  }
2946  }
2947  }
2948  }
2949 
2950  int osc = 0;
2951 
2952  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2953  {
2954  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2955  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2956  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2957 
2958  for (int dz = 0; dz < D1Dz; ++dz)
2959  {
2960  double massXY[MAX_Q1D][MAX_Q1D];
2961  for (int qy = 0; qy < Q1D; ++qy)
2962  {
2963  for (int qx = 0; qx < Q1D; ++qx)
2964  {
2965  massXY[qy][qx] = 0.0;
2966  }
2967  }
2968 
2969  for (int dy = 0; dy < D1Dy; ++dy)
2970  {
2971  double massX[MAX_Q1D];
2972  for (int qx = 0; qx < Q1D; ++qx)
2973  {
2974  massX[qx] = 0.0;
2975  }
2976 
2977  for (int dx = 0; dx < D1Dx; ++dx)
2978  {
2979  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
2980  for (int qx = 0; qx < Q1D; ++qx)
2981  {
2982  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2983  }
2984  }
2985 
2986  for (int qy = 0; qy < Q1D; ++qy)
2987  {
2988  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
2989  for (int qx = 0; qx < Q1D; ++qx)
2990  {
2991  const double wx = massX[qx];
2992  massXY[qy][qx] += wx * wy;
2993  }
2994  }
2995  }
2996 
2997  for (int qz = 0; qz < Q1D; ++qz)
2998  {
2999  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
3000  for (int qy = 0; qy < Q1D; ++qy)
3001  {
3002  for (int qx = 0; qx < Q1D; ++qx)
3003  {
3004  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
3005  }
3006  }
3007  }
3008  }
3009 
3010  osc += D1Dx * D1Dy * D1Dz;
3011  } // loop (c) over components
3012 
3013  // Apply D operator.
3014  for (int qz = 0; qz < Q1D; ++qz)
3015  {
3016  for (int qy = 0; qy < Q1D; ++qy)
3017  {
3018  for (int qx = 0; qx < Q1D; ++qx)
3019  {
3020  const double O11 = op(qx,qy,qz,0,e);
3021  const double O12 = op(qx,qy,qz,1,e);
3022  const double O13 = op(qx,qy,qz,2,e);
3023  const double O22 = op(qx,qy,qz,3,e);
3024  const double O23 = op(qx,qy,qz,4,e);
3025  const double O33 = op(qx,qy,qz,5,e);
3026  const double massX = mass[qz][qy][qx][0];
3027  const double massY = mass[qz][qy][qx][1];
3028  const double massZ = mass[qz][qy][qx][2];
3029  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
3030  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
3031  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
3032  }
3033  }
3034  }
3035 
3036  for (int qz = 0; qz < Q1D; ++qz)
3037  {
3038  double gradXY[MAX_D1D][MAX_D1D][3];
3039  for (int dy = 0; dy < D1D; ++dy)
3040  {
3041  for (int dx = 0; dx < D1D; ++dx)
3042  {
3043  gradXY[dy][dx][0] = 0;
3044  gradXY[dy][dx][1] = 0;
3045  gradXY[dy][dx][2] = 0;
3046  }
3047  }
3048  for (int qy = 0; qy < Q1D; ++qy)
3049  {
3050  double gradX[MAX_D1D][3];
3051  for (int dx = 0; dx < D1D; ++dx)
3052  {
3053  gradX[dx][0] = 0;
3054  gradX[dx][1] = 0;
3055  gradX[dx][2] = 0;
3056  }
3057  for (int qx = 0; qx < Q1D; ++qx)
3058  {
3059  const double gX = mass[qz][qy][qx][0];
3060  const double gY = mass[qz][qy][qx][1];
3061  const double gZ = mass[qz][qy][qx][2];
3062  for (int dx = 0; dx < D1D; ++dx)
3063  {
3064  const double wx = Bt(dx,qx);
3065  const double wDx = Gt(dx,qx);
3066  gradX[dx][0] += gX * wDx;
3067  gradX[dx][1] += gY * wx;
3068  gradX[dx][2] += gZ * wx;
3069  }
3070  }
3071  for (int dy = 0; dy < D1D; ++dy)
3072  {
3073  const double wy = Bt(dy,qy);
3074  const double wDy = Gt(dy,qy);
3075  for (int dx = 0; dx < D1D; ++dx)
3076  {
3077  gradXY[dy][dx][0] += gradX[dx][0] * wy;
3078  gradXY[dy][dx][1] += gradX[dx][1] * wDy;
3079  gradXY[dy][dx][2] += gradX[dx][2] * wy;
3080  }
3081  }
3082  }
3083  for (int dz = 0; dz < D1D; ++dz)
3084  {
3085  const double wz = Bt(dz,qz);
3086  const double wDz = Gt(dz,qz);
3087  for (int dy = 0; dy < D1D; ++dy)
3088  {
3089  for (int dx = 0; dx < D1D; ++dx)
3090  {
3091  Y(dx,dy,dz,e) +=
3092  ((gradXY[dy][dx][0] * wz) +
3093  (gradXY[dy][dx][1] * wz) +
3094  (gradXY[dy][dx][2] * wDz));
3095  }
3096  }
3097  }
3098  } // loop qz
3099  }); // end of element loop
3100 }
3101 
3102 // Apply to x corresponding to DOFs in H^1 (trial), whose gradients are
3103 // integrated against H(curl) test functions corresponding to y.
3104 void PAHcurlH1Apply2D(const int D1D,
3105  const int Q1D,
3106  const int NE,
3107  const Array<double> &bc,
3108  const Array<double> &gc,
3109  const Array<double> &bot,
3110  const Array<double> &bct,
3111  const Vector &pa_data,
3112  const Vector &x,
3113  Vector &y)
3114 {
3115  constexpr static int VDIM = 2;
3116  constexpr static int MAX_D1D = HCURL_MAX_D1D;
3117  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3118 
3119  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3120  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3121  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3122  auto Bct = Reshape(bct.Read(), D1D, Q1D);
3123  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3124  auto X = Reshape(x.Read(), D1D, D1D, NE);
3125  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
3126 
3127  MFEM_FORALL(e, NE,
3128  {
3129  double mass[MAX_Q1D][MAX_Q1D][VDIM];
3130 
3131  for (int qy = 0; qy < Q1D; ++qy)
3132  {
3133  for (int qx = 0; qx < Q1D; ++qx)
3134  {
3135  for (int c = 0; c < VDIM; ++c)
3136  {
3137  mass[qy][qx][c] = 0.0;
3138  }
3139  }
3140  }
3141 
3142  for (int dy = 0; dy < D1D; ++dy)
3143  {
3144  double gradX[MAX_Q1D][2];
3145  for (int qx = 0; qx < Q1D; ++qx)
3146  {
3147  gradX[qx][0] = 0.0;
3148  gradX[qx][1] = 0.0;
3149  }
3150  for (int dx = 0; dx < D1D; ++dx)
3151  {
3152  const double s = X(dx,dy,e);
3153  for (int qx = 0; qx < Q1D; ++qx)
3154  {
3155  gradX[qx][0] += s * Bc(qx,dx);
3156  gradX[qx][1] += s * Gc(qx,dx);
3157  }
3158  }
3159  for (int qy = 0; qy < Q1D; ++qy)
3160  {
3161  const double wy = Bc(qy,dy);
3162  const double wDy = Gc(qy,dy);
3163  for (int qx = 0; qx < Q1D; ++qx)
3164  {
3165  const double wx = gradX[qx][0];
3166  const double wDx = gradX[qx][1];
3167  mass[qy][qx][0] += wDx * wy;
3168  mass[qy][qx][1] += wx * wDy;
3169  }
3170  }
3171  }
3172 
3173  // Apply D operator.
3174  for (int qy = 0; qy < Q1D; ++qy)
3175  {
3176  for (int qx = 0; qx < Q1D; ++qx)
3177  {
3178  const double O11 = op(qx,qy,0,e);
3179  const double O12 = op(qx,qy,1,e);
3180  const double O22 = op(qx,qy,2,e);
3181  const double massX = mass[qy][qx][0];
3182  const double massY = mass[qy][qx][1];
3183  mass[qy][qx][0] = (O11*massX)+(O12*massY);
3184  mass[qy][qx][1] = (O12*massX)+(O22*massY);
3185  }
3186  }
3187 
3188  for (int qy = 0; qy < Q1D; ++qy)
3189  {
3190  int osc = 0;
3191 
3192  for (int c = 0; c < VDIM; ++c) // loop over x, y components
3193  {
3194  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3195  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3196 
3197  double massX[MAX_D1D];
3198  for (int dx = 0; dx < D1Dx; ++dx)
3199  {
3200  massX[dx] = 0;
3201  }
3202  for (int qx = 0; qx < Q1D; ++qx)
3203  {
3204  for (int dx = 0; dx < D1Dx; ++dx)
3205  {
3206  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3207  }
3208  }
3209 
3210  for (int dy = 0; dy < D1Dy; ++dy)
3211  {
3212  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3213 
3214  for (int dx = 0; dx < D1Dx; ++dx)
3215  {
3216  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
3217  }
3218  }
3219 
3220  osc += D1Dx * D1Dy;
3221  } // loop c
3222  }
3223  }); // end of element loop
3224 }
3225 
3226 // Apply to x corresponding to DOFs in H(curl), integrated
3227 // against gradients of H^1 functions corresponding to y.
3228 void PAHcurlH1ApplyTranspose2D(const int D1D,
3229  const int Q1D,
3230  const int NE,
3231  const Array<double> &bc,
3232  const Array<double> &bo,
3233  const Array<double> &bct,
3234  const Array<double> &gct,
3235  const Vector &pa_data,
3236  const Vector &x,
3237  Vector &y)
3238 {
3239  constexpr static int VDIM = 2;
3240  constexpr static int MAX_D1D = HCURL_MAX_D1D;
3241  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3242 
3243  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3244  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3245  auto Bt = Reshape(bct.Read(), D1D, Q1D);
3246  auto Gt = Reshape(gct.Read(), D1D, Q1D);
3247  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3248  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
3249  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
3250 
3251  MFEM_FORALL(e, NE,
3252  {
3253  double mass[MAX_Q1D][MAX_Q1D][VDIM];
3254 
3255  for (int qy = 0; qy < Q1D; ++qy)
3256  {
3257  for (int qx = 0; qx < Q1D; ++qx)
3258  {
3259  for (int c = 0; c < VDIM; ++c)
3260  {
3261  mass[qy][qx][c] = 0.0;
3262  }
3263  }
3264  }
3265 
3266  int osc = 0;
3267 
3268  for (int c = 0; c < VDIM; ++c) // loop over x, y components
3269  {
3270  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3271  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3272 
3273  for (int dy = 0; dy < D1Dy; ++dy)
3274  {
3275  double massX[MAX_Q1D];
3276  for (int qx = 0; qx < Q1D; ++qx)
3277  {
3278  massX[qx] = 0.0;
3279  }
3280 
3281  for (int dx = 0; dx < D1Dx; ++dx)
3282  {
3283  const double t = X(dx + (dy * D1Dx) + osc, e);
3284  for (int qx = 0; qx < Q1D; ++qx)
3285  {
3286  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
3287  }
3288  }
3289 
3290  for (int qy = 0; qy < Q1D; ++qy)
3291  {
3292  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
3293  for (int qx = 0; qx < Q1D; ++qx)
3294  {
3295  mass[qy][qx][c] += massX[qx] * wy;
3296  }
3297  }
3298  }
3299 
3300  osc += D1Dx * D1Dy;
3301  } // loop (c) over components
3302 
3303  // Apply D operator.
3304  for (int qy = 0; qy < Q1D; ++qy)
3305  {
3306  for (int qx = 0; qx < Q1D; ++qx)
3307  {
3308  const double O11 = op(qx,qy,0,e);
3309  const double O12 = op(qx,qy,1,e);
3310  const double O22 = op(qx,qy,2,e);
3311  const double massX = mass[qy][qx][0];
3312  const double massY = mass[qy][qx][1];
3313  mass[qy][qx][0] = (O11*massX)+(O12*massY);
3314  mass[qy][qx][1] = (O12*massX)+(O22*massY);
3315  }
3316  }
3317 
3318  for (int qy = 0; qy < Q1D; ++qy)
3319  {
3320  double gradX[MAX_D1D][2];
3321  for (int dx = 0; dx < D1D; ++dx)
3322  {
3323  gradX[dx][0] = 0;
3324  gradX[dx][1] = 0;
3325  }
3326  for (int qx = 0; qx < Q1D; ++qx)
3327  {
3328  const double gX = mass[qy][qx][0];
3329  const double gY = mass[qy][qx][1];
3330  for (int dx = 0; dx < D1D; ++dx)
3331  {
3332  const double wx = Bt(dx,qx);
3333  const double wDx = Gt(dx,qx);
3334  gradX[dx][0] += gX * wDx;
3335  gradX[dx][1] += gY * wx;
3336  }
3337  }
3338  for (int dy = 0; dy < D1D; ++dy)
3339  {
3340  const double wy = Bt(dy,qy);
3341  const double wDy = Gt(dy,qy);
3342  for (int dx = 0; dx < D1D; ++dx)
3343  {
3344  Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
3345  }
3346  }
3347  }
3348  }); // end of element loop
3349 }
3350 
3351 // PA H(curl) Mass Assemble 3D kernel
3352 void PAHcurlL2Setup(const int NQ,
3353  const int coeffDim,
3354  const int NE,
3355  const Array<double> &w,
3356  Vector &coeff,
3357  Vector &op)
3358 {
3359  auto W = w.Read();
3360  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
3361  auto y = Reshape(op.Write(), coeffDim, NQ, NE);
3362 
3363  MFEM_FORALL(e, NE,
3364  {
3365  for (int q = 0; q < NQ; ++q)
3366  {
3367  for (int c=0; c<coeffDim; ++c)
3368  {
3369  y(c,q,e) = W[q] * C(c,q,e);
3370  }
3371  }
3372  });
3373 }
3374 
3375 void MixedScalarCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3376  const FiniteElementSpace &test_fes)
3377 {
3378  // Assumes tensor-product elements
3379  Mesh *mesh = trial_fes.GetMesh();
3380  const FiniteElement *fel = trial_fes.GetFE(0); // In H(curl)
3381  const FiniteElement *eltest = test_fes.GetFE(0); // In scalar space
3382 
3383  const VectorTensorFiniteElement *el =
3384  dynamic_cast<const VectorTensorFiniteElement*>(fel);
3385  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
3386 
3387  if (el->GetDerivType() != mfem::FiniteElement::CURL)
3388  {
3389  MFEM_ABORT("Unknown kernel.");
3390  }
3391 
3392  const IntegrationRule *ir
3393  = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest,
3394  *mesh->GetElementTransformation(0));
3395 
3396  const int dims = el->GetDim();
3397  MFEM_VERIFY(dims == 2, "");
3398 
3399  const int nq = ir->GetNPoints();
3400  dim = mesh->Dimension();
3401  MFEM_VERIFY(dim == 2, "");
3402 
3403  ne = test_fes.GetNE();
3404  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3405  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3406  dofs1D = mapsC->ndof;
3407  quad1D = mapsC->nqpt;
3408 
3409  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3410 
3411  if (el->GetOrder() == eltest->GetOrder())
3412  {
3413  dofs1Dtest = dofs1D;
3414  }
3415  else
3416  {
3417  dofs1Dtest = dofs1D - 1;
3418  }
3419 
3420  pa_data.SetSize(nq * ne, Device::GetMemoryType());
3421 
3422  QuadratureSpace qs(*mesh, *ir);
3423  CoefficientVector coeff(Q, qs, CoefficientStorage::FULL);
3424 
3425  if (dim == 2)
3426  {
3427  PACurlL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
3428  }
3429  else
3430  {
3431  MFEM_ABORT("Unsupported dimension!");
3432  }
3433 }
3434 
3435 void MixedScalarCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3436 {
3437  if (dim == 2)
3438  {
3439  PACurlL2Apply2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3440  mapsC->Bt, mapsC->G, pa_data, x, y);
3441  }
3442  else
3443  {
3444  MFEM_ABORT("Unsupported dimension!");
3445  }
3446 }
3447 
3448 void MixedScalarCurlIntegrator::AddMultTransposePA(const Vector &x,
3449  Vector &y) const
3450 {
3451  if (dim == 2)
3452  {
3453  PACurlL2ApplyTranspose2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3454  mapsC->B, mapsC->Gt, pa_data, x, y);
3455  }
3456  else
3457  {
3458  MFEM_ABORT("Unsupported dimension!");
3459  }
3460 }
3461 
3462 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3463  const FiniteElementSpace &test_fes)
3464 {
3465  // Assumes tensor-product elements, with vector test and trial spaces.
3466  Mesh *mesh = trial_fes.GetMesh();
3467  const FiniteElement *trial_fel = trial_fes.GetFE(0);
3468  const FiniteElement *test_fel = test_fes.GetFE(0);
3469 
3470  const VectorTensorFiniteElement *trial_el =
3471  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3472  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3473 
3474  const VectorTensorFiniteElement *test_el =
3475  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3476  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
3477 
3478  const IntegrationRule *ir
3479  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
3480  *mesh->GetElementTransformation(0));
3481  const int dims = trial_el->GetDim();
3482  MFEM_VERIFY(dims == 3, "");
3483 
3484  const int nq = ir->GetNPoints();
3485  dim = mesh->Dimension();
3486  MFEM_VERIFY(dim == 3, "");
3487 
3488  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
3489 
3490  ne = trial_fes.GetNE();
3491  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
3492  mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3493  mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3494  mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3495  mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3496  dofs1D = mapsC->ndof;
3497  quad1D = mapsC->nqpt;
3498  dofs1Dtest = mapsCtest->ndof;
3499 
3500  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3501 
3502  testType = test_el->GetDerivType();
3503  trialType = trial_el->GetDerivType();
3504 
3505  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
3506  coeffDim = (DQ ? 3 : 1);
3507 
3508  const bool curlSpaces = (testType == mfem::FiniteElement::CURL &&
3509  trialType == mfem::FiniteElement::CURL);
3510 
3511  const int ndata = curlSpaces ? (coeffDim == 1 ? 1 : 9) : symmDims;
3512  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
3513 
3514  QuadratureSpace qs(*mesh, *ir);
3515  CoefficientVector coeff(qs, CoefficientStorage::FULL);
3516  if (Q) { coeff.Project(*Q); }
3517  else if (DQ) { coeff.Project(*DQ); }
3518  else { coeff.SetConstant(1.0); }
3519 
3520  if (testType == mfem::FiniteElement::CURL &&
3521  trialType == mfem::FiniteElement::CURL && dim == 3)
3522  {
3523  if (coeffDim == 1)
3524  {
3525  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
3526  }
3527  else
3528  {
3529  PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(),
3530  geom->J, coeff, pa_data);
3531  }
3532  }
3533  else if (testType == mfem::FiniteElement::DIV &&
3534  trialType == mfem::FiniteElement::CURL && dim == 3 &&
3535  test_fel->GetOrder() == trial_fel->GetOrder())
3536  {
3537  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
3538  pa_data);
3539  }
3540  else
3541  {
3542  MFEM_ABORT("Unknown kernel.");
3543  }
3544 }
3545 
3546 // Apply to x corresponding to DOFs in H(curl) (trial), whose curl is
3547 // integrated against H(curl) test functions corresponding to y.
3548 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3549 static void PAHcurlL2Apply3D(const int D1D,
3550  const int Q1D,
3551  const int coeffDim,
3552  const int NE,
3553  const Array<double> &bo,
3554  const Array<double> &bc,
3555  const Array<double> &bot,
3556  const Array<double> &bct,
3557  const Array<double> &gc,
3558  const Vector &pa_data,
3559  const Vector &x,
3560  Vector &y)
3561 {
3562  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3563  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3564  // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
3565  // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
3566  // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
3567  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3568  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3569  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3570 
3571  constexpr static int VDIM = 3;
3572 
3573  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3574  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3575  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3576  auto Bct = Reshape(bct.Read(), D1D, Q1D);
3577  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3578  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3579  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3580  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3581 
3582  MFEM_FORALL(e, NE,
3583  {
3584  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3585  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3586 
3587  for (int qz = 0; qz < Q1D; ++qz)
3588  {
3589  for (int qy = 0; qy < Q1D; ++qy)
3590  {
3591  for (int qx = 0; qx < Q1D; ++qx)
3592  {
3593  for (int c = 0; c < VDIM; ++c)
3594  {
3595  curl[qz][qy][qx][c] = 0.0;
3596  }
3597  }
3598  }
3599  }
3600 
3601  // We treat x, y, z components separately for optimization specific to each.
3602 
3603  int osc = 0;
3604 
3605  {
3606  // x component
3607  const int D1Dz = D1D;
3608  const int D1Dy = D1D;
3609  const int D1Dx = D1D - 1;
3610 
3611  for (int dz = 0; dz < D1Dz; ++dz)
3612  {
3613  double gradXY[MAX_Q1D][MAX_Q1D][2];
3614  for (int qy = 0; qy < Q1D; ++qy)
3615  {
3616  for (int qx = 0; qx < Q1D; ++qx)
3617  {
3618  for (int d = 0; d < 2; ++d)
3619  {
3620  gradXY[qy][qx][d] = 0.0;
3621  }
3622  }
3623  }
3624 
3625  for (int dy = 0; dy < D1Dy; ++dy)
3626  {
3627  double massX[MAX_Q1D];
3628  for (int qx = 0; qx < Q1D; ++qx)
3629  {
3630  massX[qx] = 0.0;
3631  }
3632 
3633  for (int dx = 0; dx < D1Dx; ++dx)
3634  {
3635  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3636  for (int qx = 0; qx < Q1D; ++qx)
3637  {
3638  massX[qx] += t * Bo(qx,dx);
3639  }
3640  }
3641 
3642  for (int qy = 0; qy < Q1D; ++qy)
3643  {
3644  const double wy = Bc(qy,dy);
3645  const double wDy = Gc(qy,dy);
3646  for (int qx = 0; qx < Q1D; ++qx)
3647  {
3648  const double wx = massX[qx];
3649  gradXY[qy][qx][0] += wx * wDy;
3650  gradXY[qy][qx][1] += wx * wy;
3651  }
3652  }
3653  }
3654 
3655  for (int qz = 0; qz < Q1D; ++qz)
3656  {
3657  const double wz = Bc(qz,dz);
3658  const double wDz = Gc(qz,dz);
3659  for (int qy = 0; qy < Q1D; ++qy)
3660  {
3661  for (int qx = 0; qx < Q1D; ++qx)
3662  {
3663  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3664  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3665  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3666  }
3667  }
3668  }
3669  }
3670 
3671  osc += D1Dx * D1Dy * D1Dz;
3672  }
3673 
3674  {
3675  // y component
3676  const int D1Dz = D1D;
3677  const int D1Dy = D1D - 1;
3678  const int D1Dx = D1D;
3679 
3680  for (int dz = 0; dz < D1Dz; ++dz)
3681  {
3682  double gradXY[MAX_Q1D][MAX_Q1D][2];
3683  for (int qy = 0; qy < Q1D; ++qy)
3684  {
3685  for (int qx = 0; qx < Q1D; ++qx)
3686  {
3687  for (int d = 0; d < 2; ++d)
3688  {
3689  gradXY[qy][qx][d] = 0.0;
3690  }
3691  }
3692  }
3693 
3694  for (int dx = 0; dx < D1Dx; ++dx)
3695  {
3696  double massY[MAX_Q1D];
3697  for (int qy = 0; qy < Q1D; ++qy)
3698  {
3699  massY[qy] = 0.0;
3700  }
3701 
3702  for (int dy = 0; dy < D1Dy; ++dy)
3703  {
3704  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3705  for (int qy = 0; qy < Q1D; ++qy)
3706  {
3707  massY[qy] += t * Bo(qy,dy);
3708  }
3709  }
3710 
3711  for (int qx = 0; qx < Q1D; ++qx)
3712  {
3713  const double wx = Bc(qx,dx);
3714  const double wDx = Gc(qx,dx);
3715  for (int qy = 0; qy < Q1D; ++qy)
3716  {
3717  const double wy = massY[qy];
3718  gradXY[qy][qx][0] += wDx * wy;
3719  gradXY[qy][qx][1] += wx * wy;
3720  }
3721  }
3722  }
3723 
3724  for (int qz = 0; qz < Q1D; ++qz)
3725  {
3726  const double wz = Bc(qz,dz);
3727  const double wDz = Gc(qz,dz);
3728  for (int qy = 0; qy < Q1D; ++qy)
3729  {
3730  for (int qx = 0; qx < Q1D; ++qx)
3731  {
3732  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3733  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3734  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3735  }
3736  }
3737  }
3738  }
3739 
3740  osc += D1Dx * D1Dy * D1Dz;
3741  }
3742 
3743  {
3744  // z component
3745  const int D1Dz = D1D - 1;
3746  const int D1Dy = D1D;
3747  const int D1Dx = D1D;
3748 
3749  for (int dx = 0; dx < D1Dx; ++dx)
3750  {
3751  double gradYZ[MAX_Q1D][MAX_Q1D][2];
3752  for (int qz = 0; qz < Q1D; ++qz)
3753  {
3754  for (int qy = 0; qy < Q1D; ++qy)
3755  {
3756  for (int d = 0; d < 2; ++d)
3757  {
3758  gradYZ[qz][qy][d] = 0.0;
3759  }
3760  }
3761  }
3762 
3763  for (int dy = 0; dy < D1Dy; ++dy)
3764  {
3765  double massZ[MAX_Q1D];
3766  for (int qz = 0; qz < Q1D; ++qz)
3767  {
3768  massZ[qz] = 0.0;
3769  }
3770 
3771  for (int dz = 0; dz < D1Dz; ++dz)
3772  {
3773  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3774  for (int qz = 0; qz < Q1D; ++qz)
3775  {
3776  massZ[qz] += t * Bo(qz,dz);
3777  }
3778  }
3779 
3780  for (int qy = 0; qy < Q1D; ++qy)
3781  {
3782  const double wy = Bc(qy,dy);
3783  const double wDy = Gc(qy,dy);
3784  for (int qz = 0; qz < Q1D; ++qz)
3785  {
3786  const double wz = massZ[qz];
3787  gradYZ[qz][qy][0] += wz * wy;
3788  gradYZ[qz][qy][1] += wz * wDy;
3789  }
3790  }
3791  }
3792 
3793  for (int qx = 0; qx < Q1D; ++qx)
3794  {
3795  const double wx = Bc(qx,dx);
3796  const double wDx = Gc(qx,dx);
3797 
3798  for (int qy = 0; qy < Q1D; ++qy)
3799  {
3800  for (int qz = 0; qz < Q1D; ++qz)
3801  {
3802  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3803  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3804  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3805  }
3806  }
3807  }
3808  }
3809  }
3810 
3811  // Apply D operator.
3812  for (int qz = 0; qz < Q1D; ++qz)
3813  {
3814  for (int qy = 0; qy < Q1D; ++qy)
3815  {
3816  for (int qx = 0; qx < Q1D; ++qx)
3817  {
3818  const double O11 = op(0,qx,qy,qz,e);
3819  if (coeffDim == 1)
3820  {
3821  for (int c = 0; c < VDIM; ++c)
3822  {
3823  curl[qz][qy][qx][c] *= O11;
3824  }
3825  }
3826  else
3827  {
3828  const double O21 = op(1,qx,qy,qz,e);
3829  const double O31 = op(2,qx,qy,qz,e);
3830  const double O12 = op(3,qx,qy,qz,e);
3831  const double O22 = op(4,qx,qy,qz,e);
3832  const double O32 = op(5,qx,qy,qz,e);
3833  const double O13 = op(6,qx,qy,qz,e);
3834  const double O23 = op(7,qx,qy,qz,e);
3835  const double O33 = op(8,qx,qy,qz,e);
3836  const double curlX = curl[qz][qy][qx][0];
3837  const double curlY = curl[qz][qy][qx][1];
3838  const double curlZ = curl[qz][qy][qx][2];
3839  curl[qz][qy][qx][0] = (O11*curlX)+(O12*curlY)+(O13*curlZ);
3840  curl[qz][qy][qx][1] = (O21*curlX)+(O22*curlY)+(O23*curlZ);
3841  curl[qz][qy][qx][2] = (O31*curlX)+(O32*curlY)+(O33*curlZ);
3842  }
3843  }
3844  }
3845  }
3846 
3847  for (int qz = 0; qz < Q1D; ++qz)
3848  {
3849  double massXY[MAX_D1D][MAX_D1D];
3850 
3851  osc = 0;
3852 
3853  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3854  {
3855  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3856  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3857  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3858 
3859  for (int dy = 0; dy < D1Dy; ++dy)
3860  {
3861  for (int dx = 0; dx < D1Dx; ++dx)
3862  {
3863  massXY[dy][dx] = 0;
3864  }
3865  }
3866  for (int qy = 0; qy < Q1D; ++qy)
3867  {
3868  double massX[MAX_D1D];
3869  for (int dx = 0; dx < D1Dx; ++dx)
3870  {
3871  massX[dx] = 0.0;
3872  }
3873  for (int qx = 0; qx < Q1D; ++qx)
3874  {
3875  for (int dx = 0; dx < D1Dx; ++dx)
3876  {
3877  massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3878  }
3879  }
3880 
3881  for (int dy = 0; dy < D1Dy; ++dy)
3882  {
3883  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3884  for (int dx = 0; dx < D1Dx; ++dx)
3885  {
3886  massXY[dy][dx] += massX[dx] * wy;
3887  }
3888  }
3889  }
3890 
3891  for (int dz = 0; dz < D1Dz; ++dz)
3892  {
3893  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3894  for (int dy = 0; dy < D1Dy; ++dy)
3895  {
3896  for (int dx = 0; dx < D1Dx; ++dx)
3897  {
3898  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3899  }
3900  }
3901  }
3902 
3903  osc += D1Dx * D1Dy * D1Dz;
3904  } // loop c
3905  } // loop qz
3906  }); // end of element loop
3907 }
3908 
3909 // Apply to x corresponding to DOFs in H(curl) (trial), whose curl is
3910 // integrated against H(curl) test functions corresponding to y.
3911 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3912 static void SmemPAHcurlL2Apply3D(const int D1D,
3913  const int Q1D,
3914  const int coeffDim,
3915  const int NE,
3916  const Array<double> &bo,
3917  const Array<double> &bc,
3918  const Array<double> &gc,
3919  const Vector &pa_data,
3920  const Vector &x,
3921  Vector &y)
3922 {
3923  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3924  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3925 
3926  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3927  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3928  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3929  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3930  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3931  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3932 
3933  auto device_kernel = [=] MFEM_DEVICE (int e)
3934  {
3935  constexpr int VDIM = 3;
3936  constexpr int maxCoeffDim = 9;
3937 
3938  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3939  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3940  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3941 
3942  double opc[maxCoeffDim];
3943  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3944  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3945 
3946  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3947 
3948  MFEM_FOREACH_THREAD(qx,x,Q1D)
3949  {
3950  MFEM_FOREACH_THREAD(qy,y,Q1D)
3951  {
3952  MFEM_FOREACH_THREAD(qz,z,Q1D)
3953  {
3954  for (int i=0; i<coeffDim; ++i)
3955  {
3956  opc[i] = op(i,qx,qy,qz,e);
3957  }
3958  }
3959  }
3960  }
3961 
3962  const int tidx = MFEM_THREAD_ID(x);
3963  const int tidy = MFEM_THREAD_ID(y);
3964  const int tidz = MFEM_THREAD_ID(z);
3965 
3966  if (tidz == 0)
3967  {
3968  MFEM_FOREACH_THREAD(d,y,D1D)
3969  {
3970  MFEM_FOREACH_THREAD(q,x,Q1D)
3971  {
3972  sBc[d][q] = Bc(q,d);
3973  sGc[d][q] = Gc(q,d);
3974  if (d < D1D-1)
3975  {
3976  sBo[d][q] = Bo(q,d);
3977  }
3978  }
3979  }
3980  }
3981  MFEM_SYNC_THREAD;
3982 
3983  for (int qz=0; qz < Q1D; ++qz)
3984  {
3985  if (tidz == qz)
3986  {
3987  MFEM_FOREACH_THREAD(qy,y,Q1D)
3988  {
3989  MFEM_FOREACH_THREAD(qx,x,Q1D)
3990  {
3991  for (int i=0; i<3; ++i)
3992  {
3993  curl[qy][qx][i] = 0.0;
3994  }
3995  }
3996  }
3997  }
3998 
3999  int osc = 0;
4000  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4001  {
4002  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4003  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4004  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4005 
4006  MFEM_FOREACH_THREAD(dz,z,D1Dz)
4007  {
4008  MFEM_FOREACH_THREAD(dy,y,D1Dy)
4009  {
4010  MFEM_FOREACH_THREAD(dx,x,D1Dx)
4011  {
4012  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4013  }
4014  }
4015  }
4016  MFEM_SYNC_THREAD;
4017 
4018  if (tidz == qz)
4019  {
4020  if (c == 0)
4021  {
4022  for (int i=0; i<coeffDim; ++i)
4023  {
4024  sop[i][tidx][tidy] = opc[i];
4025  }
4026  }
4027 
4028  MFEM_FOREACH_THREAD(qy,y,Q1D)
4029  {
4030  MFEM_FOREACH_THREAD(qx,x,Q1D)
4031  {
4032  double u = 0.0;
4033  double v = 0.0;
4034 
4035  // We treat x, y, z components separately for optimization specific to each.
4036  if (c == 0) // x component
4037  {
4038  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4039 
4040  for (int dz = 0; dz < D1Dz; ++dz)
4041  {
4042  const double wz = sBc[dz][qz];
4043  const double wDz = sGc[dz][qz];
4044 
4045  for (int dy = 0; dy < D1Dy; ++dy)
4046  {
4047  const double wy = sBc[dy][qy];
4048  const double wDy = sGc[dy][qy];
4049 
4050  for (int dx = 0; dx < D1Dx; ++dx)
4051  {
4052  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
4053  u += wx * wDy * wz;
4054  v += wx * wy * wDz;
4055  }
4056  }
4057  }
4058 
4059  curl[qy][qx][1] += v; // (u_0)_{x_2}
4060  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
4061  }
4062  else if (c == 1) // y component
4063  {
4064  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4065 
4066  for (int dz = 0; dz < D1Dz; ++dz)
4067  {
4068  const double wz = sBc[dz][qz];
4069  const double wDz = sGc[dz][qz];
4070 
4071  for (int dy = 0; dy < D1Dy; ++dy)
4072  {
4073  const double wy = sBo[dy][qy];
4074 
4075  for (int dx = 0; dx < D1Dx; ++dx)
4076  {
4077  const double t = sX[dz][dy][dx];
4078  const double wx = t * sBc[dx][qx];
4079  const double wDx = t * sGc[dx][qx];
4080 
4081  u += wDx * wy * wz;
4082  v += wx * wy * wDz;
4083  }
4084  }
4085  }
4086 
4087  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
4088  curl[qy][qx][2] += u; // (u_1)_{x_0}
4089  }
4090  else // z component
4091  {
4092  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4093 
4094  for (int dz = 0; dz < D1Dz; ++dz)
4095  {
4096  const double wz = sBo[dz][qz];
4097 
4098  for (int dy = 0; dy < D1Dy; ++dy)
4099  {
4100  const double wy = sBc[dy][qy];
4101  const double wDy = sGc[dy][qy];
4102 
4103  for (int dx = 0; dx < D1Dx; ++dx)
4104  {
4105  const double t = sX[dz][dy][dx];
4106  const double wx = t * sBc[dx][qx];
4107  const double wDx = t * sGc[dx][qx];
4108 
4109  u += wDx * wy * wz;
4110  v += wx * wDy * wz;
4111  }
4112  }
4113  }
4114 
4115  curl[qy][qx][0] += v; // (u_2)_{x_1}
4116  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
4117  }
4118  } // qx
4119  } // qy
4120  } // tidz == qz
4121 
4122  osc += D1Dx * D1Dy * D1Dz;
4123  MFEM_SYNC_THREAD;
4124  } // c
4125 
4126  double dxyz1 = 0.0;
4127  double dxyz2 = 0.0;
4128  double dxyz3 = 0.0;
4129 
4130  MFEM_FOREACH_THREAD(dz,z,D1D)
4131  {
4132  const double wcz = sBc[dz][qz];
4133  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4134 
4135  MFEM_FOREACH_THREAD(dy,y,D1D)
4136  {
4137  MFEM_FOREACH_THREAD(dx,x,D1D)
4138  {
4139  for (int qy = 0; qy < Q1D; ++qy)
4140  {
4141  const double wcy = sBc[dy][qy];
4142  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4143 
4144  for (int qx = 0; qx < Q1D; ++qx)
4145  {
4146  const double O11 = sop[0][qx][qy];
4147  double c1, c2, c3;
4148  if (coeffDim == 1)
4149  {
4150  c1 = O11 * curl[qy][qx][0];
4151  c2 = O11 * curl[qy][qx][1];
4152  c3 = O11 * curl[qy][qx][2];
4153  }
4154  else
4155  {
4156  const double O21 = sop[1][qx][qy];
4157  const double O31 = sop[2][qx][qy];
4158  const double O12 = sop[3][qx][qy];
4159  const double O22 = sop[4][qx][qy];
4160  const double O32 = sop[5][qx][qy];
4161  const double O13 = sop[6][qx][qy];
4162  const double O23 = sop[7][qx][qy];
4163  const double O33 = sop[8][qx][qy];
4164  c1 = (O11*curl[qy][qx][0])+(O12*curl[qy][qx][1])+(O13*curl[qy][qx][2]);
4165  c2 = (O21*curl[qy][qx][0])+(O22*curl[qy][qx][1])+(O23*curl[qy][qx][2]);
4166  c3 = (O31*curl[qy][qx][0])+(O32*curl[qy][qx][1])+(O33*curl[qy][qx][2]);
4167  }
4168 
4169  const double wcx = sBc[dx][qx];
4170 
4171  if (dx < D1D-1)
4172  {
4173  const double wx = sBo[dx][qx];
4174  dxyz1 += c1 * wx * wcy * wcz;
4175  }
4176 
4177  dxyz2 += c2 * wcx * wy * wcz;
4178  dxyz3 += c3 * wcx * wcy * wz;
4179  } // qx
4180  } // qy
4181  } // dx
4182  } // dy
4183  } // dz
4184 
4185  MFEM_SYNC_THREAD;
4186 
4187  MFEM_FOREACH_THREAD(dz,z,D1D)
4188  {
4189  MFEM_FOREACH_THREAD(dy,y,D1D)
4190  {
4191  MFEM_FOREACH_THREAD(dx,x,D1D)
4192  {
4193  if (dx < D1D-1)
4194  {
4195  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4196  }
4197  if (dy < D1D-1)
4198  {
4199  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4200  }
4201  if (dz < D1D-1)
4202  {
4203  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4204  }
4205  }
4206  }
4207  }
4208  } // qz
4209  }; // end of element loop
4210 
4211  auto host_kernel = [&] MFEM_LAMBDA (int)
4212  {
4213  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
4214  };
4215 
4216  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
4217 }
4218 
4219 // Apply to x corresponding to DOFs in H(curl) (trial), whose curl is
4220 // integrated against H(div) test functions corresponding to y.
4221 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4222 static void PAHcurlHdivApply3D(const int D1D,
4223  const int D1Dtest,
4224  const int Q1D,
4225  const int NE,
4226  const Array<double> &bo,
4227  const Array<double> &bc,
4228  const Array<double> &bot,
4229  const Array<double> &bct,
4230  const Array<double> &gc,
4231  const Vector &pa_data,
4232  const Vector &x,
4233  Vector &y)
4234 {
4235  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4236  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4237  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
4238  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
4239  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
4240  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4241  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4242  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4243 
4244  constexpr static int VDIM = 3;
4245 
4246  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4247  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4248  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
4249  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
4250  auto Gc = Reshape(gc.Read(), Q1D, D1D);
4251  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
4252  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4253  auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
4254 
4255  MFEM_FORALL(e, NE,
4256  {
4257  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4258  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
4259 
4260  for (int qz = 0; qz < Q1D; ++qz)
4261  {
4262  for (int qy = 0; qy < Q1D; ++qy)
4263  {
4264  for (int qx = 0; qx < Q1D; ++qx)
4265  {
4266  for (int c = 0; c < VDIM; ++c)
4267  {
4268  curl[qz][qy][qx][c] = 0.0;
4269  }
4270  }
4271  }
4272  }
4273 
4274  // We treat x, y, z components separately for optimization specific to each.
4275 
4276  int osc = 0;
4277 
4278  {
4279  // x component
4280  const int D1Dz = D1D;
4281  const int D1Dy = D1D;
4282  const int D1Dx = D1D - 1;
4283 
4284  for (int dz = 0; dz < D1Dz; ++dz)
4285  {
4286  double gradXY[MAX_Q1D][MAX_Q1D][2];
4287  for (int qy = 0; qy < Q1D; ++qy)
4288  {
4289  for (int qx = 0; qx < Q1D; ++qx)
4290  {
4291  for (int d = 0; d < 2; ++d)
4292  {
4293  gradXY[qy][qx][d] = 0.0;
4294  }
4295  }
4296  }
4297 
4298  for (int dy = 0; dy < D1Dy; ++dy)
4299  {
4300  double massX[MAX_Q1D];
4301  for (int qx = 0; qx < Q1D; ++qx)
4302  {
4303  massX[qx] = 0.0;
4304  }
4305 
4306  for (int dx = 0; dx < D1Dx; ++dx)
4307  {
4308  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4309  for (int qx = 0; qx < Q1D; ++qx)
4310  {
4311  massX[qx] += t * Bo(qx,dx);
4312  }
4313  }
4314 
4315  for (int qy = 0; qy < Q1D; ++qy)
4316  {
4317  const double wy = Bc(qy,dy);
4318  const double wDy = Gc(qy,dy);
4319  for (int qx = 0; qx < Q1D; ++qx)
4320  {
4321  const double wx = massX[qx];
4322  gradXY[qy][qx][0] += wx * wDy;
4323  gradXY[qy][qx][1] += wx * wy;
4324  }
4325  }
4326  }
4327 
4328  for (int qz = 0; qz < Q1D; ++qz)
4329  {
4330  const double wz = Bc(qz,dz);
4331  const double wDz = Gc(qz,dz);
4332  for (int qy = 0; qy < Q1D; ++qy)
4333  {
4334  for (int qx = 0; qx < Q1D; ++qx)
4335  {
4336  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4337  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
4338  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
4339  }
4340  }
4341  }
4342  }
4343 
4344  osc += D1Dx * D1Dy * D1Dz;
4345  }
4346 
4347  {
4348  // y component
4349  const int D1Dz = D1D;
4350  const int D1Dy = D1D - 1;
4351  const int D1Dx = D1D;
4352 
4353  for (int dz = 0; dz < D1Dz; ++dz)
4354  {
4355  double gradXY[MAX_Q1D][MAX_Q1D][2];
4356  for (int qy = 0; qy < Q1D; ++qy)
4357  {
4358  for (int qx = 0; qx < Q1D; ++qx)
4359  {
4360  for (int d = 0; d < 2; ++d)
4361  {
4362  gradXY[qy][qx][d] = 0.0;
4363  }
4364  }
4365  }
4366 
4367  for (int dx = 0; dx < D1Dx; ++dx)
4368  {
4369  double massY[MAX_Q1D];
4370  for (int qy = 0; qy < Q1D; ++qy)
4371  {
4372  massY[qy] = 0.0;
4373  }
4374 
4375  for (int dy = 0; dy < D1Dy; ++dy)
4376  {
4377  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4378  for (int qy = 0; qy < Q1D; ++qy)
4379  {
4380  massY[qy] += t * Bo(qy,dy);
4381  }
4382  }
4383 
4384  for (int qx = 0; qx < Q1D; ++qx)
4385  {
4386  const double wx = Bc(qx,dx);
4387  const double wDx = Gc(qx,dx);
4388  for (int qy = 0; qy < Q1D; ++qy)
4389  {
4390  const double wy = massY[qy];
4391  gradXY[qy][qx][0] += wDx * wy;
4392  gradXY[qy][qx][1] += wx * wy;
4393  }
4394  }
4395  }
4396 
4397  for (int qz = 0; qz < Q1D; ++qz)
4398  {
4399  const double wz = Bc(qz,dz);
4400  const double wDz = Gc(qz,dz);
4401  for (int qy = 0; qy < Q1D; ++qy)
4402  {
4403  for (int qx = 0; qx < Q1D; ++qx)
4404  {
4405  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4406  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
4407  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
4408  }
4409  }
4410  }
4411  }
4412 
4413  osc += D1Dx * D1Dy * D1Dz;
4414  }
4415 
4416  {
4417  // z component
4418  const int D1Dz = D1D - 1;
4419  const int D1Dy = D1D;
4420  const int D1Dx = D1D;
4421 
4422  for (int dx = 0; dx < D1Dx; ++dx)
4423  {
4424  double gradYZ[MAX_Q1D][MAX_Q1D][2];
4425  for (int qz = 0; qz < Q1D; ++qz)
4426  {
4427  for (int qy = 0; qy < Q1D; ++qy)
4428  {
4429  for (int d = 0; d < 2; ++d)
4430  {
4431  gradYZ[qz][qy][d] = 0.0;
4432  }
4433  }
4434  }
4435 
4436  for (int dy = 0; dy < D1Dy; ++dy)
4437  {
4438  double massZ[MAX_Q1D];
4439  for (int qz = 0; qz < Q1D; ++qz)
4440  {
4441  massZ[qz] = 0.0;
4442  }
4443 
4444  for (int dz = 0; dz < D1Dz; ++dz)
4445  {
4446  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4447  for (int qz = 0; qz < Q1D; ++qz)
4448  {
4449  massZ[qz] += t * Bo(qz,dz);
4450  }
4451  }
4452 
4453  for (int qy = 0; qy < Q1D; ++qy)
4454  {
4455  const double wy = Bc(qy,dy);
4456  const double wDy = Gc(qy,dy);
4457  for (int qz = 0; qz < Q1D; ++qz)
4458  {
4459  const double wz = massZ[qz];
4460  gradYZ[qz][qy][0] += wz * wy;
4461  gradYZ[qz][qy][1] += wz * wDy;
4462  }
4463  }
4464  }
4465 
4466  for (int qx = 0; qx < Q1D; ++qx)
4467  {
4468  const double wx = Bc(qx,dx);
4469  const double wDx = Gc(qx,dx);
4470 
4471  for (int qy = 0; qy < Q1D; ++qy)
4472  {
4473  for (int qz = 0; qz < Q1D; ++qz)
4474  {
4475  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4476  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
4477  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
4478  }
4479  }
4480  }
4481  }
4482  }
4483 
4484  // Apply D operator.
4485  for (int qz = 0; qz < Q1D; ++qz)
4486  {
4487  for (int qy = 0; qy < Q1D; ++qy)
4488  {
4489  for (int qx = 0; qx < Q1D; ++qx)
4490  {
4491  const double O11 = op(qx,qy,qz,0,e);
4492  const double O12 = op(qx,qy,qz,1,e);
4493  const double O13 = op(qx,qy,qz,2,e);
4494  const double O22 = op(qx,qy,qz,3,e);
4495  const double O23 = op(qx,qy,qz,4,e);
4496  const double O33 = op(qx,qy,qz,5,e);
4497 
4498  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
4499  (O13 * curl[qz][qy][qx][2]);
4500  const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
4501  (O23 * curl[qz][qy][qx][2]);
4502  const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
4503  (O33 * curl[qz][qy][qx][2]);
4504 
4505  curl[qz][qy][qx][0] = c1;
4506  curl[qz][qy][qx][1] = c2;
4507  curl[qz][qy][qx][2] = c3;
4508  }
4509  }
4510  }
4511 
4512  for (int qz = 0; qz < Q1D; ++qz)
4513  {
4514  double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
4515 
4516  osc = 0;
4517 
4518  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4519  {
4520  const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
4521  const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
4522  const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
4523 
4524  for (int dy = 0; dy < D1Dy; ++dy)
4525  {
4526  for (int dx = 0; dx < D1Dx; ++dx)
4527  {
4528  massXY[dy][dx] = 0;
4529  }
4530  }
4531  for (int qy = 0; qy < Q1D; ++qy)
4532  {
4533  double massX[HCURL_MAX_D1D];
4534  for (int dx = 0; dx < D1Dx; ++dx)
4535  {
4536  massX[dx] = 0;
4537  }
4538  for (int qx = 0; qx < Q1D; ++qx)
4539  {
4540  for (int dx = 0; dx < D1Dx; ++dx)
4541  {
4542  massX[dx] += curl[qz][qy][qx][c] *
4543  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
4544  }
4545  }
4546  for (int dy = 0; dy < D1Dy; ++dy)
4547  {
4548  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
4549  for (int dx = 0; dx < D1Dx; ++dx)
4550  {
4551  massXY[dy][dx] += massX[dx] * wy;
4552  }
4553  }
4554  }
4555 
4556  for (int dz = 0; dz < D1Dz; ++dz)
4557  {
4558  const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
4559  for (int dy = 0; dy < D1Dy; ++dy)
4560  {
4561  for (int dx = 0; dx < D1Dx; ++dx)
4562  {
4563  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
4564  massXY[dy][dx] * wz;
4565  }
4566  }
4567  }
4568 
4569  osc += D1Dx * D1Dy * D1Dz;
4570  } // loop c
4571  } // loop qz
4572  }); // end of element loop
4573 }
4574 
4575 // Apply to x corresponding to DOFs in H(div) (test), integrated against the
4576 // curl of H(curl) trial functions corresponding to y.
4577 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4578 static void PAHcurlHdivApply3DTranspose(const int D1D,
4579  const int D1Dtest,
4580  const int Q1D,
4581  const int NE,
4582  const Array<double> &bo,
4583  const Array<double> &bc,
4584  const Array<double> &bot,
4585  const Array<double> &bct,
4586  const Array<double> &gct,
4587  const Vector &pa_data,
4588  const Vector &x,
4589  Vector &y)
4590 {
4591  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4592  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4593  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
4594  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
4595  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
4596  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4597  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4598  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4599 
4600  constexpr static int VDIM = 3;
4601 
4602  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4603  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4604  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
4605  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
4606  auto Gct = Reshape(gct.Read(), D1D, Q1D);
4607  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
4608  auto X = Reshape(x.Read(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
4609  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4610 
4611  MFEM_FORALL(e, NE,
4612  {
4613  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
4614 
4615  for (int qz = 0; qz < Q1D; ++qz)
4616  {
4617  for (int qy = 0; qy < Q1D; ++qy)
4618  {
4619  for (int qx = 0; qx < Q1D; ++qx)
4620  {
4621  for (int c = 0; c < VDIM; ++c)
4622  {
4623  mass[qz][qy][qx][c] = 0.0;
4624  }
4625  }
4626  }
4627  }
4628 
4629  int osc = 0;
4630 
4631  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4632  {
4633  const int D1Dz = (c == 2) ? D1D : D1D - 1;
4634  const int D1Dy = (c == 1) ? D1D : D1D - 1;
4635  const int D1Dx = (c == 0) ? D1D : D1D - 1;
4636 
4637  for (int dz = 0; dz < D1Dz; ++dz)
4638  {
4639  double massXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
4640  for (int qy = 0; qy < Q1D; ++qy)
4641  {
4642  for (int qx = 0; qx < Q1D; ++qx)
4643  {
4644  massXY[qy][qx] = 0.0;
4645  }
4646  }
4647 
4648  for (int dy = 0; dy < D1Dy; ++dy)
4649  {
4650  double massX[HDIV_MAX_Q1D];
4651  for (int qx = 0; qx < Q1D; ++qx)
4652  {
4653  massX[qx] = 0.0;
4654  }
4655 
4656  for (int dx = 0; dx < D1Dx; ++dx)
4657  {
4658  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4659  for (int qx = 0; qx < Q1D; ++qx)
4660  {
4661  massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
4662  }
4663  }
4664 
4665  for (int qy = 0; qy < Q1D; ++qy)
4666  {
4667  const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
4668  for (int qx = 0; qx < Q1D; ++qx)
4669  {
4670  const double wx = massX[qx];
4671  massXY[qy][qx] += wx * wy;
4672  }
4673  }
4674  }
4675 
4676  for (int qz = 0; qz < Q1D; ++qz)
4677  {
4678  const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
4679  for (int qy = 0; qy < Q1D; ++qy)
4680  {
4681  for (int qx = 0; qx < Q1D; ++qx)
4682  {
4683  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4684  }
4685  }
4686  }
4687  }
4688 
4689  osc += D1Dx * D1Dy * D1Dz;
4690  } // loop (c) over components
4691 
4692  // Apply D operator.
4693  for (int qz = 0; qz < Q1D; ++qz)
4694  {
4695  for (int qy = 0; qy < Q1D; ++qy)
4696  {
4697  for (int qx = 0; qx < Q1D; ++qx)
4698  {
4699  const double O11 = op(qx,qy,qz,0,e);
4700  const double O12 = op(qx,qy,qz,1,e);
4701  const double O13 = op(qx,qy,qz,2,e);
4702  const double O22 = op(qx,qy,qz,3,e);
4703  const double O23 = op(qx,qy,qz,4,e);
4704  const double O33 = op(qx,qy,qz,5,e);
4705  const double massX = mass[qz][qy][qx][0];
4706  const double massY = mass[qz][qy][qx][1];
4707  const double massZ = mass[qz][qy][qx][2];
4708  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
4709  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
4710  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
4711  }
4712  }
4713  }
4714 
4715  // x component
4716  osc = 0;
4717  {
4718  const int D1Dz = D1D;
4719  const int D1Dy = D1D;
4720  const int D1Dx = D1D - 1;
4721 
4722  for (int qz = 0; qz < Q1D; ++qz)
4723  {
4724  double gradXY12[MAX_D1D][MAX_D1D];
4725  double gradXY21[MAX_D1D][MAX_D1D];
4726 
4727  for (int dy = 0; dy < D1Dy; ++dy)
4728  {
4729  for (int dx = 0; dx < D1Dx; ++dx)
4730  {
4731  gradXY12[dy][dx] = 0.0;
4732  gradXY21[dy][dx] = 0.0;
4733  }
4734  }
4735  for (int qy = 0; qy < Q1D; ++qy)
4736  {
4737  double massX[MAX_D1D][2];
4738  for (int dx = 0; dx < D1Dx; ++dx)
4739  {
4740  for (int n = 0; n < 2; ++n)
4741  {
4742  massX[dx][n] = 0.0;
4743  }
4744  }
4745  for (int qx = 0; qx < Q1D; ++qx)
4746  {
4747  for (int dx = 0; dx < D1Dx; ++dx)
4748  {
4749  const double wx = Bot(dx,qx);
4750 
4751  massX[dx][0] += wx * mass[qz][qy][qx][1];
4752  massX[dx][1] += wx * mass[qz][qy][qx][2];
4753  }
4754  }
4755  for (int dy = 0; dy < D1Dy; ++dy)
4756  {
4757  const double wy = Bct(dy,qy);
4758  const double wDy = Gct(dy,qy);
4759 
4760  for (int dx = 0; dx < D1Dx; ++dx)
4761  {
4762  gradXY21[dy][dx] += massX[dx][0] * wy;
4763  gradXY12[dy][dx] += massX[dx][1] * wDy;
4764  }
4765  }
4766  }
4767 
4768  for (int dz = 0; dz < D1Dz; ++dz)
4769  {
4770  const double wz = Bct(dz,qz);
4771  const double wDz = Gct(dz,qz);
4772  for (int dy = 0; dy < D1Dy; ++dy)
4773  {
4774  for (int dx = 0; dx < D1Dx; ++dx)
4775  {
4776  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4777  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4778  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4779  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
4780  }
4781  }
4782  }
4783  } // loop qz
4784 
4785  osc += D1Dx * D1Dy * D1Dz;
4786  }
4787 
4788  // y component
4789  {
4790  const int D1Dz = D1D;
4791  const int D1Dy = D1D - 1;
4792  const int D1Dx = D1D;
4793 
4794  for (int qz = 0; qz < Q1D; ++qz)
4795  {
4796  double gradXY02[MAX_D1D][MAX_D1D];
4797  double gradXY20[MAX_D1D][MAX_D1D];
4798 
4799  for (int dy = 0; dy < D1Dy; ++dy)
4800  {
4801  for (int dx = 0; dx < D1Dx; ++dx)
4802  {
4803  gradXY02[dy][dx] = 0.0;
4804  gradXY20[dy][dx] = 0.0;
4805  }
4806  }
4807  for (int qx = 0; qx < Q1D; ++qx)
4808  {
4809  double massY[MAX_D1D][2];
4810  for (int dy = 0; dy < D1Dy; ++dy)
4811  {
4812  massY[dy][0] = 0.0;
4813  massY[dy][1] = 0.0;
4814  }
4815  for (int qy = 0; qy < Q1D; ++qy)
4816  {
4817  for (int dy = 0; dy < D1Dy; ++dy)
4818  {
4819  const double wy = Bot(dy,qy);
4820 
4821  massY[dy][0] += wy * mass[qz][qy][qx][2];
4822  massY[dy][1] += wy * mass[qz][qy][qx][0];
4823  }
4824  }
4825  for (int dx = 0; dx < D1Dx; ++dx)
4826  {
4827  const double wx = Bct(dx,qx);
4828  const double wDx = Gct(dx,qx);
4829 
4830  for (int dy = 0; dy < D1Dy; ++dy)
4831  {
4832  gradXY02[dy][dx] += massY[dy][0] * wDx;
4833  gradXY20[dy][dx] += massY[dy][1] * wx;
4834  }
4835  }
4836  }
4837 
4838  for (int dz = 0; dz < D1Dz; ++dz)
4839  {
4840  const double wz = Bct(dz,qz);
4841  const double wDz = Gct(dz,qz);
4842  for (int dy = 0; dy < D1Dy; ++dy)
4843  {
4844  for (int dx = 0; dx < D1Dx; ++dx)
4845  {
4846  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4847  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4848  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4849  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
4850  }
4851  }
4852  }
4853  } // loop qz
4854 
4855  osc += D1Dx * D1Dy * D1Dz;
4856  }
4857 
4858  // z component
4859  {
4860  const int D1Dz = D1D - 1;
4861  const int D1Dy = D1D;
4862  const int D1Dx = D1D;
4863 
4864  for (int qx = 0; qx < Q1D; ++qx)
4865  {
4866  double gradYZ01[MAX_D1D][MAX_D1D];
4867  double gradYZ10[MAX_D1D][MAX_D1D];
4868 
4869  for (int dy = 0; dy < D1Dy; ++dy)
4870  {
4871  for (int dz = 0; dz < D1Dz; ++dz)
4872  {
4873  gradYZ01[dz][dy] = 0.0;
4874  gradYZ10[dz][dy] = 0.0;
4875  }
4876  }
4877  for (int qy = 0; qy < Q1D; ++qy)
4878  {
4879  double massZ[MAX_D1D][2];
4880  for (int dz = 0; dz < D1Dz; ++dz)
4881  {
4882  for (int n = 0; n < 2; ++n)
4883  {
4884  massZ[dz][n] = 0.0;
4885  }
4886  }
4887  for (int qz = 0; qz < Q1D; ++qz)
4888  {
4889  for (int dz = 0; dz < D1Dz; ++dz)
4890  {
4891  const double wz = Bot(dz,qz);
4892 
4893  massZ[dz][0] += wz * mass[qz][qy][qx][0];
4894  massZ[dz][1] += wz * mass[qz][qy][qx][1];
4895  }
4896  }
4897  for (int dy = 0; dy < D1Dy; ++dy)
4898  {
4899  const double wy = Bct(dy,qy);
4900  const double wDy = Gct(dy,qy);
4901 
4902  for (int dz = 0; dz < D1Dz; ++dz)
4903  {
4904  gradYZ01[dz][dy] += wy * massZ[dz][1];
4905  gradYZ10[dz][dy] += wDy * massZ[dz][0];
4906  }
4907  }
4908  }
4909 
4910  for (int dx = 0; dx < D1Dx; ++dx)
4911  {
4912  const double wx = Bct(dx,qx);
4913  const double wDx = Gct(dx,qx);
4914 
4915  for (int dy = 0; dy < D1Dy; ++dy)
4916  {
4917  for (int dz = 0; dz < D1Dz; ++dz)
4918  {
4919  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4920  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4921  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4922  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
4923  }
4924  }
4925  }
4926  } // loop qx
4927  }
4928  }); // end of element loop
4929 }
4930 
4931 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4932 {
4933  if (testType == mfem::FiniteElement::CURL &&
4934  trialType == mfem::FiniteElement::CURL && dim == 3)
4935  {
4936  const int ndata = coeffDim == 1 ? 1 : 9;
4937 
4938  if (Device::Allows(Backend::DEVICE_MASK))
4939  {
4940  const int ID = (dofs1D << 4) | quad1D;
4941  switch (ID)
4942  {
4943  case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, ndata, ne,
4944  mapsO->B, mapsC->B,
4945  mapsC->G, pa_data, x, y);
4946  case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, ndata, ne,
4947  mapsO->B, mapsC->B,
4948  mapsC->G, pa_data, x, y);
4949  case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, ndata, ne,
4950  mapsO->B, mapsC->B,
4951  mapsC->G, pa_data, x, y);
4952  case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, ndata, ne,
4953  mapsO->B, mapsC->B,
4954  mapsC->G, pa_data, x, y);
4955  default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne,
4956  mapsO->B, mapsC->B, mapsC->G,
4957  pa_data, x, y);
4958  }
4959  }
4960  else
4961  PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B,
4962  mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
4963  }
4964  else if (testType == mfem::FiniteElement::DIV &&
4965  trialType == mfem::FiniteElement::CURL && dim == 3)
4966  PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
4967  mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
4968  pa_data, x, y);
4969  else
4970  {
4971  MFEM_ABORT("Unsupported dimension or space!");
4972  }
4973 }
4974 
4975 void MixedVectorCurlIntegrator::AddMultTransposePA(const Vector &x,
4976  Vector &y) const
4977 {
4978  if (testType == mfem::FiniteElement::DIV &&
4979  trialType == mfem::FiniteElement::CURL && dim == 3)
4980  PAHcurlHdivApply3DTranspose(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
4981  mapsC->B, mapsOtest->Bt, mapsCtest->Bt,
4982  mapsC->Gt, pa_data, x, y);
4983  else
4984  {
4985  MFEM_ABORT("Unsupported dimension or space!");
4986  }
4987 }
4988 
4989 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
4990  &trial_fes,
4991  const FiniteElementSpace &test_fes)
4992 {
4993  // Assumes tensor-product elements, with vector test and trial spaces.
4994  Mesh *mesh = trial_fes.GetMesh();
4995  const FiniteElement *trial_fel = trial_fes.GetFE(0);
4996  const FiniteElement *test_fel = test_fes.GetFE(0);
4997 
4998  const VectorTensorFiniteElement *trial_el =
4999  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
5000  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
5001 
5002  const VectorTensorFiniteElement *test_el =
5003  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
5004  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
5005 
5006  const IntegrationRule *ir
5007  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
5008  *mesh->GetElementTransformation(0));
5009  const int dims = trial_el->GetDim();
5010  MFEM_VERIFY(dims == 3, "");
5011 
5012  const int nq = ir->GetNPoints();
5013  dim = mesh->Dimension();
5014  MFEM_VERIFY(dim == 3, "");
5015 
5016  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
5017 
5018  ne = trial_fes.GetNE();
5019  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
5020  mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
5021  mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
5022  dofs1D = mapsC->ndof;
5023  quad1D = mapsC->nqpt;
5024 
5025  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
5026 
5027  testType = test_el->GetDerivType();
5028  trialType = trial_el->GetDerivType();
5029 
5030  const bool curlSpaces = (testType == mfem::FiniteElement::CURL &&
5031  trialType == mfem::FiniteElement::CURL);
5032 
5033  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
5034 
5035  coeffDim = DQ ? 3 : 1;
5036  const int ndata = curlSpaces ? (DQ ? 9 : 1) : symmDims;
5037 
5038  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
5039 
5040  QuadratureSpace qs(*mesh, *ir);
5041  CoefficientVector coeff(qs, CoefficientStorage::FULL);
5042  if (Q) { coeff.Project(*Q); }
5043  else if (DQ) { coeff.Project(*DQ); }
5044  else { coeff.SetConstant(1.0); }
5045 
5046  if (trialType == mfem::FiniteElement::CURL && dim == 3)
5047  {
5048  if (coeffDim == 1)
5049  {
5050  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
5051  }
5052  else
5053  {
5054  PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(),
5055  geom->J, coeff, pa_data);
5056  }
5057  }
5058  else if (trialType == mfem::FiniteElement::DIV && dim == 3 &&
5059  test_el->GetOrder() == trial_el->GetOrder())
5060  {
5061  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
5062  pa_data);
5063  }
5064  else
5065  {
5066  MFEM_ABORT("Unknown kernel.");
5067  }
5068 }
5069 
5070 // Apply to x corresponding to DOFs in H(curl) (trial), integrated against curl
5071 // of H(curl) test functions corresponding to y.
5072 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
5073 static void PAHcurlL2Apply3DTranspose(const int D1D,
5074  const int Q1D,
5075  const int coeffDim,
5076  const int NE,
5077  const Array<double> &bo,
5078  const Array<double> &bc,
5079  const Array<double> &bot,
5080  const Array<double> &bct,
5081  const Array<double> &gct,
5082  const Vector &pa_data,
5083  const Vector &x,
5084  Vector &y)
5085 {
5086  // See PAHcurlL2Apply3D for comments.
5087 
5088  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
5089  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
5090 
5091  constexpr static int VDIM = 3;
5092 
5093  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
5094  auto Bc = Reshape(bc.Read(), Q1D, D1D);
5095  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
5096  auto Bct = Reshape(bct.Read(), D1D, Q1D);
5097  auto Gct = Reshape(gct.Read(), D1D, Q1D);
5098  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
5099  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
5100  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
5101 
5102  MFEM_FORALL(e, NE,
5103  {
5104  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
5105 
5106  for (int qz = 0; qz < Q1D; ++qz)
5107  {
5108  for (int qy = 0; qy < Q1D; ++qy)
5109  {
5110  for (int qx = 0; qx < Q1D; ++qx)
5111  {
5112  for (int c = 0; c < VDIM; ++c)
5113  {
5114  mass[qz][qy][qx][c] = 0.0;
5115  }
5116  }
5117  }
5118  }
5119 
5120  int osc = 0;
5121 
5122  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
5123  {
5124  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
5125  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
5126  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
5127 
5128  for (int dz = 0; dz < D1Dz; ++dz)
5129  {
5130  double massXY[MAX_Q1D][MAX_Q1D];
5131  for (int qy = 0; qy < Q1D; ++qy)
5132  {
5133  for (int qx = 0; qx < Q1D; ++qx)
5134  {
5135  massXY[qy][qx] = 0.0;
5136  }
5137  }
5138 
5139  for (int dy = 0; dy < D1Dy; ++dy)
5140  {
5141  double massX[MAX_Q1D];
5142  for (int qx = 0; qx < Q1D; ++qx)
5143  {
5144  massX[qx] = 0.0;
5145  }
5146 
5147  for (int dx = 0; dx < D1Dx; ++dx)
5148  {
5149  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
5150  for (int qx = 0; qx < Q1D; ++qx)
5151  {
5152  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
5153  }
5154  }
5155 
5156  for (int qy = 0; qy < Q1D; ++qy)
5157  {
5158  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
5159  for (int qx = 0; qx < Q1D; ++qx)
5160  {
5161  const double wx = massX[qx];
5162  massXY[qy][qx] += wx * wy;
5163  }
5164  }
5165  }
5166 
5167  for (int qz = 0; qz < Q1D; ++qz)
5168  {
5169  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
5170  for (int qy = 0; qy < Q1D; ++qy)
5171  {
5172  for (int qx = 0; qx < Q1D; ++qx)
5173  {
5174  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
5175  }
5176  }
5177  }
5178  }
5179 
5180  osc += D1Dx * D1Dy * D1Dz;
5181  } // loop (c) over components
5182 
5183  // Apply D operator.
5184  for (int qz = 0; qz < Q1D; ++qz)
5185  {
5186  for (int qy = 0; qy < Q1D; ++qy)
5187  {
5188  for (int qx = 0; qx < Q1D; ++qx)
5189  {
5190  const double O11 = op(0,qx,qy,qz,e);
5191  if (coeffDim == 1)
5192  {
5193  for (int c = 0; c < VDIM; ++c)
5194  {
5195  mass[qz][qy][qx][c] *= O11;
5196  }
5197  }
5198  else
5199  {
5200  const double O12 = op(1,qx,qy,qz,e);
5201  const double O13 = op(2,qx,qy,qz,e);
5202  const double O21 = op(3,qx,qy,qz,e);
5203  const double O22 = op(4,qx,qy,qz,e);
5204  const double O23 = op(5,qx,qy,qz,e);
5205  const double O31 = op(6,qx,qy,qz,e);
5206  const double O32 = op(7,qx,qy,qz,e);
5207  const double O33 = op(8,qx,qy,qz,e);
5208  const double massX = mass[qz][qy][qx][0];
5209  const double massY = mass[qz][qy][qx][1];
5210  const double massZ = mass[qz][qy][qx][2];
5211  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
5212  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
5213  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
5214  }
5215  }
5216  }
5217  }
5218 
5219  // x component
5220  osc = 0;
5221  {
5222  const int D1Dz = D1D;
5223  const int D1Dy = D1D;
5224  const int D1Dx = D1D - 1;
5225 
5226  for (int qz = 0; qz < Q1D; ++qz)
5227  {
5228  double gradXY12[MAX_D1D][MAX_D1D];
5229  double gradXY21[MAX_D1D][MAX_D1D];
5230 
5231  for (int dy = 0; dy < D1Dy; ++dy)
5232  {
5233  for (int dx = 0; dx < D1Dx; ++dx)
5234  {
5235  gradXY12[dy][dx] = 0.0;
5236  gradXY21[dy][dx] = 0.0;
5237  }
5238  }
5239  for (int qy = 0; qy < Q1D; ++qy)
5240  {
5241  double massX[MAX_D1D][2];
5242  for (int dx = 0; dx < D1Dx; ++dx)
5243  {
5244  for (int n = 0; n < 2; ++n)
5245  {
5246  massX[dx][n] = 0.0;
5247  }
5248  }
5249  for (int qx = 0; qx < Q1D; ++qx)
5250  {
5251  for (int dx = 0; dx < D1Dx; ++dx)
5252  {
5253  const double wx = Bot(dx,qx);
5254 
5255  massX[dx][0] += wx * mass[qz][qy][qx][1];
5256  massX[dx][1] += wx * mass[qz][qy][qx][2];
5257  }
5258  }
5259  for (int dy = 0; dy < D1Dy; ++dy)
5260  {
5261  const double wy = Bct(dy,qy);
5262  const double wDy = Gct(dy,qy);
5263 
5264  for (int dx = 0; dx < D1Dx; ++dx)
5265  {
5266  gradXY21[dy][dx] += massX[dx][0] * wy;
5267  gradXY12[dy][dx] += massX[dx][1] * wDy;
5268  }
5269  }
5270  }
5271 
5272  for (int dz = 0; dz < D1Dz; ++dz)
5273  {
5274  const double wz = Bct(dz,qz);
5275  const double wDz = Gct(dz,qz);
5276  for (int dy = 0; dy < D1Dy; ++dy)
5277  {
5278  for (int dx = 0; dx < D1Dx; ++dx)
5279  {
5280  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
5281  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
5282  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5283  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
5284  }
5285  }
5286  }
5287  } // loop qz
5288 
5289  osc += D1Dx * D1Dy * D1Dz;
5290  }
5291 
5292  // y component
5293  {
5294  const int D1Dz = D1D;
5295  const int D1Dy = D1D - 1;
5296  const int D1Dx = D1D;
5297 
5298  for (int qz = 0; qz < Q1D; ++qz)
5299  {
5300  double gradXY02[MAX_D1D][MAX_D1D];
5301  double gradXY20[MAX_D1D][MAX_D1D];
5302 
5303  for (int dy = 0; dy < D1Dy; ++dy)
5304  {
5305  for (int dx = 0; dx < D1Dx; ++dx)
5306  {
5307  gradXY02[dy][dx] = 0.0;
5308  gradXY20[dy][dx] = 0.0;
5309  }
5310  }
5311  for (int qx = 0; qx < Q1D; ++qx)
5312  {
5313  double massY[MAX_D1D][2];
5314  for (int dy = 0; dy < D1Dy; ++dy)
5315  {
5316  massY[dy][0] = 0.0;
5317  massY[dy][1] = 0.0;
5318  }
5319  for (int qy = 0; qy < Q1D; ++qy)
5320  {
5321  for (int dy = 0; dy < D1Dy; ++dy)
5322  {
5323  const double wy = Bot(dy,qy);
5324 
5325  massY[dy][0] += wy * mass[qz][qy][qx][2];
5326  massY[dy][1] += wy * mass[qz][qy][qx][0];
5327  }
5328  }
5329  for (int dx = 0; dx < D1Dx; ++dx)
5330  {
5331  const double wx = Bct(dx,qx);
5332  const double wDx = Gct(dx,qx);
5333 
5334  for (int dy = 0; dy < D1Dy; ++dy)
5335  {
5336  gradXY02[dy][dx] += massY[dy][0] * wDx;
5337  gradXY20[dy][dx] += massY[dy][1] * wx;
5338  }
5339  }
5340  }
5341 
5342  for (int dz = 0; dz < D1Dz; ++dz)
5343  {
5344  const double wz = Bct(dz,qz);
5345  const double wDz = Gct(dz,qz);
5346  for (int dy = 0; dy < D1Dy; ++dy)
5347  {
5348  for (int dx = 0; dx < D1Dx; ++dx)
5349  {
5350  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
5351  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
5352  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5353  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
5354  }
5355  }
5356  }
5357  } // loop qz
5358 
5359  osc += D1Dx * D1Dy * D1Dz;
5360  }
5361 
5362  // z component
5363  {
5364  const int D1Dz = D1D - 1;
5365  const int D1Dy = D1D;
5366  const int D1Dx = D1D;
5367 
5368  for (int qx = 0; qx < Q1D; ++qx)
5369  {
5370  double gradYZ01[MAX_D1D][MAX_D1D];
5371  double gradYZ10[MAX_D1D][MAX_D1D];
5372 
5373  for (int dy = 0; dy < D1Dy; ++dy)
5374  {
5375  for (int dz = 0; dz < D1Dz; ++dz)
5376  {
5377  gradYZ01[dz][dy] = 0.0;
5378  gradYZ10[dz][dy] = 0.0;
5379  }
5380  }
5381  for (int qy = 0; qy < Q1D; ++qy)
5382  {
5383  double massZ[MAX_D1D][2];
5384  for (int dz = 0; dz < D1Dz; ++dz)
5385  {
5386  for (int n = 0; n < 2; ++n)
5387  {
5388  massZ[dz][n] = 0.0;
5389  }
5390  }
5391  for (int qz = 0; qz < Q1D; ++qz)
5392  {
5393  for (int dz = 0; dz < D1Dz; ++dz)
5394  {
5395  const double wz = Bot(dz,qz);
5396 
5397  massZ[dz][0] += wz * mass[qz][qy][qx][0];
5398  massZ[dz][1] += wz * mass[qz][qy][qx][1];
5399  }
5400  }
5401  for (int dy = 0; dy < D1Dy; ++dy)
5402  {
5403  const double wy = Bct(dy,qy);
5404  const double wDy = Gct(dy,qy);
5405 
5406  for (int dz = 0; dz < D1Dz; ++dz)
5407  {
5408  gradYZ01[dz][dy] += wy * massZ[dz][1];
5409  gradYZ10[dz][dy] += wDy * massZ[dz][0];
5410  }
5411  }
5412  }
5413 
5414  for (int dx = 0; dx < D1Dx; ++dx)
5415  {
5416  const double wx = Bct(dx,qx);
5417  const double wDx = Gct(dx,qx);
5418 
5419  for (int dy = 0; dy < D1Dy; ++dy)
5420  {
5421  for (int dz = 0; dz < D1Dz; ++dz)
5422  {
5423  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
5424  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
5425  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5426  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
5427  }
5428  }
5429  }
5430  } // loop qx
5431  }
5432  });
5433 }
5434 
5435 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
5436 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
5437  const int Q1D,
5438  const int coeffDim,
5439  const int NE,
5440  const Array<double> &bo,
5441  const Array<double> &bc,
5442  const Array<double> &gc,
5443  const Vector &pa_data,
5444  const Vector &x,
5445  Vector &y)
5446 {
5447  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
5448  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
5449 
5450  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
5451  auto Bc = Reshape(bc.Read(), Q1D, D1D);
5452  auto Gc = Reshape(gc.Read(), Q1D, D1D);
5453  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
5454  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
5455  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
5456 
5457  auto device_kernel = [=] MFEM_DEVICE (int e)
5458  {
5459  constexpr int VDIM = 3;
5460  constexpr int maxCoeffDim = 9;
5461 
5462  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
5463  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
5464  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
5465 
5466  double opc[maxCoeffDim];
5467  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
5468  MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
5469 
5470  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
5471 
5472  MFEM_FOREACH_THREAD(qx,x,Q1D)
5473  {
5474  MFEM_FOREACH_THREAD(qy,y,Q1D)
5475  {
5476  MFEM_FOREACH_THREAD(qz,z,Q1D)
5477  {
5478  for (int i=0; i<coeffDim; ++i)
5479  {
5480  opc[i] = op(i,qx,qy,qz,e);
5481  }
5482  }
5483  }
5484  }
5485 
5486  const int tidx = MFEM_THREAD_ID(x);
5487  const int tidy = MFEM_THREAD_ID(y);
5488  const int tidz = MFEM_THREAD_ID(z);
5489 
5490  if (tidz == 0)
5491  {
5492  MFEM_FOREACH_THREAD(d,y,D1D)
5493  {
5494  MFEM_FOREACH_THREAD(q,x,Q1D)
5495  {
5496  sBc[d][q] = Bc(q,d);
5497  sGc[d][q] = Gc(q,d);
5498  if (d < D1D-1)
5499  {
5500  sBo[d][q] = Bo(q,d);
5501  }
5502  }
5503  }
5504  }
5505  MFEM_SYNC_THREAD;
5506 
5507  for (int qz=0; qz < Q1D; ++qz)
5508  {
5509  if (tidz == qz)
5510  {
5511  MFEM_FOREACH_THREAD(qy,y,Q1D)
5512  {
5513  MFEM_FOREACH_THREAD(qx,x,Q1D)
5514  {
5515  for (int i=0; i<3; ++i)
5516  {
5517  mass[qy][qx][i] = 0.0;
5518  }
5519  }
5520  }
5521  }
5522 
5523  int osc = 0;
5524  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
5525  {
5526  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
5527  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
5528  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
5529 
5530  MFEM_FOREACH_THREAD(dz,z,D1Dz)
5531  {
5532  MFEM_FOREACH_THREAD(dy,y,D1Dy)
5533  {
5534  MFEM_FOREACH_THREAD(dx,x,D1Dx)
5535  {
5536  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
5537  }
5538  }
5539  }
5540  MFEM_SYNC_THREAD;
5541 
5542  if (tidz == qz)
5543  {
5544  if (c == 0)
5545  {
5546  for (int i=0; i<coeffDim; ++i)
5547  {
5548  sop[i][tidx][tidy] = opc[i];
5549  }
5550  }
5551 
5552  MFEM_FOREACH_THREAD(qy,y,Q1D)
5553  {
5554  MFEM_FOREACH_THREAD(qx,x,Q1D)
5555  {
5556  double u = 0.0;
5557 
5558  for (int dz = 0; dz < D1Dz; ++dz)
5559  {
5560  const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
5561 
5562  for (int dy = 0; dy < D1Dy; ++dy)
5563  {
5564  const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
5565 
5566  for (int dx = 0; dx < D1Dx; ++dx)
5567  {
5568  const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
5569  u += wx * wy * wz;
5570  }
5571  }
5572  }
5573 
5574  mass[qy][qx][c] += u;
5575  } // qx
5576  } // qy
5577  } // tidz == qz
5578 
5579  osc += D1Dx * D1Dy * D1Dz;
5580  MFEM_SYNC_THREAD;
5581  } // c
5582 
5583  double dxyz1 = 0.0;
5584  double dxyz2 = 0.0;
5585  double dxyz3 = 0.0;
5586 
5587  MFEM_FOREACH_THREAD(dz,z,D1D)
5588  {
5589  const double wcz = sBc[dz][qz];
5590  const double wcDz = sGc[dz][qz];
5591  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
5592 
5593  MFEM_FOREACH_THREAD(dy,y,D1D)
5594  {
5595  MFEM_FOREACH_THREAD(dx,x,D1D)
5596  {
5597  for (int qy = 0; qy < Q1D; ++qy)
5598  {
5599  const double wcy = sBc[dy][qy];
5600  const double wcDy = sGc[dy][qy];
5601  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
5602 
5603  for (int qx = 0; qx < Q1D; ++qx)
5604  {
5605  const double O11 = sop[0][qx][qy];
5606  double c1, c2, c3;
5607  if (coeffDim == 1)
5608  {
5609  c1 = O11 * mass[qy][qx][0];
5610  c2 = O11 * mass[qy][qx][1];
5611  c3 = O11 * mass[qy][qx][2];
5612  }
5613  else
5614  {
5615  const double O12 = sop[1][qx][qy];
5616  const double O13 = sop[2][qx][qy];
5617  const double O21 = sop[3][qx][qy];
5618  const double O22 = sop[4][qx][qy];
5619  const double O23 = sop[5][qx][qy];
5620  const double O31 = sop[6][qx][qy];
5621  const double O32 = sop[7][qx][qy];
5622  const double O33 = sop[8][qx][qy];
5623 
5624  c1 = (O11*mass[qy][qx][0])+(O12*mass[qy][qx][1])+(O13*mass[qy][qx][2]);
5625  c2 = (O21*mass[qy][qx][0])+(O22*mass[qy][qx][1])+(O23*mass[qy][qx][2]);
5626  c3 = (O31*mass[qy][qx][0])+(O32*mass[qy][qx][1])+(O33*mass[qy][qx][2]);
5627  }
5628 
5629  const double wcx = sBc[dx][qx];
5630  const double wDx = sGc[dx][qx];
5631 
5632  if (dx < D1D-1)
5633  {
5634  const double wx = sBo[dx][qx];
5635  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
5636  }
5637 
5638  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
5639 
5640  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
5641  } // qx
5642  } // qy
5643  } // dx
5644  } // dy
5645  } // dz
5646 
5647  MFEM_SYNC_THREAD;
5648 
5649  MFEM_FOREACH_THREAD(dz,z,D1D)
5650  {
5651  MFEM_FOREACH_THREAD(dy,y,D1D)
5652  {
5653  MFEM_FOREACH_THREAD(dx,x,D1D)
5654  {
5655  if (dx < D1D-1)
5656  {
5657  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
5658  }
5659  if (dy < D1D-1)
5660  {
5661  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
5662  }
5663  if (dz < D1D-1)
5664  {
5665  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
5666  }
5667  }
5668  }
5669  }
5670  } // qz
5671  }; // end of element loop
5672 
5673  auto host_kernel = [&] MFEM_LAMBDA (int)
5674  {
5675  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
5676  };
5677 
5678  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
5679 }
5680 
5681 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
5682 {
5683  if (testType == mfem::FiniteElement::CURL &&
5684  trialType == mfem::FiniteElement::CURL && dim == 3)
5685  {
5686  const int ndata = coeffDim == 1 ? 1 : 9;
5687  if (Device::Allows(Backend::DEVICE_MASK))
5688  {
5689  const int ID = (dofs1D << 4) | quad1D;
5690  switch (ID)
5691  {
5692  case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, ndata,
5693  ne, mapsO->B, mapsC->B,
5694  mapsC->G, pa_data, x, y);
5695  case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, ndata,
5696  ne, mapsO->B, mapsC->B,
5697  mapsC->G, pa_data, x, y);
5698  case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, ndata,
5699  ne, mapsO->B, mapsC->B,
5700  mapsC->G, pa_data, x, y);
5701  case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, ndata,
5702  ne, mapsO->B, mapsC->B,
5703  mapsC->G, pa_data, x, y);
5704  default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne,
5705  mapsO->B, mapsC->B,
5706  mapsC->G, pa_data, x, y);
5707  }
5708  }
5709  else
5710  PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B,
5711  mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
5712  }
5713  else if (testType == mfem::FiniteElement::CURL &&
5714  trialType == mfem::FiniteElement::DIV && dim == 3)
5715  {
5716  PAHcurlHdivApply3DTranspose(dofs1D, dofs1D, quad1D, ne, mapsO->B,
5717  mapsC->B, mapsO->Bt, mapsC->Bt,
5718  mapsC->Gt, pa_data, x, y);
5719  }
5720  else
5721  {
5722  MFEM_ABORT("Unsupported dimension or space!");
5723  }
5724 }
5725 
5726 void MixedVectorWeakCurlIntegrator::AddMultTransposePA(const Vector &x,
5727  Vector &y) const
5728 {
5729  if (testType == mfem::FiniteElement::CURL &&
5730  trialType == mfem::FiniteElement::DIV && dim == 3)
5731  {
5732  PAHcurlHdivApply3D(dofs1D, dofs1D, quad1D, ne, mapsO->B,
5733  mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->G,
5734  pa_data, x, y);
5735  }
5736  else
5737  {
5738  MFEM_ABORT("Unsupported dimension or space!");
5739  }
5740 }
5741 
5742 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
5743 // to get a dof in H(curl) (range). You can think of the range as the "test" space
5744 // and the domain as the "trial" space, but there's no integration.
5745 static void PAHcurlApplyGradient2D(const int c_dofs1D,
5746  const int o_dofs1D,
5747  const int NE,
5748  const Array<double> &B_,
5749  const Array<double> &G_,
5750  const Vector &x_,
5751  Vector &y_)
5752 {
5753  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5754  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5755 
5756  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5757  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5758 
5759  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5760  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5761 
5762  MFEM_FORALL(e, NE,
5763  {
5764  double w[MAX_D1D][MAX_D1D];
5765 
5766  // horizontal part
5767  for (int dx = 0; dx < c_dofs1D; ++dx)
5768  {
5769  for (int ey = 0; ey < c_dofs1D; ++ey)
5770  {
5771  w[dx][ey] = 0.0;
5772  for (int dy = 0; dy < c_dofs1D; ++dy)
5773  {
5774  w[dx][ey] += B(ey, dy) * x(dx, dy, e);
5775  }
5776  }
5777  }
5778 
5779  for (int ey = 0; ey < c_dofs1D; ++ey)
5780  {
5781  for (int ex = 0; ex < o_dofs1D; ++ex)
5782  {
5783  double s = 0.0;
5784  for (int dx = 0; dx < c_dofs1D; ++dx)
5785  {
5786  s += G(ex, dx) * w[dx][ey];
5787  }
5788  const int local_index = ey*o_dofs1D + ex;
5789  y(local_index, e) += s;
5790  }
5791  }
5792 
5793  // vertical part
5794  for (int dx = 0; dx < c_dofs1D; ++dx)
5795  {
5796  for (int ey = 0; ey < o_dofs1D; ++ey)
5797  {
5798  w[dx][ey] = 0.0;
5799  for (int dy = 0; dy < c_dofs1D; ++dy)
5800  {
5801  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5802  }
5803  }
5804  }
5805 
5806  for (int ey = 0; ey < o_dofs1D; ++ey)
5807  {
5808  for (int ex = 0; ex < c_dofs1D; ++ex)
5809  {
5810  double s = 0.0;
5811  for (int dx = 0; dx < c_dofs1D; ++dx)
5812  {
5813  s += B(ex, dx) * w[dx][ey];
5814  }
5815  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5816  y(local_index, e) += s;
5817  }
5818  }
5819  });
5820 }
5821 
5822 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
5823 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
5824  const int o_dofs1D,
5825  const int NE,
5826  const Array<double> &G_,
5827  const Vector &x_,
5828  Vector &y_)
5829 {
5830  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5831 
5832  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5833  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5834 
5835  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5836  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5837 
5838  MFEM_FORALL(e, NE,
5839  {
5840  double w[MAX_D1D][MAX_D1D];
5841 
5842  // horizontal part
5843  for (int dx = 0; dx < c_dofs1D; ++dx)
5844  {
5845  for (int ey = 0; ey < c_dofs1D; ++ey)
5846  {
5847  const int dy = ey;
5848  w[dx][ey] = x(dx, dy, e);
5849  }
5850  }
5851 
5852  for (int ey = 0; ey < c_dofs1D; ++ey)
5853  {
5854  for (int ex = 0; ex < o_dofs1D; ++ex)
5855  {
5856  double s = 0.0;
5857  for (int dx = 0; dx < c_dofs1D; ++dx)
5858  {
5859  s += G(ex, dx) * w[dx][ey];
5860  }
5861  const int local_index = ey*o_dofs1D + ex;
5862  y(local_index, e) += s;
5863  }
5864  }
5865 
5866  // vertical part
5867  for (int dx = 0; dx < c_dofs1D; ++dx)
5868  {
5869  for (int ey = 0; ey < o_dofs1D; ++ey)
5870  {
5871  w[dx][ey] = 0.0;
5872  for (int dy = 0; dy < c_dofs1D; ++dy)
5873  {
5874  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5875  }
5876  }
5877  }
5878 
5879  for (int ey = 0; ey < o_dofs1D; ++ey)
5880  {
5881  for (int ex = 0; ex < c_dofs1D; ++ex)
5882  {
5883  const int dx = ex;
5884  const double s = w[dx][ey];
5885  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5886  y(local_index, e) += s;
5887  }
5888  }
5889  });
5890 }
5891 
5892 static void PAHcurlApplyGradientTranspose2D(
5893  const int c_dofs1D, const int o_dofs1D, const int NE,
5894  const Array<double> &B_, const Array<double> &G_,
5895  const Vector &x_, Vector &y_)
5896 {
5897  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5898  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5899 
5900  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5901  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5902 
5903  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5904  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5905 
5906  MFEM_FORALL(e, NE,
5907  {
5908  double w[MAX_D1D][MAX_D1D];
5909 
5910  // horizontal part (open x, closed y)
5911  for (int dy = 0; dy < c_dofs1D; ++dy)
5912  {
5913  for (int ex = 0; ex < o_dofs1D; ++ex)
5914  {
5915  w[dy][ex] = 0.0;
5916  for (int ey = 0; ey < c_dofs1D; ++ey)
5917  {
5918  const int local_index = ey*o_dofs1D + ex;
5919  w[dy][ex] += B(ey, dy) * x(local_index, e);
5920  }
5921  }
5922  }
5923 
5924  for (int dy = 0; dy < c_dofs1D; ++dy)
5925  {
5926  for (int dx = 0; dx < c_dofs1D; ++dx)
5927  {
5928  double s = 0.0;
5929  for (int ex = 0; ex < o_dofs1D; ++ex)
5930  {
5931  s += G(ex, dx) * w[dy][ex];
5932  }
5933  y(dx, dy, e) += s;
5934  }
5935  }
5936 
5937  // vertical part (open y, closed x)
5938  for (int dy = 0; dy < c_dofs1D; ++dy)
5939  {
5940  for (int ex = 0; ex < c_dofs1D; ++ex)
5941  {
5942  w[dy][ex] = 0.0;
5943  for (int ey = 0; ey < o_dofs1D; ++ey)
5944  {
5945  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5946  w[dy][ex] += G(ey, dy) * x(local_index, e);
5947  }
5948  }
5949  }
5950 
5951  for (int dy = 0; dy < c_dofs1D; ++dy)
5952  {
5953  for (int dx = 0; dx < c_dofs1D; ++dx)
5954  {
5955  double s = 0.0;
5956  for (int ex = 0; ex < c_dofs1D; ++ex)
5957  {
5958  s += B(ex, dx) * w[dy][ex];
5959  }
5960  y(dx, dy, e) += s;
5961  }
5962  }
5963  });
5964 }
5965 
5966 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
5967 // B is identity
5968 static void PAHcurlApplyGradientTranspose2DBId(
5969  const int c_dofs1D, const int o_dofs1D, const int NE,
5970  const Array<double> &G_,
5971  const Vector &x_, Vector &y_)
5972 {
5973  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5974 
5975  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5976  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5977 
5978  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5979  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5980 
5981  MFEM_FORALL(e, NE,
5982  {
5983  double w[MAX_D1D][MAX_D1D];
5984 
5985  // horizontal part (open x, closed y)
5986  for (int dy = 0; dy < c_dofs1D; ++dy)
5987  {
5988  for (int ex = 0; ex < o_dofs1D; ++ex)
5989  {
5990  const int ey = dy;
5991  const int local_index = ey*o_dofs1D + ex;
5992  w[dy][ex] = x(local_index, e);
5993  }
5994  }
5995 
5996  for (int dy = 0; dy < c_dofs1D; ++dy)
5997  {
5998  for (int dx = 0; dx < c_dofs1D; ++dx)
5999  {
6000  double s = 0.0;
6001  for (int ex = 0; ex < o_dofs1D; ++ex)
6002  {
6003  s += G(ex, dx) * w[dy][ex];
6004  }
6005  y(dx, dy, e) += s;
6006  }
6007  }
6008 
6009  // vertical part (open y, closed x)
6010  for (int dy = 0; dy < c_dofs1D; ++dy)
6011  {
6012  for (int ex = 0; ex < c_dofs1D; ++ex)
6013  {
6014  w[dy][ex] = 0.0;
6015  for (int ey = 0; ey < o_dofs1D; ++ey)
6016  {
6017  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
6018  w[dy][ex] += G(ey, dy) * x(local_index, e);
6019  }
6020  }
6021  }
6022 
6023  for (int dy = 0; dy < c_dofs1D; ++dy)
6024  {
6025  for (int dx = 0; dx < c_dofs1D; ++dx)
6026  {
6027  const int ex = dx;
6028  const double s = w[dy][ex];
6029  y(dx, dy, e) += s;
6030  }
6031  }
6032  });
6033 }
6034 
6035 static void PAHcurlApplyGradient3D(const int c_dofs1D,
6036  const int o_dofs1D,
6037  const int NE,
6038  const Array<double> &B_,
6039  const Array<double> &G_,
6040  const Vector &x_,
6041  Vector &y_)
6042 {
6043  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
6044  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6045 
6046  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6047  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6048 
6049  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6050  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6051 
6052  MFEM_FORALL(e, NE,
6053  {
6054  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6055  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6056 
6057  // ---
6058  // dofs that point parallel to x-axis (open in x, closed in y, z)
6059  // ---
6060 
6061  // contract in z
6062  for (int ez = 0; ez < c_dofs1D; ++ez)
6063  {
6064  for (int dx = 0; dx < c_dofs1D; ++dx)
6065  {
6066  for (int dy = 0; dy < c_dofs1D; ++dy)
6067  {
6068  w1[dx][dy][ez] = 0.0;
6069  for (int dz = 0; dz < c_dofs1D; ++dz)
6070  {
6071  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
6072  }
6073  }
6074  }
6075  }
6076 
6077  // contract in y
6078  for (int ez = 0; ez < c_dofs1D; ++ez)
6079  {
6080  for (int ey = 0; ey < c_dofs1D; ++ey)
6081  {
6082  for (int dx = 0; dx < c_dofs1D; ++dx)
6083  {
6084  w2[dx][ey][ez] = 0.0;
6085  for (int dy = 0; dy < c_dofs1D; ++dy)
6086  {
6087  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
6088  }
6089  }
6090  }
6091  }
6092 
6093  // contract in x
6094  for (int ez = 0; ez < c_dofs1D; ++ez)
6095  {
6096  for (int ey = 0; ey < c_dofs1D; ++ey)
6097  {
6098  for (int ex = 0; ex < o_dofs1D; ++ex)
6099  {
6100  double s = 0.0;
6101  for (int dx = 0; dx < c_dofs1D; ++dx)
6102  {
6103  s += G(ex, dx) * w2[dx][ey][ez];
6104  }
6105  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6106  y(local_index, e) += s;
6107  }
6108  }
6109  }
6110 
6111  // ---
6112  // dofs that point parallel to y-axis (open in y, closed in x, z)
6113  // ---
6114 
6115  // contract in z
6116  for (int ez = 0; ez < c_dofs1D; ++ez)
6117  {
6118  for (int dx = 0; dx < c_dofs1D; ++dx)
6119  {
6120  for (int dy = 0; dy < c_dofs1D; ++dy)
6121  {
6122  w1[dx][dy][ez] = 0.0;
6123  for (int dz = 0; dz < c_dofs1D; ++dz)
6124  {
6125  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
6126  }
6127  }
6128  }
6129  }
6130 
6131  // contract in y
6132  for (int ez = 0; ez < c_dofs1D; ++ez)
6133  {
6134  for (int ey = 0; ey < o_dofs1D; ++ey)
6135  {
6136  for (int dx = 0; dx < c_dofs1D; ++dx)
6137  {
6138  w2[dx][ey][ez] = 0.0;
6139  for (int dy = 0; dy < c_dofs1D; ++dy)
6140  {
6141  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
6142  }
6143  }
6144  }
6145  }
6146 
6147  // contract in x
6148  for (int ez = 0; ez < c_dofs1D; ++ez)
6149  {
6150  for (int ey = 0; ey < o_dofs1D; ++ey)
6151  {
6152  for (int ex = 0; ex < c_dofs1D; ++ex)
6153  {
6154  double s = 0.0;
6155  for (int dx = 0; dx < c_dofs1D; ++dx)
6156  {
6157  s += B(ex, dx) * w2[dx][ey][ez];
6158  }
6159  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6160  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6161  y(local_index, e) += s;
6162  }
6163  }
6164  }
6165 
6166  // ---
6167  // dofs that point parallel to z-axis (open in z, closed in x, y)
6168  // ---
6169 
6170  // contract in z
6171  for (int ez = 0; ez < o_dofs1D; ++ez)
6172  {
6173  for (int dx = 0; dx < c_dofs1D; ++dx)
6174  {
6175  for (int dy = 0; dy < c_dofs1D; ++dy)
6176  {
6177  w1[dx][dy][ez] = 0.0;
6178  for (int dz = 0; dz < c_dofs1D; ++dz)
6179  {
6180  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
6181  }
6182  }
6183  }
6184  }
6185 
6186  // contract in y
6187  for (int ez = 0; ez < o_dofs1D; ++ez)
6188  {
6189  for (int ey = 0; ey < c_dofs1D; ++ey)
6190  {
6191  for (int dx = 0; dx < c_dofs1D; ++dx)
6192  {
6193  w2[dx][ey][ez] = 0.0;
6194  for (int dy = 0; dy < c_dofs1D; ++dy)
6195  {
6196  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
6197  }
6198  }
6199  }
6200  }
6201 
6202  // contract in x
6203  for (int ez = 0; ez < o_dofs1D; ++ez)
6204  {
6205  for (int ey = 0; ey < c_dofs1D; ++ey)
6206  {
6207  for (int ex = 0; ex < c_dofs1D; ++ex)
6208  {
6209  double s = 0.0;
6210  for (int dx = 0; dx < c_dofs1D; ++dx)
6211  {
6212  s += B(ex, dx) * w2[dx][ey][ez];
6213  }
6214  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6215  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6216  y(local_index, e) += s;
6217  }
6218  }
6219  }
6220  });
6221 }
6222 
6223 // Specialization of PAHcurlApplyGradient3D to the case where
6224 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
6225  const int o_dofs1D,
6226  const int NE,
6227  const Array<double> &G_,
6228  const Vector &x_,
6229  Vector &y_)
6230 {
6231  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6232 
6233  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6234  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6235 
6236  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6237  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6238 
6239  MFEM_FORALL(e, NE,
6240  {
6241  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6242  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6243 
6244  // ---
6245  // dofs that point parallel to x-axis (open in x, closed in y, z)
6246  // ---
6247 
6248  // contract in z
6249  for (int ez = 0; ez < c_dofs1D; ++ez)
6250  {
6251  for (int dx = 0; dx < c_dofs1D; ++dx)
6252  {
6253  for (int dy = 0; dy < c_dofs1D; ++dy)
6254  {
6255  const int dz = ez;
6256  w1[dx][dy][ez] = x(dx, dy, dz, e);
6257  }
6258  }
6259  }
6260 
6261  // contract in y
6262  for (int ez = 0; ez < c_dofs1D; ++ez)
6263  {
6264  for (int ey = 0; ey < c_dofs1D; ++ey)
6265  {
6266  for (int dx = 0; dx < c_dofs1D; ++dx)
6267  {
6268  const int dy = ey;
6269  w2[dx][ey][ez] = w1[dx][dy][ez];
6270  }
6271  }
6272  }
6273 
6274  // contract in x
6275  for (int ez = 0; ez < c_dofs1D; ++ez)
6276  {
6277  for (int ey = 0; ey < c_dofs1D; ++ey)
6278  {
6279  for (int ex = 0; ex < o_dofs1D; ++ex)
6280  {
6281  double s = 0.0;
6282  for (int dx = 0; dx < c_dofs1D; ++dx)
6283  {
6284  s += G(ex, dx) * w2[dx][ey][ez];
6285  }
6286  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6287  y(local_index, e) += s;
6288  }
6289  }
6290  }
6291 
6292  // ---
6293  // dofs that point parallel to y-axis (open in y, closed in x, z)
6294  // ---
6295 
6296  // contract in z
6297  for (int ez = 0; ez < c_dofs1D; ++ez)
6298  {
6299  for (int dx = 0; dx < c_dofs1D; ++dx)
6300  {
6301  for (int dy = 0; dy < c_dofs1D; ++dy)
6302  {
6303  const int dz = ez;
6304  w1[dx][dy][ez] = x(dx, dy, dz, e);
6305  }
6306  }
6307  }
6308 
6309  // contract in y
6310  for (int ez = 0; ez < c_dofs1D; ++ez)
6311  {
6312  for (int ey = 0; ey < o_dofs1D; ++ey)
6313  {
6314  for (int dx = 0; dx < c_dofs1D; ++dx)
6315  {
6316  w2[dx][ey][ez] = 0.0;
6317  for (int dy = 0; dy < c_dofs1D; ++dy)
6318  {
6319  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
6320  }
6321  }
6322  }
6323  }
6324 
6325  // contract in x
6326  for (int ez = 0; ez < c_dofs1D; ++ez)
6327  {
6328  for (int ey = 0; ey < o_dofs1D; ++ey)
6329  {
6330  for (int ex = 0; ex < c_dofs1D; ++ex)
6331  {
6332  const int dx = ex;
6333  const double s = w2[dx][ey][ez];
6334  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6335  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6336  y(local_index, e) += s;
6337  }
6338  }
6339  }
6340 
6341  // ---
6342  // dofs that point parallel to z-axis (open in z, closed in x, y)
6343  // ---
6344 
6345  // contract in z
6346  for (int ez = 0; ez < o_dofs1D; ++ez)
6347  {
6348  for (int dx = 0; dx < c_dofs1D; ++dx)
6349  {
6350  for (int dy = 0; dy < c_dofs1D; ++dy)
6351  {
6352  w1[dx][dy][ez] = 0.0;
6353  for (int dz = 0; dz < c_dofs1D; ++dz)
6354  {
6355  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
6356  }
6357  }
6358  }
6359  }
6360 
6361  // contract in y
6362  for (int ez = 0; ez < o_dofs1D; ++ez)
6363  {
6364  for (int ey = 0; ey < c_dofs1D; ++ey)
6365  {
6366  for (int dx = 0; dx < c_dofs1D; ++dx)
6367  {
6368  const int dy = ey;
6369  w2[dx][ey][ez] = w1[dx][dy][ez];
6370  }
6371  }
6372  }
6373 
6374  // contract in x
6375  for (int ez = 0; ez < o_dofs1D; ++ez)
6376  {
6377  for (int ey = 0; ey < c_dofs1D; ++ey)
6378  {
6379  for (int ex = 0; ex < c_dofs1D; ++ex)
6380  {
6381  const int dx = ex;
6382  const double s = w2[dx][ey][ez];
6383  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6384  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6385  y(local_index, e) += s;
6386  }
6387  }
6388  }
6389  });
6390 }
6391 
6392 static void PAHcurlApplyGradientTranspose3D(
6393  const int c_dofs1D, const int o_dofs1D, const int NE,
6394  const Array<double> &B_, const Array<double> &G_,
6395  const Vector &x_, Vector &y_)
6396 {
6397  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
6398  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6399 
6400  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6401  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6402 
6403  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6404  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6405 
6406  MFEM_FORALL(e, NE,
6407  {
6408  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6409  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6410  // ---
6411  // dofs that point parallel to x-axis (open in x, closed in y, z)
6412  // ---
6413 
6414  // contract in z
6415  for (int dz = 0; dz < c_dofs1D; ++dz)
6416  {
6417  for (int ex = 0; ex < o_dofs1D; ++ex)
6418  {
6419  for (int ey = 0; ey < c_dofs1D; ++ey)
6420  {
6421  w1[ex][ey][dz] = 0.0;
6422  for (int ez = 0; ez < c_dofs1D; ++ez)
6423  {
6424  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6425  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6426  }
6427  }
6428  }
6429  }
6430 
6431  // contract in y
6432  for (int dz = 0; dz < c_dofs1D; ++dz)
6433  {
6434  for (int dy = 0; dy < c_dofs1D; ++dy)
6435  {
6436  for (int ex = 0; ex < o_dofs1D; ++ex)
6437  {
6438  w2[ex][dy][dz] = 0.0;
6439  for (int ey = 0; ey < c_dofs1D; ++ey)
6440  {
6441  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6442  }
6443  }
6444  }
6445  }
6446 
6447  // contract in x
6448  for (int dz = 0; dz < c_dofs1D; ++dz)
6449  {
6450  for (int dy = 0; dy < c_dofs1D; ++dy)
6451  {
6452  for (int dx = 0; dx < c_dofs1D; ++dx)
6453  {
6454  double s = 0.0;
6455  for (int ex = 0; ex < o_dofs1D; ++ex)
6456  {
6457  s += G(ex, dx) * w2[ex][dy][dz];
6458  }
6459  y(dx, dy, dz, e) += s;
6460  }
6461  }
6462  }
6463 
6464  // ---
6465  // dofs that point parallel to y-axis (open in y, closed in x, z)
6466  // ---
6467 
6468  // contract in z
6469  for (int dz = 0; dz < c_dofs1D; ++dz)
6470  {
6471  for (int ex = 0; ex < c_dofs1D; ++ex)
6472  {
6473  for (int ey = 0; ey < o_dofs1D; ++ey)
6474  {
6475  w1[ex][ey][dz] = 0.0;
6476  for (int ez = 0; ez < c_dofs1D; ++ez)
6477  {
6478  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6479  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6480  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6481  }
6482  }
6483  }
6484  }
6485 
6486  // contract in y
6487  for (int dz = 0; dz < c_dofs1D; ++dz)
6488  {
6489  for (int dy = 0; dy < c_dofs1D; ++dy)
6490  {
6491  for (int ex = 0; ex < c_dofs1D; ++ex)
6492  {
6493  w2[ex][dy][dz] = 0.0;
6494  for (int ey = 0; ey < o_dofs1D; ++ey)
6495  {
6496  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6497  }
6498  }
6499  }
6500  }
6501 
6502  // contract in x
6503  for (int dz = 0; dz < c_dofs1D; ++dz)
6504  {
6505  for (int dy = 0; dy < c_dofs1D; ++dy)
6506  {
6507  for (int dx = 0; dx < c_dofs1D; ++dx)
6508  {
6509  double s = 0.0;
6510  for (int ex = 0; ex < c_dofs1D; ++ex)
6511  {
6512  s += B(ex, dx) * w2[ex][dy][dz];
6513  }
6514  y(dx, dy, dz, e) += s;
6515  }
6516  }
6517  }
6518 
6519  // ---
6520  // dofs that point parallel to z-axis (open in z, closed in x, y)
6521  // ---
6522 
6523  // contract in z
6524  for (int dz = 0; dz < c_dofs1D; ++dz)
6525  {
6526  for (int ex = 0; ex < c_dofs1D; ++ex)
6527  {
6528  for (int ey = 0; ey < c_dofs1D; ++ey)
6529  {
6530  w1[ex][ey][dz] = 0.0;
6531  for (int ez = 0; ez < o_dofs1D; ++ez)
6532  {
6533  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6534  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6535  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6536  }
6537  }
6538  }
6539  }
6540 
6541  // contract in y
6542  for (int dz = 0; dz < c_dofs1D; ++dz)
6543  {
6544  for (int dy = 0; dy < c_dofs1D; ++dy)
6545  {
6546  for (int ex = 0; ex < c_dofs1D; ++ex)
6547  {
6548  w2[ex][dy][dz] = 0.0;
6549  for (int ey = 0; ey < c_dofs1D; ++ey)
6550  {
6551  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6552  }
6553  }
6554  }
6555  }
6556 
6557  // contract in x
6558  for (int dz = 0; dz < c_dofs1D; ++dz)
6559  {
6560  for (int dy = 0; dy < c_dofs1D; ++dy)
6561  {
6562  for (int dx = 0; dx < c_dofs1D; ++dx)
6563  {
6564  double s = 0.0;
6565  for (int ex = 0; ex < c_dofs1D; ++ex)
6566  {
6567  s += B(ex, dx) * w2[ex][dy][dz];
6568  }
6569  y(dx, dy, dz, e) += s;
6570  }
6571  }
6572  }
6573  });
6574 }
6575 
6576 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
6577 static void PAHcurlApplyGradientTranspose3DBId(
6578  const int c_dofs1D, const int o_dofs1D, const int NE,
6579  const Array<double> &G_,
6580  const Vector &x_, Vector &y_)
6581 {
6582  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6583 
6584  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6585  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6586 
6587  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6588  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6589 
6590  MFEM_FORALL(e, NE,
6591  {
6592  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6593  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6594  // ---
6595  // dofs that point parallel to x-axis (open in x, closed in y, z)
6596  // ---
6597 
6598  // contract in z
6599  for (int dz = 0; dz < c_dofs1D; ++dz)
6600  {
6601  for (int ex = 0; ex < o_dofs1D; ++ex)
6602  {
6603  for (int ey = 0; ey < c_dofs1D; ++ey)
6604  {
6605  const int ez = dz;
6606  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6607  w1[ex][ey][dz] = x(local_index, e);
6608  }
6609  }
6610  }
6611 
6612  // contract in y
6613  for (int dz = 0; dz < c_dofs1D; ++dz)
6614  {
6615  for (int dy = 0; dy < c_dofs1D; ++dy)
6616  {
6617  for (int ex = 0; ex < o_dofs1D; ++ex)
6618  {
6619  const int ey = dy;
6620  w2[ex][dy][dz] = w1[ex][ey][dz];
6621  }
6622  }
6623  }
6624 
6625  // contract in x
6626  for (int dz = 0; dz < c_dofs1D; ++dz)
6627  {
6628  for (int dy = 0; dy < c_dofs1D; ++dy)
6629  {
6630  for (int dx = 0; dx < c_dofs1D; ++dx)
6631  {
6632  double s = 0.0;
6633  for (int ex = 0; ex < o_dofs1D; ++ex)
6634  {
6635  s += G(ex, dx) * w2[ex][dy][dz];
6636  }
6637  y(dx, dy, dz, e) += s;
6638  }
6639  }
6640  }
6641 
6642  // ---
6643  // dofs that point parallel to y-axis (open in y, closed in x, z)
6644  // ---
6645 
6646  // contract in z
6647  for (int dz = 0; dz < c_dofs1D; ++dz)
6648  {
6649  for (int ex = 0; ex < c_dofs1D; ++ex)
6650  {
6651  for (int ey = 0; ey < o_dofs1D; ++ey)
6652  {
6653  const int ez = dz;
6654  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6655  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6656  w1[ex][ey][dz] = x(local_index, e);
6657  }
6658  }
6659  }
6660 
6661  // contract in y
6662  for (int dz = 0; dz < c_dofs1D; ++dz)
6663  {
6664  for (int dy = 0; dy < c_dofs1D; ++dy)
6665  {
6666  for (int ex = 0; ex < c_dofs1D; ++ex)
6667  {
6668  w2[ex][dy][dz] = 0.0;
6669  for (int ey = 0; ey < o_dofs1D; ++ey)
6670  {
6671  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6672  }
6673  }
6674  }
6675  }
6676 
6677  // contract in x
6678  for (int dz = 0; dz < c_dofs1D; ++dz)
6679  {
6680  for (int dy = 0; dy < c_dofs1D; ++dy)
6681  {
6682  for (int dx = 0; dx < c_dofs1D; ++dx)
6683  {
6684  const int ex = dx;
6685  double s = w2[ex][dy][dz];
6686  y(dx, dy, dz, e) += s;
6687  }
6688  }
6689  }
6690 
6691  // ---
6692  // dofs that point parallel to z-axis (open in z, closed in x, y)
6693  // ---
6694 
6695  // contract in z
6696  for (int dz = 0; dz < c_dofs1D; ++dz)
6697  {
6698  for (int ex = 0; ex < c_dofs1D; ++ex)
6699  {
6700  for (int ey = 0; ey < c_dofs1D; ++ey)
6701  {
6702  w1[ex][ey][dz] = 0.0;
6703  for (int ez = 0; ez < o_dofs1D; ++ez)
6704  {
6705  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6706  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6707  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6708  }
6709  }
6710  }
6711  }
6712 
6713  // contract in y
6714  for (int dz = 0; dz < c_dofs1D; ++dz)
6715  {
6716  for (int dy = 0; dy < c_dofs1D; ++dy)
6717  {
6718  for (int ex = 0; ex < c_dofs1D; ++ex)
6719  {
6720  const int ey = dy;
6721  w2[ex][dy][dz] = w1[ex][ey][dz];
6722  }
6723  }
6724  }
6725 
6726  // contract in x
6727  for (int dz = 0; dz < c_dofs1D; ++dz)
6728  {
6729  for (int dy = 0; dy < c_dofs1D; ++dy)
6730  {
6731  for (int dx = 0; dx < c_dofs1D; ++dx)
6732  {
6733  const int ex = dx;
6734  double s = w2[ex][dy][dz];
6735  y(dx, dy, dz, e) += s;
6736  }
6737  }
6738  }
6739  });
6740 }
6741 
6742 void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
6743  const FiniteElementSpace &test_fes)
6744 {
6745  // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6746  Mesh *mesh = trial_fes.GetMesh();
6747  const FiniteElement *trial_fel = trial_fes.GetFE(0);
6748  const FiniteElement *test_fel = test_fes.GetFE(0);
6749 
6750  const NodalTensorFiniteElement *trial_el =
6751  dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6752  MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
6753 
6754  const VectorTensorFiniteElement *test_el =
6755  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6756  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
6757 
6758  const int dims = trial_el->GetDim();
6759  MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!");
6760  dim = mesh->Dimension();
6761  MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!");
6762  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
6763  "Orders do not match!");
6764  ne = trial_fes.GetNE();
6765 
6766  const int order = trial_el->GetOrder();
6767  dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());
6768  mfem::QuadratureFunctions1D qf1d;
6769  mfem::IntegrationRule closed_ir;
6770  closed_ir.SetSize(order + 1);
6771  qf1d.GaussLobatto(order + 1, &closed_ir);
6772  mfem::IntegrationRule open_ir;
6773  open_ir.SetSize(order);
6774  qf1d.GaussLegendre(order, &open_ir);
6775 
6776  maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
6777