MFEM  v4.4.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_hcurl.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
// Forward declaration only (note the terminating ';'): no body is attached at
// this point in the file.
// NOTE(review): judging by the name, this is a 3D partial-assembly setup
// kernel shared with the H(div) code -- confirm against its definition.
21 void PAHcurlHdivSetup3D(const int Q1D,
22  const int coeffDim,
23  const int NE,
24  const bool transpose,
25  const Array<double> &w_,
26  const Vector &j,
27  Vector &coeff_,
28  Vector &op);
29 
30 void PAHcurlMassApply2D(const int D1D,
31  const int Q1D,
32  const int NE,
33  const bool symmetric,
34  const Array<double> &bo,
35  const Array<double> &bc,
36  const Array<double> &bot,
37  const Array<double> &bct,
38  const Vector &pa_data,
39  const Vector &x,
40  Vector &y)
41 {
42  constexpr static int VDIM = 2;
43  constexpr static int MAX_D1D = HCURL_MAX_D1D;
44  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
45 
46  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
47  auto Bc = Reshape(bc.Read(), Q1D, D1D);
48  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
49  auto Bct = Reshape(bct.Read(), D1D, Q1D);
50  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
51  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
52  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
53 
54  MFEM_FORALL(e, NE,
55  {
56  double mass[MAX_Q1D][MAX_Q1D][VDIM];
57 
58  for (int qy = 0; qy < Q1D; ++qy)
59  {
60  for (int qx = 0; qx < Q1D; ++qx)
61  {
62  for (int c = 0; c < VDIM; ++c)
63  {
64  mass[qy][qx][c] = 0.0;
65  }
66  }
67  }
68 
69  int osc = 0;
70 
71  for (int c = 0; c < VDIM; ++c) // loop over x, y components
72  {
73  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
74  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
75 
76  for (int dy = 0; dy < D1Dy; ++dy)
77  {
78  double massX[MAX_Q1D];
79  for (int qx = 0; qx < Q1D; ++qx)
80  {
81  massX[qx] = 0.0;
82  }
83 
84  for (int dx = 0; dx < D1Dx; ++dx)
85  {
86  const double t = X(dx + (dy * D1Dx) + osc, e);
87  for (int qx = 0; qx < Q1D; ++qx)
88  {
89  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
90  }
91  }
92 
93  for (int qy = 0; qy < Q1D; ++qy)
94  {
95  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
96  for (int qx = 0; qx < Q1D; ++qx)
97  {
98  mass[qy][qx][c] += massX[qx] * wy;
99  }
100  }
101  }
102 
103  osc += D1Dx * D1Dy;
104  } // loop (c) over components
105 
106  // Apply D operator.
107  for (int qy = 0; qy < Q1D; ++qy)
108  {
109  for (int qx = 0; qx < Q1D; ++qx)
110  {
111  const double O11 = op(qx,qy,0,e);
112  const double O21 = op(qx,qy,1,e);
113  const double O12 = symmetric ? O21 : op(qx,qy,2,e);
114  const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
115  const double massX = mass[qy][qx][0];
116  const double massY = mass[qy][qx][1];
117  mass[qy][qx][0] = (O11*massX)+(O12*massY);
118  mass[qy][qx][1] = (O21*massX)+(O22*massY);
119  }
120  }
121 
122  for (int qy = 0; qy < Q1D; ++qy)
123  {
124  osc = 0;
125 
126  for (int c = 0; c < VDIM; ++c) // loop over x, y components
127  {
128  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
129  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
130 
131  double massX[MAX_D1D];
132  for (int dx = 0; dx < D1Dx; ++dx)
133  {
134  massX[dx] = 0.0;
135  }
136  for (int qx = 0; qx < Q1D; ++qx)
137  {
138  for (int dx = 0; dx < D1Dx; ++dx)
139  {
140  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
141  }
142  }
143 
144  for (int dy = 0; dy < D1Dy; ++dy)
145  {
146  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
147 
148  for (int dx = 0; dx < D1Dx; ++dx)
149  {
150  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
151  }
152  }
153 
154  osc += D1Dx * D1Dy;
155  } // loop c
156  } // loop qy
157  }); // end of element loop
158 }
159 
160 void PAHcurlMassAssembleDiagonal2D(const int D1D,
161  const int Q1D,
162  const int NE,
163  const bool symmetric,
164  const Array<double> &bo,
165  const Array<double> &bc,
166  const Vector &pa_data,
167  Vector &diag)
168 {
169  constexpr static int VDIM = 2;
170  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
171 
172  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
173  auto Bc = Reshape(bc.Read(), Q1D, D1D);
174  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
175  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
176 
177  MFEM_FORALL(e, NE,
178  {
179  int osc = 0;
180 
181  for (int c = 0; c < VDIM; ++c) // loop over x, y components
182  {
183  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
184  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
185 
186  double mass[MAX_Q1D];
187 
188  for (int dy = 0; dy < D1Dy; ++dy)
189  {
190  for (int qx = 0; qx < Q1D; ++qx)
191  {
192  mass[qx] = 0.0;
193  for (int qy = 0; qy < Q1D; ++qy)
194  {
195  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
196 
197  mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
198  op(qx,qy,symmetric ? 2 : 3, e));
199  }
200  }
201 
202  for (int dx = 0; dx < D1Dx; ++dx)
203  {
204  for (int qx = 0; qx < Q1D; ++qx)
205  {
206  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
207  D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
208  }
209  }
210  }
211 
212  osc += D1Dx * D1Dy;
213  } // loop c
214  }); // end of element loop
215 }
216 
// Accumulates ("+=") into diag the diagonal of the element-local 3D H(curl)
// mass operator, one element per MFEM_FORALL iteration.
//
// For component c and dof (dx,dy,dz) the contribution is
//    sum over qx,qy,qz of wx^2 * wy^2 * wz^2 * op(qx,qy,qz,opc,e),
// with the 1D weights taken from bo along direction c and from bc along the
// other two directions.
//
// D1D, Q1D   1D sizes; both are checked against HCURL_MAX_D1D/HCURL_MAX_Q1D.
// NE         number of elements.
// symmetric  selects the 6-entry or the 9-entry layout of pa_data.
// bo, bc     1D tables viewed as Q1D x (D1D-1) and Q1D x D1D.
// pa_data    viewed as (Q1D, Q1D, Q1D, 6|9, NE); only entry opc is read.
// diag       viewed as (3*(D1D-1)*D1D*D1D, NE); read-modify-write.
217 void PAHcurlMassAssembleDiagonal3D(const int D1D,
218  const int Q1D,
219  const int NE,
220  const bool symmetric,
221  const Array<double> &bo,
222  const Array<double> &bc,
223  const Vector &pa_data,
224  Vector &diag)
225 {
226  constexpr static int MAX_D1D = HCURL_MAX_D1D;
227  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
228 
229  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
230  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
231  constexpr static int VDIM = 3;
232 
233  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
234  auto Bc = Reshape(bc.Read(), Q1D, D1D);
235  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
236  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
237 
238  MFEM_FORALL(e, NE,
239  {
 // Offset of the current component inside the element's dof vector.
240  int osc = 0;
241 
242  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
243  {
 // Component c has D1D-1 dofs along direction c and D1D along the others.
244  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
245  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
246  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
247 
 // Index inside pa_data of the (c,c) entry of the per-point matrix:
 // 0 for c == 0; 3 (6-entry layout) or 4 (9-entry layout) for c == 1;
 // 5 or 8 for c == 2.
248  const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
249  (symmetric ? 5 : 8));
250 
251  double mass[MAX_Q1D];
252 
253  for (int dz = 0; dz < D1Dz; ++dz)
254  {
255  for (int dy = 0; dy < D1Dy; ++dy)
256  {
 // mass[qx] = sum over qy,qz of wy^2 * wz^2 * op(qx,qy,qz,opc,e)
 // for the current (dy,dz).
257  for (int qx = 0; qx < Q1D; ++qx)
258  {
259  mass[qx] = 0.0;
260  for (int qy = 0; qy < Q1D; ++qy)
261  {
262  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
263 
264  for (int qz = 0; qz < Q1D; ++qz)
265  {
266  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
267 
268  mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
269  }
270  }
271  }
272 
 // Contract the remaining x direction with the squared basis values.
273  for (int dx = 0; dx < D1Dx; ++dx)
274  {
275  for (int qx = 0; qx < Q1D; ++qx)
276  {
277  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
278  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
279  }
280  }
281  }
282  }
283 
 // Advance past the dofs of component c.
284  osc += D1Dx * D1Dy * D1Dz;
285  } // loop c
286  }); // end of element loop
287 }
288 
// Variant of the 3D H(curl) mass diagonal assembly written with
// MFEM_FORALL_3D, MFEM_SHARED scratch arrays and explicit MFEM_SYNC_THREAD
// barriers. The result is accumulated ("+=") into diag.
//
// T_D1D, T_Q1D  compile-time extents of the scratch arrays; a value of 0
//               selects HCURL_MAX_D1D / HCURL_MAX_Q1D instead.
// D1D, Q1D      run-time 1D sizes, checked against HCURL_MAX_D1D/HCURL_MAX_Q1D.
//               NOTE(review): when T_D1D/T_Q1D are non-zero nothing here
//               checks D1D <= T_D1D or Q1D <= T_Q1D -- presumably the caller
//               guarantees it; confirm at the dispatch site.
// NE            number of elements.
// symmetric     selects the 6-entry or the 9-entry layout of pa_data.
// bo, bc        1D tables viewed as Q1D x (D1D-1) and Q1D x D1D.
// pa_data       viewed as (Q1D, Q1D, Q1D, 6|9, NE).
// diag          viewed as (3*(D1D-1)*D1D*D1D, NE); read-modify-write.
289 template<int T_D1D, int T_Q1D>
290 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
291  const int Q1D,
292  const int NE,
293  const bool symmetric,
294  const Array<double> &bo,
295  const Array<double> &bc,
296  const Vector &pa_data,
297  Vector &diag)
298 {
299  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
300  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
301 
302  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
303  auto Bc = Reshape(bc.Read(), Q1D, D1D);
304  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
305  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
306 
 // The three Q1D arguments are presumably the per-element thread extents in
 // x, y and z -- see the MFEM_FORALL_3D definition.
307  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
308  {
309  constexpr int VDIM = 3;
310  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
311  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
312 
 // Copies of the 1D tables; sBo only has its first D1D-1 columns filled.
313  MFEM_SHARED double sBo[tQ1D][tD1D];
314  MFEM_SHARED double sBc[tQ1D][tD1D];
315 
 // op3: the (1,1), (2,2), (3,3) entries of the per-point matrix at one
 // quadrature point; sop: one qz-slice of those entries, indexed
 // [component][x index][y index].
316  double op3[3];
317  MFEM_SHARED double sop[3][tQ1D][tQ1D];
318 
319  MFEM_FOREACH_THREAD(qx,x,Q1D)
320  {
321  MFEM_FOREACH_THREAD(qy,y,Q1D)
322  {
323  MFEM_FOREACH_THREAD(qz,z,Q1D)
324  {
 // Diagonal entries sit at 0, 3|4 and 5|8 in the 6|9-entry layout.
325  op3[0] = op(qx,qy,qz,0,e);
326  op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
327  op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
328  }
329  }
330  }
331 
332  const int tidx = MFEM_THREAD_ID(x);
333  const int tidy = MFEM_THREAD_ID(y);
334  const int tidz = MFEM_THREAD_ID(z);
335 
 // Only the z-index-0 layer fills the basis tables.
336  if (tidz == 0)
337  {
338  MFEM_FOREACH_THREAD(d,y,D1D)
339  {
340  MFEM_FOREACH_THREAD(q,x,Q1D)
341  {
342  sBc[q][d] = Bc(q,d);
343  if (d < D1D-1)
344  {
345  sBo[q][d] = Bo(q,d);
346  }
347  }
348  }
349  }
350  MFEM_SYNC_THREAD;
351 
 // Offset of the current component inside the element's dof vector.
352  int osc = 0;
353  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
354  {
355  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
356  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
357  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
358 
 // Running sum over all quadrature points for this thread's dof.
359  double dxyz = 0.0;
360 
361  for (int qz=0; qz < Q1D; ++qz)
362  {
 // The layer whose z index equals qz publishes its op3 values, so that
 // sop holds the qz-slice of the diagonal entries for everyone.
363  if (tidz == qz)
364  {
365  for (int i=0; i<3; ++i)
366  {
367  sop[i][tidx][tidy] = op3[i];
368  }
369  }
370 
371  MFEM_SYNC_THREAD;
372 
373  MFEM_FOREACH_THREAD(dz,z,D1Dz)
374  {
375  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
376 
377  MFEM_FOREACH_THREAD(dy,y,D1Dy)
378  {
379  MFEM_FOREACH_THREAD(dx,x,D1Dx)
380  {
381  for (int qy = 0; qy < Q1D; ++qy)
382  {
383  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
384 
385  for (int qx = 0; qx < Q1D; ++qx)
386  {
387  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
388  dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
389  }
390  }
391  }
392  }
393  }
394 
 // Barrier before sop is overwritten with the next qz-slice.
395  MFEM_SYNC_THREAD;
396  } // qz loop
397 
398  MFEM_FOREACH_THREAD(dz,z,D1Dz)
399  {
400  MFEM_FOREACH_THREAD(dy,y,D1Dy)
401  {
402  MFEM_FOREACH_THREAD(dx,x,D1Dx)
403  {
404  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
405  }
406  }
407  }
408 
409  osc += D1Dx * D1Dy * D1Dz;
410  } // c loop
411  }); // end of element loop
412 }
413 
// Applies the partially assembled 3D H(curl) mass operator, element by
// element, and accumulates the result into y (entries of y are updated with
// "+=", they are not overwritten).
//
// Per element the kernel runs three stages:
//   1. interpolate the three vector components of x to the Q1D^3 quadrature
//      points, one 1D direction at a time (x, then y, then z);
//   2. multiply the three-component value at every point by the 3x3 matrix
//      stored in pa_data;
//   3. contract with the transposed 1D tables and add into y.
//
// D1D, Q1D   1D sizes; both are checked against HCURL_MAX_D1D/HCURL_MAX_Q1D.
// NE         number of elements.
// symmetric  true: pa_data holds 6 entries per point (11,12,13,22,23,33);
//            false: it holds all 9 entries, row by row.
// bo, bc     1D tables viewed as Q1D x (D1D-1) and Q1D x D1D.
// bot, bct   the same tables viewed transposed, (D1D-1) x Q1D and D1D x Q1D.
// pa_data    viewed as (Q1D, Q1D, Q1D, 6|9, NE).
// x, y       viewed as (3*(D1D-1)*D1D*D1D, NE), components stored one after
//            another.
414 void PAHcurlMassApply3D(const int D1D,
415  const int Q1D,
416  const int NE,
417  const bool symmetric,
418  const Array<double> &bo,
419  const Array<double> &bc,
420  const Array<double> &bot,
421  const Array<double> &bct,
422  const Vector &pa_data,
423  const Vector &x,
424  Vector &y)
425 {
426  constexpr static int MAX_D1D = HCURL_MAX_D1D;
427  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
428 
429  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
430  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
431  constexpr static int VDIM = 3;
432 
433  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
434  auto Bc = Reshape(bc.Read(), Q1D, D1D);
435  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
436  auto Bct = Reshape(bct.Read(), D1D, Q1D);
437  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
438  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
439  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
440 
441  MFEM_FORALL(e, NE,
442  {
 // Values of all three components at every quadrature point of element e.
443  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
444 
445  for (int qz = 0; qz < Q1D; ++qz)
446  {
447  for (int qy = 0; qy < Q1D; ++qy)
448  {
449  for (int qx = 0; qx < Q1D; ++qx)
450  {
451  for (int c = 0; c < VDIM; ++c)
452  {
453  mass[qz][qy][qx][c] = 0.0;
454  }
455  }
456  }
457  }
458 
 // Offset of the current component inside the element's dof vector.
459  int osc = 0;
460 
 // Stage 1: dofs -> quadrature points.
461  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
462  {
 // Component c uses the open table (D1D-1 dofs) along direction c and the
 // closed table (D1D dofs) along the other two directions.
463  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
464  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
465  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
466 
467  for (int dz = 0; dz < D1Dz; ++dz)
468  {
 // Interpolation of the dof plane dz along x and y.
469  double massXY[MAX_Q1D][MAX_Q1D];
470  for (int qy = 0; qy < Q1D; ++qy)
471  {
472  for (int qx = 0; qx < Q1D; ++qx)
473  {
474  massXY[qy][qx] = 0.0;
475  }
476  }
477 
478  for (int dy = 0; dy < D1Dy; ++dy)
479  {
 // Interpolation of the dof row (dy,dz) along x.
480  double massX[MAX_Q1D];
481  for (int qx = 0; qx < Q1D; ++qx)
482  {
483  massX[qx] = 0.0;
484  }
485 
486  for (int dx = 0; dx < D1Dx; ++dx)
487  {
488  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
489  for (int qx = 0; qx < Q1D; ++qx)
490  {
491  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
492  }
493  }
494 
495  for (int qy = 0; qy < Q1D; ++qy)
496  {
497  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
498  for (int qx = 0; qx < Q1D; ++qx)
499  {
500  const double wx = massX[qx];
501  massXY[qy][qx] += wx * wy;
502  }
503  }
504  }
505 
506  for (int qz = 0; qz < Q1D; ++qz)
507  {
508  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
509  for (int qy = 0; qy < Q1D; ++qy)
510  {
511  for (int qx = 0; qx < Q1D; ++qx)
512  {
513  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
514  }
515  }
516  }
517  }
518 
519  osc += D1Dx * D1Dy * D1Dz;
520  } // loop (c) over components
521 
 // Stage 2: pointwise 3x3 matrix-vector product. In the symmetric layout the
 // lower-triangular entries are taken from their upper-triangular mirrors.
522  // Apply D operator.
523  for (int qz = 0; qz < Q1D; ++qz)
524  {
525  for (int qy = 0; qy < Q1D; ++qy)
526  {
527  for (int qx = 0; qx < Q1D; ++qx)
528  {
529  const double O11 = op(qx,qy,qz,0,e);
530  const double O12 = op(qx,qy,qz,1,e);
531  const double O13 = op(qx,qy,qz,2,e);
532  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
533  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
534  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
535  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
536  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
537  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
538  const double massX = mass[qz][qy][qx][0];
539  const double massY = mass[qz][qy][qx][1];
540  const double massZ = mass[qz][qy][qx][2];
541  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
542  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
543  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
544  }
545  }
546  }
547 
 // Stage 3: quadrature points -> dofs, one quadrature plane qz at a time.
548  for (int qz = 0; qz < Q1D; ++qz)
549  {
550  double massXY[MAX_D1D][MAX_D1D];
551 
552  osc = 0;
553 
554  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
555  {
556  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
557  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
558  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
559 
560  for (int dy = 0; dy < D1Dy; ++dy)
561  {
562  for (int dx = 0; dx < D1Dx; ++dx)
563  {
564  massXY[dy][dx] = 0.0;
565  }
566  }
567  for (int qy = 0; qy < Q1D; ++qy)
568  {
569  double massX[MAX_D1D];
570  for (int dx = 0; dx < D1Dx; ++dx)
571  {
572  massX[dx] = 0;
573  }
574  for (int qx = 0; qx < Q1D; ++qx)
575  {
576  for (int dx = 0; dx < D1Dx; ++dx)
577  {
578  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
579  }
580  }
581  for (int dy = 0; dy < D1Dy; ++dy)
582  {
583  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
584  for (int dx = 0; dx < D1Dx; ++dx)
585  {
586  massXY[dy][dx] += massX[dx] * wy;
587  }
588  }
589  }
590 
591  for (int dz = 0; dz < D1Dz; ++dz)
592  {
593  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
594  for (int dy = 0; dy < D1Dy; ++dy)
595  {
596  for (int dx = 0; dx < D1Dx; ++dx)
597  {
598  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
599  }
600  }
601  }
602 
603  osc += D1Dx * D1Dy * D1Dz;
604  } // loop c
605  } // loop qz
606  }); // end of element loop
607 }
608 
// Variant of the 3D H(curl) mass operator application written with
// MFEM_FORALL_3D, MFEM_SHARED scratch arrays and explicit MFEM_SYNC_THREAD
// barriers. The result is accumulated ("+=") into y.
//
// The element is processed one quadrature plane qz at a time: the three
// components of x are interpolated to that plane, then the pointwise matrix
// from pa_data is applied and the result is contracted back onto the dofs.
//
// T_D1D, T_Q1D  compile-time extents of the scratch arrays; a value of 0
//               selects HCURL_MAX_D1D / HCURL_MAX_Q1D instead.
// D1D, Q1D      run-time 1D sizes, checked against HCURL_MAX_D1D/HCURL_MAX_Q1D.
// NE            number of elements.
// symmetric     selects the 6-entry or the 9-entry layout of pa_data.
// bo, bc        1D tables viewed as Q1D x (D1D-1) and Q1D x D1D.
// bot, bct      not referenced in this body; the transposed contraction
//               reads the same sBo/sBc copies with swapped roles.
// pa_data       viewed as (Q1D, Q1D, Q1D, 6|9, NE).
// x, y          viewed as (3*(D1D-1)*D1D*D1D, NE).
609 template<int T_D1D, int T_Q1D>
610 void SmemPAHcurlMassApply3D(const int D1D,
611  const int Q1D,
612  const int NE,
613  const bool symmetric,
614  const Array<double> &bo,
615  const Array<double> &bc,
616  const Array<double> &bot,
617  const Array<double> &bct,
618  const Vector &pa_data,
619  const Vector &x,
620  Vector &y)
621 {
622  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
623  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
624 
 // Number of matrix entries stored per quadrature point.
625  const int dataSize = symmetric ? 6 : 9;
626 
627  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
628  auto Bc = Reshape(bc.Read(), Q1D, D1D);
629  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
630  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
631  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
632 
633  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
634  {
635  constexpr int VDIM = 3;
636  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
637  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
638 
 // Copies of the 1D tables; sBo only has its first D1D-1 columns filled.
639  MFEM_SHARED double sBo[tQ1D][tD1D];
640  MFEM_SHARED double sBc[tQ1D][tD1D];
641 
 // op9: the matrix entries at one quadrature point (first dataSize slots).
 // sop: those entries for one qz-plane, flattened as
 //      i + dataSize*(x index) + dataSize*Q1D*(y index).
 // mass: the three interpolated components on the current qz-plane.
642  double op9[9];
643  MFEM_SHARED double sop[9*tQ1D*tQ1D];
644  MFEM_SHARED double mass[tQ1D][tQ1D][3];
645 
 // Staging buffer for the dofs of one component of x.
646  MFEM_SHARED double sX[tD1D][tD1D][tD1D];
647 
648  MFEM_FOREACH_THREAD(qx,x,Q1D)
649  {
650  MFEM_FOREACH_THREAD(qy,y,Q1D)
651  {
652  MFEM_FOREACH_THREAD(qz,z,Q1D)
653  {
654  for (int i=0; i<dataSize; ++i)
655  {
656  op9[i] = op(qx,qy,qz,i,e);
657  }
658  }
659  }
660  }
661 
662  const int tidx = MFEM_THREAD_ID(x);
663  const int tidy = MFEM_THREAD_ID(y);
664  const int tidz = MFEM_THREAD_ID(z);
665 
 // Only the z-index-0 layer fills the basis tables.
666  if (tidz == 0)
667  {
668  MFEM_FOREACH_THREAD(d,y,D1D)
669  {
670  MFEM_FOREACH_THREAD(q,x,Q1D)
671  {
672  sBc[q][d] = Bc(q,d);
673  if (d < D1D-1)
674  {
675  sBo[q][d] = Bo(q,d);
676  }
677  }
678  }
679  }
680  MFEM_SYNC_THREAD;
681 
682  for (int qz=0; qz < Q1D; ++qz)
683  {
 // Offset of the current component inside the element's dof vector.
684  int osc = 0;
 // First pass over components: interpolate x onto the plane qz.
685  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
686  {
687  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
688  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
689  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
690 
 // Stage the dofs of component c in sX.
691  MFEM_FOREACH_THREAD(dz,z,D1Dz)
692  {
693  MFEM_FOREACH_THREAD(dy,y,D1Dy)
694  {
695  MFEM_FOREACH_THREAD(dx,x,D1Dx)
696  {
697  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
698  }
699  }
700  }
701  MFEM_SYNC_THREAD;
702 
 // Only the layer whose z index equals qz works on this plane: it
 // publishes its matrix entries into sop (rewritten with the same values
 // for every c) and evaluates component c at its (qx,qy).
703  if (tidz == qz)
704  {
705  for (int i=0; i<dataSize; ++i)
706  {
707  sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
708  }
709 
710  MFEM_FOREACH_THREAD(qy,y,Q1D)
711  {
712  MFEM_FOREACH_THREAD(qx,x,Q1D)
713  {
714  double u = 0.0;
715 
716  for (int dz = 0; dz < D1Dz; ++dz)
717  {
718  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
719  for (int dy = 0; dy < D1Dy; ++dy)
720  {
721  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
722  for (int dx = 0; dx < D1Dx; ++dx)
723  {
724  const double t = sX[dz][dy][dx];
725  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
726  u += t * wx * wy * wz;
727  }
728  }
729  }
730 
731  mass[qy][qx][c] = u;
732  } // qx
733  } // qy
734  } // tidz == qz
735 
736  osc += D1Dx * D1Dy * D1Dz;
 // Barrier before sX is overwritten with the next component.
737  MFEM_SYNC_THREAD;
738  } // c
739 
740  MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
741 
 // Second pass over components: apply row c of the pointwise matrix and
 // contract the plane qz back onto the dofs of component c.
742  osc = 0;
743  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
744  {
745  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
746  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
747  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
748 
749  double dxyz = 0.0;
750 
751  MFEM_FOREACH_THREAD(dz,z,D1Dz)
752  {
753  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
754 
755  MFEM_FOREACH_THREAD(dy,y,D1Dy)
756  {
757  MFEM_FOREACH_THREAD(dx,x,D1Dx)
758  {
759  for (int qy = 0; qy < Q1D; ++qy)
760  {
761  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
762  for (int qx = 0; qx < Q1D; ++qx)
763  {
 // id1..id3 locate the three entries of matrix row c inside sop, for
 // either the 6-entry (symmetric) or the 9-entry layout.
764  const int os = (dataSize*qx) + (dataSize*Q1D*qy);
765  const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
766  (symmetric ? 2 : 6))); // O11, O21, O31
767  const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
768  (symmetric ? 4 : 7))); // O12, O22, O32
769  const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
770  (symmetric ? 5 : 8))); // O13, O23, O33
771 
772  const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
773  (sop[id3] * mass[qy][qx][2]);
774 
775  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
776  dxyz += m_c * wx * wy * wz;
777  }
778  }
779  }
780  }
781  }
782 
783  MFEM_SYNC_THREAD;
784 
785  MFEM_FOREACH_THREAD(dz,z,D1Dz)
786  {
787  MFEM_FOREACH_THREAD(dy,y,D1Dy)
788  {
789  MFEM_FOREACH_THREAD(dx,x,D1Dx)
790  {
791  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
792  }
793  }
794  }
795 
796  osc += D1Dx * D1Dy * D1Dz;
797  } // c loop
798  } // qz
799  }); // end of element loop
800 }
801 
802 // PA H(curl) curl-curl assemble 2D kernel
803 static void PACurlCurlSetup2D(const int Q1D,
804  const int NE,
805  const Array<double> &w,
806  const Vector &j,
807  Vector &coeff,
808  Vector &op)
809 {
810  const int NQ = Q1D*Q1D;
811  auto W = w.Read();
812  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
813  auto C = Reshape(coeff.Read(), NQ, NE);
814  auto y = Reshape(op.Write(), NQ, NE);
815  MFEM_FORALL(e, NE,
816  {
817  for (int q = 0; q < NQ; ++q)
818  {
819  const double J11 = J(q,0,0,e);
820  const double J21 = J(q,1,0,e);
821  const double J12 = J(q,0,1,e);
822  const double J22 = J(q,1,1,e);
823  const double detJ = (J11*J22)-(J21*J12);
824  y(q,e) = W[q] * C(q,e) / detJ;
825  }
826  });
827 }
828 
829 // PA H(curl) curl-curl assemble 3D kernel
//
// For every element e and quadrature point q, fills op with the entries of
//    (w[q] / det J) * J^T C J,
// where J is the 3x3 matrix read from j and C is the coefficient at that
// point. op is overwritten, not accumulated.
//
// Q1D       quadrature points per direction (Q1D^3 per element).
// coeffDim  number of coefficient values per point in coeff:
//             9 -> full 3x3 matrix, row by row (9 entries written to op);
//             6 -> symmetric 3x3 matrix as 11,12,13,22,23,33;
//             3 -> diagonal matrix diag(D1,D2,D3);
//             otherwise only entry 0 is read and used for all three diagonal
//             entries (scalar coefficient).
//           Every case except 9 writes the 6 entries 11,12,13,22,23,33.
// NE        number of elements.
// w         quadrature weights, indexed by q.
// j         viewed as (NQ, 3, 3, NE).
// coeff     viewed as (coeffDim, NQ, NE); only read here.
// op        viewed as (NQ, 6|9, NE).
830 static void PACurlCurlSetup3D(const int Q1D,
831  const int coeffDim,
832  const int NE,
833  const Array<double> &w,
834  const Vector &j,
835  Vector &coeff,
836  Vector &op)
837 {
838  const int NQ = Q1D*Q1D*Q1D;
 // Only the 9-entry input produces a 9-entry (non-symmetric) output.
839  const bool symmetric = (coeffDim != 9);
840  auto W = w.Read();
841  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
842  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
843  auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
844 
845  MFEM_FORALL(e, NE,
846  {
847  for (int q = 0; q < NQ; ++q)
848  {
849  const double J11 = J(q,0,0,e);
850  const double J21 = J(q,1,0,e);
851  const double J31 = J(q,2,0,e);
852  const double J12 = J(q,0,1,e);
853  const double J22 = J(q,1,1,e);
854  const double J32 = J(q,2,1,e);
855  const double J13 = J(q,0,2,e);
856  const double J23 = J(q,1,2,e);
857  const double J33 = J(q,2,2,e);
 // Cofactor expansion of det J along the first column.
858  const double detJ = J11 * (J22 * J33 - J32 * J23) -
859  /* */ J21 * (J12 * J33 - J32 * J13) +
860  /* */ J31 * (J12 * J23 - J22 * J13);
861 
 // Common scale factor: quadrature weight divided by det J.
862  const double c_detJ = W[q] / detJ;
863 
864  if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
865  {
866  // Set y to the 6 or 9 entries of J^T M J / det
867  const double M11 = C(0, q, e);
868  const double M12 = C(1, q, e);
869  const double M13 = C(2, q, e);
870  const double M21 = (!symmetric) ? C(3, q, e) : M12;
871  const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
872  const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
873  const double M31 = (!symmetric) ? C(6, q, e) : M13;
874  const double M32 = (!symmetric) ? C(7, q, e) : M23;
875  const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
876 
877  // First compute R = MJ
878  const double R11 = M11*J11 + M12*J21 + M13*J31;
879  const double R12 = M11*J12 + M12*J22 + M13*J32;
880  const double R13 = M11*J13 + M12*J23 + M13*J33;
881  const double R21 = M21*J11 + M22*J21 + M23*J31;
882  const double R22 = M21*J12 + M22*J22 + M23*J32;
883  const double R23 = M21*J13 + M22*J23 + M23*J33;
884  const double R31 = M31*J11 + M32*J21 + M33*J31;
885  const double R32 = M31*J12 + M32*J22 + M33*J32;
886  const double R33 = M31*J13 + M32*J23 + M33*J33;
887 
888  // Now set y to J^T R / det
889  y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
890  const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
891  y(q,1,e) = Y12; // 1,2
892  y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
893 
894  const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
895  const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
896  const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
897 
898  const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
899 
 // Slots 3..5 hold 22,23,33 in the 6-entry layout and 21,22,23 in the
 // 9-entry layout.
900  y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
901  y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
902  y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
903 
904  if (!symmetric)
905  {
906  y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
907  y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
908  y(q,8,e) = Y33; // 3,3
909  }
910  }
911  else // Vector or scalar coefficient version
912  {
913  // Set y to the 6 entries of J^T D J / det (c_detJ divides by det J once)
914  const double D1 = C(0, q, e);
915  const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
916  const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
917 
918  y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
919  y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
920  y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
921  y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
922  y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
923  y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
924  }
925  }
926  });
927 }
928 
929 // PA H(curl)-L2 assemble 2D kernel
930 static void PACurlL2Setup2D(const int Q1D,
931  const int NE,
932  const Array<double> &w,
933  Vector &coeff,
934  Vector &op)
935 {
936  const int NQ = Q1D*Q1D;
937  auto W = w.Read();
938  auto C = Reshape(coeff.Read(), NQ, NE);
939  auto y = Reshape(op.Write(), NQ, NE);
940  MFEM_FORALL(e, NE,
941  {
942  for (int q = 0; q < NQ; ++q)
943  {
944  y(q,e) = W[q] * C(q,e);
945  }
946  });
947 }
948 
949 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
950 {
951  // Assumes tensor-product elements
952  Mesh *mesh = fes.GetMesh();
953  const FiniteElement *fel = fes.GetFE(0);
954 
955  const VectorTensorFiniteElement *el =
956  dynamic_cast<const VectorTensorFiniteElement*>(fel);
957  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
958 
959  const IntegrationRule *ir
960  = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
961  *mesh->GetElementTransformation(0));
962 
963  const int dims = el->GetDim();
964  MFEM_VERIFY(dims == 2 || dims == 3, "");
965 
966  nq = ir->GetNPoints();
967  dim = mesh->Dimension();
968  MFEM_VERIFY(dim == 2 || dim == 3, "");
969 
970  const int dimc = (dim == 3) ? 3 : 1;
971 
972  ne = fes.GetNE();
973  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
974  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
975  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
976  dofs1D = mapsC->ndof;
977  quad1D = mapsC->nqpt;
978 
979  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
980 
981  const int MQsymmDim = SMQ ? (SMQ->GetSize() * (SMQ->GetSize() + 1)) / 2 : 0;
982  const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
983  const int MQdim = MQ ? MQfullDim : MQsymmDim;
984  const int coeffDim = (MQ || SMQ) ? MQdim : (DQ ? DQ->GetVDim() : 1);
985 
986  symmetric = (MQ == NULL);
987 
988  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
989  const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
990  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
991 
992  Vector coeff(coeffDim * ne * nq);
993  coeff = 1.0;
994  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
995  if (Q || DQ || MQ || SMQ)
996  {
997  Vector DM(DQ ? coeffDim : 0);
998  DenseMatrix GM;
999  DenseSymmetricMatrix SM;
1000 
1001  if (DQ)
1002  {
1003  MFEM_VERIFY(coeffDim == dimc, "");
1004  }
1005  if (MQ)
1006  {
1007  GM.SetSize(dimc);
1008  MFEM_VERIFY(coeffDim == MQdim, "");
1009  MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
1010  }
1011  if (SMQ)
1012  {
1013  SM.SetSize(dimc);
1014  MFEM_VERIFY(SMQ->GetSize() == dimc, "");
1015  }
1016 
1017  for (int e=0; e<ne; ++e)
1018  {
1019  ElementTransformation *tr = mesh->GetElementTransformation(e);
1020  for (int p=0; p<nq; ++p)
1021  {
1022  if (MQ)
1023  {
1024  MQ->Eval(GM, *tr, ir->IntPoint(p));
1025 
1026  for (int i=0; i<dimc; ++i)
1027  for (int j=0; j<dimc; ++j)
1028  {
1029  coeffh(j+(i*dimc), p, e) = GM(i,j);
1030  }
1031 
1032  }
1033  else if (SMQ)
1034  {
1035  SMQ->Eval(SM, *tr, ir->IntPoint(p));
1036 
1037  int cnt = 0;
1038  for (int i=0; i<dimc; ++i)
1039  for (int j=i; j<dimc; ++j, ++cnt)
1040  {
1041  coeffh(cnt, p, e) = SM(i,j);
1042  }
1043 
1044  }
1045  else if (DQ)
1046  {
1047  DQ->Eval(DM, *tr, ir->IntPoint(p));
1048  for (int i=0; i<coeffDim; ++i)
1049  {
1050  coeffh(i, p, e) = DM[i];
1051  }
1052  }
1053  else
1054  {
1055  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1056  }
1057  }
1058  }
1059  }
1060 
1061  if (el->GetDerivType() != mfem::FiniteElement::CURL)
1062  {
1063  MFEM_ABORT("Unknown kernel.");
1064  }
1065 
1066  if (dim == 3)
1067  {
1068  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1069  pa_data);
1070  }
1071  else
1072  {
1073  PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1074  }
1075 }
1076 
1077 static void PACurlCurlApply2D(const int D1D,
1078  const int Q1D,
1079  const int NE,
1080  const Array<double> &bo,
1081  const Array<double> &bot,
1082  const Array<double> &gc,
1083  const Array<double> &gct,
1084  const Vector &pa_data,
1085  const Vector &x,
1086  Vector &y)
1087 {
1088  constexpr static int VDIM = 2;
1089  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1090  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1091 
1092  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1093  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1094  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1095  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1096  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1097  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1098  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1099 
1100  MFEM_FORALL(e, NE,
1101  {
1102  double curl[MAX_Q1D][MAX_Q1D];
1103 
1104  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1105 
1106  for (int qy = 0; qy < Q1D; ++qy)
1107  {
1108  for (int qx = 0; qx < Q1D; ++qx)
1109  {
1110  curl[qy][qx] = 0.0;
1111  }
1112  }
1113 
1114  int osc = 0;
1115 
1116  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1117  {
1118  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1119  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1120 
1121  for (int dy = 0; dy < D1Dy; ++dy)
1122  {
1123  double gradX[MAX_Q1D];
1124  for (int qx = 0; qx < Q1D; ++qx)
1125  {
1126  gradX[qx] = 0;
1127  }
1128 
1129  for (int dx = 0; dx < D1Dx; ++dx)
1130  {
1131  const double t = X(dx + (dy * D1Dx) + osc, e);
1132  for (int qx = 0; qx < Q1D; ++qx)
1133  {
1134  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1135  }
1136  }
1137 
1138  for (int qy = 0; qy < Q1D; ++qy)
1139  {
1140  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1141  for (int qx = 0; qx < Q1D; ++qx)
1142  {
1143  curl[qy][qx] += gradX[qx] * wy;
1144  }
1145  }
1146  }
1147 
1148  osc += D1Dx * D1Dy;
1149  } // loop (c) over components
1150 
1151  // Apply D operator.
1152  for (int qy = 0; qy < Q1D; ++qy)
1153  {
1154  for (int qx = 0; qx < Q1D; ++qx)
1155  {
1156  curl[qy][qx] *= op(qx,qy,e);
1157  }
1158  }
1159 
1160  for (int qy = 0; qy < Q1D; ++qy)
1161  {
1162  osc = 0;
1163 
1164  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1165  {
1166  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1167  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1168 
1169  double gradX[MAX_D1D];
1170  for (int dx = 0; dx < D1Dx; ++dx)
1171  {
1172  gradX[dx] = 0.0;
1173  }
1174  for (int qx = 0; qx < Q1D; ++qx)
1175  {
1176  for (int dx = 0; dx < D1Dx; ++dx)
1177  {
1178  gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1179  }
1180  }
1181  for (int dy = 0; dy < D1Dy; ++dy)
1182  {
1183  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1184 
1185  for (int dx = 0; dx < D1Dx; ++dx)
1186  {
1187  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1188  }
1189  }
1190 
1191  osc += D1Dx * D1Dy;
1192  } // loop c
1193  } // loop qy
1194  }); // end of element loop
1195 }
1196 
// Element-wise action of the partially assembled 3D curl-curl operator.
//
// For every element e in [0, NE) the kernel
//   1. evaluates the three components of the reference curl of x at all
//      Q1D^3 quadrature points, one vector component of x at a time,
//   2. multiplies the curl at each point by the 3x3 matrix stored in pa_data
//      (6 stored entries when `symmetric`, 9 otherwise),
//   3. contracts the result with the transposed 1D tables and ADDS it to y.
//
// Template parameters MAX_D1D / MAX_Q1D size the per-element scratch arrays;
// D1D and Q1D are checked against them on entry.
//
// Parameters (shapes as fixed by the Reshape calls below):
//   D1D, Q1D   1D sizes; bc/gc are Q1D x D1D, bo is Q1D x (D1D-1).
//   symmetric  selects the 6-entry or 9-entry layout of pa_data.
//   NE         number of elements.
//   bo, bc, gc 1D tables indexed (q,d); bot, bct, gct are indexed (d,q).
//              Presumably "o"/"c" are the open/closed 1D bases, B the values
//              and G the derivatives -- confirm against the caller.
//   pa_data    Q1D x Q1D x Q1D x (6 or 9) x NE quadrature data.
//   x          input, 3*(D1D-1)*D1D*D1D entries per element.
//   y          output with the same layout; accumulated into, not overwritten.
1197 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1198 static void PACurlCurlApply3D(const int D1D,
1199  const int Q1D,
1200  const bool symmetric,
1201  const int NE,
1202  const Array<double> &bo,
1203  const Array<double> &bc,
1204  const Array<double> &bot,
1205  const Array<double> &bct,
1206  const Array<double> &gc,
1207  const Array<double> &gct,
1208  const Vector &pa_data,
1209  const Vector &x,
1210  Vector &y)
1211 {
      // The scratch arrays below have compile-time extents, so the runtime
      // sizes must not exceed them.
1212  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1213  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1214  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1215  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1216  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1217  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1218  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1219 
1220  constexpr static int VDIM = 3;
1221 
      // Multi-index views of the flat input arrays (first index varies fastest
      // in the index arithmetic used for X and Y below).
1222  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1223  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1224  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1225  auto Bct = Reshape(bct.Read(), D1D, Q1D);
1226  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1227  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1228  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1229  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1230  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1231 
1232  MFEM_FORALL(e, NE,
1233  {
1234  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1235  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1236 
1237  for (int qz = 0; qz < Q1D; ++qz)
1238  {
1239  for (int qy = 0; qy < Q1D; ++qy)
1240  {
1241  for (int qx = 0; qx < Q1D; ++qx)
1242  {
1243  for (int c = 0; c < VDIM; ++c)
1244  {
1245  curl[qz][qy][qx][c] = 0.0;
1246  }
1247  }
1248  }
1249  }
1250 
1251  // We treat x, y, z components separately for optimization specific to each.
1252 
      // osc is the offset of the current vector component inside the
      // element-local dof vector; it advances by D1Dx*D1Dy*D1Dz per component.
1253  int osc = 0;
1254 
1255  {
1256  // x component
1257  const int D1Dz = D1D;
1258  const int D1Dy = D1D;
1259  const int D1Dx = D1D - 1;
1260 
1261  for (int dz = 0; dz < D1Dz; ++dz)
1262  {
      // gradXY[qy][qx][0] accumulates the Gc-in-y product, [1] the Bc-in-y
      // product, for the fixed dz slab.
1263  double gradXY[MAX_Q1D][MAX_Q1D][2];
1264  for (int qy = 0; qy < Q1D; ++qy)
1265  {
1266  for (int qx = 0; qx < Q1D; ++qx)
1267  {
1268  for (int d = 0; d < 2; ++d)
1269  {
1270  gradXY[qy][qx][d] = 0.0;
1271  }
1272  }
1273  }
1274 
1275  for (int dy = 0; dy < D1Dy; ++dy)
1276  {
      // Contract the dx index with Bo first.
1277  double massX[MAX_Q1D];
1278  for (int qx = 0; qx < Q1D; ++qx)
1279  {
1280  massX[qx] = 0.0;
1281  }
1282 
1283  for (int dx = 0; dx < D1Dx; ++dx)
1284  {
1285  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1286  for (int qx = 0; qx < Q1D; ++qx)
1287  {
1288  massX[qx] += t * Bo(qx,dx);
1289  }
1290  }
1291 
1292  for (int qy = 0; qy < Q1D; ++qy)
1293  {
1294  const double wy = Bc(qy,dy);
1295  const double wDy = Gc(qy,dy);
1296  for (int qx = 0; qx < Q1D; ++qx)
1297  {
1298  const double wx = massX[qx];
1299  gradXY[qy][qx][0] += wx * wDy;
1300  gradXY[qy][qx][1] += wx * wy;
1301  }
1302  }
1303  }
1304 
      // Finish with the dz contraction and scatter into the curl components.
1305  for (int qz = 0; qz < Q1D; ++qz)
1306  {
1307  const double wz = Bc(qz,dz);
1308  const double wDz = Gc(qz,dz);
1309  for (int qy = 0; qy < Q1D; ++qy)
1310  {
1311  for (int qx = 0; qx < Q1D; ++qx)
1312  {
1313  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1314  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1315  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1316  }
1317  }
1318  }
1319  }
1320 
1321  osc += D1Dx * D1Dy * D1Dz;
1322  }
1323 
1324  {
1325  // y component
1326  const int D1Dz = D1D;
1327  const int D1Dy = D1D - 1;
1328  const int D1Dx = D1D;
1329 
1330  for (int dz = 0; dz < D1Dz; ++dz)
1331  {
      // gradXY[qy][qx][0] accumulates the Gc-in-x product, [1] the Bc-in-x
      // product, for the fixed dz slab.
1332  double gradXY[MAX_Q1D][MAX_Q1D][2];
1333  for (int qy = 0; qy < Q1D; ++qy)
1334  {
1335  for (int qx = 0; qx < Q1D; ++qx)
1336  {
1337  for (int d = 0; d < 2; ++d)
1338  {
1339  gradXY[qy][qx][d] = 0.0;
1340  }
1341  }
1342  }
1343 
1344  for (int dx = 0; dx < D1Dx; ++dx)
1345  {
      // Contract the dy index with Bo first.
1346  double massY[MAX_Q1D];
1347  for (int qy = 0; qy < Q1D; ++qy)
1348  {
1349  massY[qy] = 0.0;
1350  }
1351 
1352  for (int dy = 0; dy < D1Dy; ++dy)
1353  {
1354  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1355  for (int qy = 0; qy < Q1D; ++qy)
1356  {
1357  massY[qy] += t * Bo(qy,dy);
1358  }
1359  }
1360 
1361  for (int qx = 0; qx < Q1D; ++qx)
1362  {
1363  const double wx = Bc(qx,dx);
1364  const double wDx = Gc(qx,dx);
1365  for (int qy = 0; qy < Q1D; ++qy)
1366  {
1367  const double wy = massY[qy];
1368  gradXY[qy][qx][0] += wDx * wy;
1369  gradXY[qy][qx][1] += wx * wy;
1370  }
1371  }
1372  }
1373 
1374  for (int qz = 0; qz < Q1D; ++qz)
1375  {
1376  const double wz = Bc(qz,dz);
1377  const double wDz = Gc(qz,dz);
1378  for (int qy = 0; qy < Q1D; ++qy)
1379  {
1380  for (int qx = 0; qx < Q1D; ++qx)
1381  {
1382  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1383  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1384  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1385  }
1386  }
1387  }
1388  }
1389 
1390  osc += D1Dx * D1Dy * D1Dz;
1391  }
1392 
1393  {
1394  // z component
1395  const int D1Dz = D1D - 1;
1396  const int D1Dy = D1D;
1397  const int D1Dx = D1D;
1398 
      // Here the outer loop is over dx and the dz index is contracted first.
1399  for (int dx = 0; dx < D1Dx; ++dx)
1400  {
      // gradYZ[qz][qy][0] accumulates the Bc-in-y product, [1] the Gc-in-y
      // product, for the fixed dx slab.
1401  double gradYZ[MAX_Q1D][MAX_Q1D][2];
1402  for (int qz = 0; qz < Q1D; ++qz)
1403  {
1404  for (int qy = 0; qy < Q1D; ++qy)
1405  {
1406  for (int d = 0; d < 2; ++d)
1407  {
1408  gradYZ[qz][qy][d] = 0.0;
1409  }
1410  }
1411  }
1412 
1413  for (int dy = 0; dy < D1Dy; ++dy)
1414  {
1415  double massZ[MAX_Q1D];
1416  for (int qz = 0; qz < Q1D; ++qz)
1417  {
1418  massZ[qz] = 0.0;
1419  }
1420 
1421  for (int dz = 0; dz < D1Dz; ++dz)
1422  {
1423  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1424  for (int qz = 0; qz < Q1D; ++qz)
1425  {
1426  massZ[qz] += t * Bo(qz,dz);
1427  }
1428  }
1429 
1430  for (int qy = 0; qy < Q1D; ++qy)
1431  {
1432  const double wy = Bc(qy,dy);
1433  const double wDy = Gc(qy,dy);
1434  for (int qz = 0; qz < Q1D; ++qz)
1435  {
1436  const double wz = massZ[qz];
1437  gradYZ[qz][qy][0] += wz * wy;
1438  gradYZ[qz][qy][1] += wz * wDy;
1439  }
1440  }
1441  }
1442 
1443  for (int qx = 0; qx < Q1D; ++qx)
1444  {
1445  const double wx = Bc(qx,dx);
1446  const double wDx = Gc(qx,dx);
1447 
1448  for (int qy = 0; qy < Q1D; ++qy)
1449  {
1450  for (int qz = 0; qz < Q1D; ++qz)
1451  {
1452  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1453  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1454  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1455  }
1456  }
1457  }
1458  }
1459  }
1460 
1461  // Apply D operator.
      // In the symmetric layout the 6 stored entries are, in order,
      // O11, O12, O13, O22, O23, O33; otherwise all 9 entries are stored
      // row by row. The product overwrites curl in place.
1462  for (int qz = 0; qz < Q1D; ++qz)
1463  {
1464  for (int qy = 0; qy < Q1D; ++qy)
1465  {
1466  for (int qx = 0; qx < Q1D; ++qx)
1467  {
1468  const double O11 = op(qx,qy,qz,0,e);
1469  const double O12 = op(qx,qy,qz,1,e);
1470  const double O13 = op(qx,qy,qz,2,e);
1471  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1472  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1473  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1474  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1475  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1476  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1477 
1478  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1479  (O13 * curl[qz][qy][qx][2]);
1480  const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1481  (O23 * curl[qz][qy][qx][2]);
1482  const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1483  (O33 * curl[qz][qy][qx][2]);
1484 
1485  curl[qz][qy][qx][0] = c1;
1486  curl[qz][qy][qx][1] = c2;
1487  curl[qz][qy][qx][2] = c3;
1488  }
1489  }
1490  }
1491 
      // Second half: contract (op * curl) with the transposed tables, again
      // one vector component at a time, and accumulate into Y.
1492  // x component
1493  osc = 0;
1494  {
1495  const int D1Dz = D1D;
1496  const int D1Dy = D1D;
1497  const int D1Dx = D1D - 1;
1498 
1499  for (int qz = 0; qz < Q1D; ++qz)
1500  {
      // gradXY21 collects (op*curl)_1 weighted by Bct in y, gradXY12 collects
      // (op*curl)_2 weighted by Gct in y.
1501  double gradXY12[MAX_D1D][MAX_D1D];
1502  double gradXY21[MAX_D1D][MAX_D1D];
1503 
1504  for (int dy = 0; dy < D1Dy; ++dy)
1505  {
1506  for (int dx = 0; dx < D1Dx; ++dx)
1507  {
1508  gradXY12[dy][dx] = 0.0;
1509  gradXY21[dy][dx] = 0.0;
1510  }
1511  }
1512  for (int qy = 0; qy < Q1D; ++qy)
1513  {
1514  double massX[MAX_D1D][2];
1515  for (int dx = 0; dx < D1Dx; ++dx)
1516  {
1517  for (int n = 0; n < 2; ++n)
1518  {
1519  massX[dx][n] = 0.0;
1520  }
1521  }
1522  for (int qx = 0; qx < Q1D; ++qx)
1523  {
1524  for (int dx = 0; dx < D1Dx; ++dx)
1525  {
1526  const double wx = Bot(dx,qx);
1527 
1528  massX[dx][0] += wx * curl[qz][qy][qx][1];
1529  massX[dx][1] += wx * curl[qz][qy][qx][2];
1530  }
1531  }
1532  for (int dy = 0; dy < D1Dy; ++dy)
1533  {
1534  const double wy = Bct(dy,qy);
1535  const double wDy = Gct(dy,qy);
1536 
1537  for (int dx = 0; dx < D1Dx; ++dx)
1538  {
1539  gradXY21[dy][dx] += massX[dx][0] * wy;
1540  gradXY12[dy][dx] += massX[dx][1] * wDy;
1541  }
1542  }
1543  }
1544 
1545  for (int dz = 0; dz < D1Dz; ++dz)
1546  {
1547  const double wz = Bct(dz,qz);
1548  const double wDz = Gct(dz,qz);
1549  for (int dy = 0; dy < D1Dy; ++dy)
1550  {
1551  for (int dx = 0; dx < D1Dx; ++dx)
1552  {
1553  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1554  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1555  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1556  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1557  }
1558  }
1559  }
1560  } // loop qz
1561 
1562  osc += D1Dx * D1Dy * D1Dz;
1563  }
1564 
1565  // y component
1566  {
1567  const int D1Dz = D1D;
1568  const int D1Dy = D1D - 1;
1569  const int D1Dx = D1D;
1570 
1571  for (int qz = 0; qz < Q1D; ++qz)
1572  {
      // gradXY02 collects (op*curl)_2 weighted by Gct in x, gradXY20 collects
      // (op*curl)_0 weighted by Bct in x.
1573  double gradXY02[MAX_D1D][MAX_D1D];
1574  double gradXY20[MAX_D1D][MAX_D1D];
1575 
1576  for (int dy = 0; dy < D1Dy; ++dy)
1577  {
1578  for (int dx = 0; dx < D1Dx; ++dx)
1579  {
1580  gradXY02[dy][dx] = 0.0;
1581  gradXY20[dy][dx] = 0.0;
1582  }
1583  }
1584  for (int qx = 0; qx < Q1D; ++qx)
1585  {
1586  double massY[MAX_D1D][2];
1587  for (int dy = 0; dy < D1Dy; ++dy)
1588  {
1589  massY[dy][0] = 0.0;
1590  massY[dy][1] = 0.0;
1591  }
1592  for (int qy = 0; qy < Q1D; ++qy)
1593  {
1594  for (int dy = 0; dy < D1Dy; ++dy)
1595  {
1596  const double wy = Bot(dy,qy);
1597 
1598  massY[dy][0] += wy * curl[qz][qy][qx][2];
1599  massY[dy][1] += wy * curl[qz][qy][qx][0];
1600  }
1601  }
1602  for (int dx = 0; dx < D1Dx; ++dx)
1603  {
1604  const double wx = Bct(dx,qx);
1605  const double wDx = Gct(dx,qx);
1606 
1607  for (int dy = 0; dy < D1Dy; ++dy)
1608  {
1609  gradXY02[dy][dx] += massY[dy][0] * wDx;
1610  gradXY20[dy][dx] += massY[dy][1] * wx;
1611  }
1612  }
1613  }
1614 
1615  for (int dz = 0; dz < D1Dz; ++dz)
1616  {
1617  const double wz = Bct(dz,qz);
1618  const double wDz = Gct(dz,qz);
1619  for (int dy = 0; dy < D1Dy; ++dy)
1620  {
1621  for (int dx = 0; dx < D1Dx; ++dx)
1622  {
1623  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1624  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1625  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1626  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1627  }
1628  }
1629  }
1630  } // loop qz
1631 
1632  osc += D1Dx * D1Dy * D1Dz;
1633  }
1634 
1635  // z component
1636  {
1637  const int D1Dz = D1D - 1;
1638  const int D1Dy = D1D;
1639  const int D1Dx = D1D;
1640 
1641  for (int qx = 0; qx < Q1D; ++qx)
1642  {
      // Both scratch arrays are indexed [dz][dy]. gradYZ01 collects
      // (op*curl)_1 weighted by Bct in y, gradYZ10 collects (op*curl)_0
      // weighted by Gct in y.
1643  double gradYZ01[MAX_D1D][MAX_D1D];
1644  double gradYZ10[MAX_D1D][MAX_D1D];
1645 
1646  for (int dy = 0; dy < D1Dy; ++dy)
1647  {
1648  for (int dz = 0; dz < D1Dz; ++dz)
1649  {
1650  gradYZ01[dz][dy] = 0.0;
1651  gradYZ10[dz][dy] = 0.0;
1652  }
1653  }
1654  for (int qy = 0; qy < Q1D; ++qy)
1655  {
1656  double massZ[MAX_D1D][2];
1657  for (int dz = 0; dz < D1Dz; ++dz)
1658  {
1659  for (int n = 0; n < 2; ++n)
1660  {
1661  massZ[dz][n] = 0.0;
1662  }
1663  }
1664  for (int qz = 0; qz < Q1D; ++qz)
1665  {
1666  for (int dz = 0; dz < D1Dz; ++dz)
1667  {
1668  const double wz = Bot(dz,qz);
1669 
1670  massZ[dz][0] += wz * curl[qz][qy][qx][0];
1671  massZ[dz][1] += wz * curl[qz][qy][qx][1];
1672  }
1673  }
1674  for (int dy = 0; dy < D1Dy; ++dy)
1675  {
1676  const double wy = Bct(dy,qy);
1677  const double wDy = Gct(dy,qy);
1678 
1679  for (int dz = 0; dz < D1Dz; ++dz)
1680  {
1681  gradYZ01[dz][dy] += wy * massZ[dz][1];
1682  gradYZ10[dz][dy] += wDy * massZ[dz][0];
1683  }
1684  }
1685  }
1686 
1687  for (int dx = 0; dx < D1Dx; ++dx)
1688  {
1689  const double wx = Bct(dx,qx);
1690  const double wDx = Gct(dx,qx);
1691 
1692  for (int dy = 0; dy < D1Dy; ++dy)
1693  {
1694  for (int dz = 0; dz < D1Dz; ++dz)
1695  {
1696  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1697  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1698  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1699  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1700  }
1701  }
1702  }
1703  } // loop qx
1704  }
1705  }); // end of element loop
1706 }
1707 
// Shared-memory variant of the 3D curl-curl partial-assembly action; adds the
// operator applied to x into y for each of the NE elements.
//
// The element kernel is handed to ForallWrap<3> together with a host fallback
// that only aborts, so this function is usable on device backends only.
// Work inside an element is organised one qz plane at a time:
//   1. threads with tidz == qz accumulate the reference curl on that plane
//      into `curl`, looping over the three vector components of x,
//   2. every (dx,dy,dz) thread iteration applies the 3x3 operator entries kept
//      in `sop` and contracts with the 1D tables,
//   3. the per-thread sums dxyz1/dxyz2/dxyz3 are added to Y.
//
// Parameters (shapes as fixed by the Reshape calls below):
//   D1D, Q1D   1D sizes; checked against MAX_D1D / MAX_Q1D on entry.
//   symmetric  pa_data holds 6 entries per point when true, 9 otherwise.
//   NE         number of elements.
//   bo, bc, gc 1D tables indexed (q,d): Q1D x (D1D-1), Q1D x D1D, Q1D x D1D.
//   bot, bct, gct  not referenced anywhere in this function body.
//   pa_data    Q1D x Q1D x Q1D x (6 or 9) x NE quadrature data.
//   x          input, 3*(D1D-1)*D1D*D1D entries per element.
//   y          output with the same layout; accumulated into.
1708 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1709 static void SmemPACurlCurlApply3D(const int D1D,
1710  const int Q1D,
1711  const bool symmetric,
1712  const int NE,
1713  const Array<double> &bo,
1714  const Array<double> &bc,
1715  const Array<double> &bot,
1716  const Array<double> &bct,
1717  const Array<double> &gc,
1718  const Array<double> &gct,
1719  const Vector &pa_data,
1720  const Vector &x,
1721  Vector &y)
1722 {
1723  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1724  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1725  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1726  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1727  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1728  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1729  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1730 
1731  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1732  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1733  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1734  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1735  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1736  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1737 
      // Number of operator entries stored per quadrature point.
1738  const int s = symmetric ? 6 : 9;
1739 
1740  auto device_kernel = [=] MFEM_DEVICE (int e)
1741  {
1742  constexpr int VDIM = 3;
1743 
      // Copies of the 1D tables, stored transposed as [d][q].
1744  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1745  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1746  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1747 
      // ope: operator entries read by this thread from pa_data.
      // sop: operator entries of the current qz plane, written below as
      //      sop[i][tidx][tidy] and read back as sop[i][qx][qy].
      // curl: curl values on the current qz plane only, indexed [qy][qx][comp].
1748  double ope[9];
1749  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1750  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1751 
      // One vector component of the element-local x, indexed [dz][dy][dx].
1752  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1753 
      // NOTE(review): ope has no quadrature-point index, so this presumably
      // relies on each thread visiting exactly one (qx,qy,qz), consistent with
      // the Q1D,Q1D,Q1D arguments passed to ForallWrap below -- confirm.
1754  MFEM_FOREACH_THREAD(qx,x,Q1D)
1755  {
1756  MFEM_FOREACH_THREAD(qy,y,Q1D)
1757  {
1758  MFEM_FOREACH_THREAD(qz,z,Q1D)
1759  {
1760  for (int i=0; i<s; ++i)
1761  {
1762  ope[i] = op(qx,qy,qz,i,e);
1763  }
1764  }
1765  }
1766  }
1767 
1768  const int tidx = MFEM_THREAD_ID(x);
1769  const int tidy = MFEM_THREAD_ID(y);
1770  const int tidz = MFEM_THREAD_ID(z);
1771 
      // Only the tidz == 0 threads fill the shared basis tables; sBo has one
      // row fewer than sBc/sGc, hence the d < D1D-1 guard.
1772  if (tidz == 0)
1773  {
1774  MFEM_FOREACH_THREAD(d,y,D1D)
1775  {
1776  MFEM_FOREACH_THREAD(q,x,Q1D)
1777  {
1778  sBc[d][q] = Bc(q,d);
1779  sGc[d][q] = Gc(q,d);
1780  if (d < D1D-1)
1781  {
1782  sBo[d][q] = Bo(q,d);
1783  }
1784  }
1785  }
1786  }
1787  MFEM_SYNC_THREAD;
1788 
      // Process the quadrature points one qz plane at a time.
1789  for (int qz=0; qz < Q1D; ++qz)
1790  {
      // Reset the plane-local curl; done only by the threads owning this plane.
1791  if (tidz == qz)
1792  {
1793  MFEM_FOREACH_THREAD(qy,y,Q1D)
1794  {
1795  MFEM_FOREACH_THREAD(qx,x,Q1D)
1796  {
1797  for (int i=0; i<3; ++i)
1798  {
1799  curl[qy][qx][i] = 0.0;
1800  }
1801  }
1802  }
1803  }
1804 
1805  int osc = 0;
1806  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1807  {
1808  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1809  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1810  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1811 
      // All threads cooperate to load component c of x into sX; this is
      // repeated for every qz plane.
1812  MFEM_FOREACH_THREAD(dz,z,D1Dz)
1813  {
1814  MFEM_FOREACH_THREAD(dy,y,D1Dy)
1815  {
1816  MFEM_FOREACH_THREAD(dx,x,D1Dx)
1817  {
1818  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1819  }
1820  }
1821  }
1822  MFEM_SYNC_THREAD;
1823 
1824  if (tidz == qz)
1825  {
      // Publish this plane's operator entries once per plane (first component).
1826  if (c == 0)
1827  {
1828  for (int i=0; i<s; ++i)
1829  {
1830  sop[i][tidx][tidy] = ope[i];
1831  }
1832  }
1833 
1834  MFEM_FOREACH_THREAD(qy,y,Q1D)
1835  {
1836  MFEM_FOREACH_THREAD(qx,x,Q1D)
1837  {
      // u and v hold the two nonzero derivative sums for component c; which
      // derivative each one is differs per branch (see the curl updates).
1838  double u = 0.0;
1839  double v = 0.0;
1840 
1841  // We treat x, y, z components separately for optimization specific to each.
1842  if (c == 0) // x component
1843  {
1844  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1845 
1846  for (int dz = 0; dz < D1Dz; ++dz)
1847  {
1848  const double wz = sBc[dz][qz];
1849  const double wDz = sGc[dz][qz];
1850 
1851  for (int dy = 0; dy < D1Dy; ++dy)
1852  {
1853  const double wy = sBc[dy][qy];
1854  const double wDy = sGc[dy][qy];
1855 
1856  for (int dx = 0; dx < D1Dx; ++dx)
1857  {
1858  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1859  u += wx * wDy * wz;
1860  v += wx * wy * wDz;
1861  }
1862  }
1863  }
1864 
1865  curl[qy][qx][1] += v; // (u_0)_{x_2}
1866  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1867  }
1868  else if (c == 1) // y component
1869  {
1870  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1871 
1872  for (int dz = 0; dz < D1Dz; ++dz)
1873  {
1874  const double wz = sBc[dz][qz];
1875  const double wDz = sGc[dz][qz];
1876 
1877  for (int dy = 0; dy < D1Dy; ++dy)
1878  {
1879  const double wy = sBo[dy][qy];
1880 
1881  for (int dx = 0; dx < D1Dx; ++dx)
1882  {
1883  const double t = sX[dz][dy][dx];
1884  const double wx = t * sBc[dx][qx];
1885  const double wDx = t * sGc[dx][qx];
1886 
1887  u += wDx * wy * wz;
1888  v += wx * wy * wDz;
1889  }
1890  }
1891  }
1892 
1893  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1894  curl[qy][qx][2] += u; // (u_1)_{x_0}
1895  }
1896  else // z component
1897  {
1898  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1899 
1900  for (int dz = 0; dz < D1Dz; ++dz)
1901  {
1902  const double wz = sBo[dz][qz];
1903 
1904  for (int dy = 0; dy < D1Dy; ++dy)
1905  {
1906  const double wy = sBc[dy][qy];
1907  const double wDy = sGc[dy][qy];
1908 
1909  for (int dx = 0; dx < D1Dx; ++dx)
1910  {
1911  const double t = sX[dz][dy][dx];
1912  const double wx = t * sBc[dx][qx];
1913  const double wDx = t * sGc[dx][qx];
1914 
1915  u += wDx * wy * wz;
1916  v += wx * wDy * wz;
1917  }
1918  }
1919  }
1920 
1921  curl[qy][qx][0] += v; // (u_2)_{x_1}
1922  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1923  }
1924  } // qx
1925  } // qy
1926  } // tidz == qz
1927 
1928  osc += D1Dx * D1Dy * D1Dz;
      // sX is overwritten at the top of the next component iteration, so all
      // threads must be done reading it first.
1929  MFEM_SYNC_THREAD;
1930  } // c
1931 
      // Per-thread contributions of this qz plane to the x-, y- and
      // z-component dofs.
      // NOTE(review): these are declared outside the dz/dy/dx thread loops
      // below, so they are shared by all dof iterations of one thread;
      // presumably each thread runs at most one (dx,dy,dz) iteration -- confirm.
1932  double dxyz1 = 0.0;
1933  double dxyz2 = 0.0;
1934  double dxyz3 = 0.0;
1935 
1936  MFEM_FOREACH_THREAD(dz,z,D1D)
1937  {
1938  const double wcz = sBc[dz][qz];
1939  const double wcDz = sGc[dz][qz];
      // sBo has only D1D-1 rows; a zero weight removes the out-of-range dof.
1940  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1941 
1942  MFEM_FOREACH_THREAD(dy,y,D1D)
1943  {
1944  MFEM_FOREACH_THREAD(dx,x,D1D)
1945  {
1946  for (int qy = 0; qy < Q1D; ++qy)
1947  {
1948  const double wcy = sBc[dy][qy];
1949  const double wcDy = sGc[dy][qy];
1950  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1951 
1952  for (int qx = 0; qx < Q1D; ++qx)
1953  {
      // Same 6/9-entry unpacking of the 3x3 operator as in the non-shared
      // kernel: symmetric storage order is O11, O12, O13, O22, O23, O33.
1954  const double O11 = sop[0][qx][qy];
1955  const double O12 = sop[1][qx][qy];
1956  const double O13 = sop[2][qx][qy];
1957  const double O21 = symmetric ? O12 : sop[3][qx][qy];
1958  const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1959  const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1960  const double O31 = symmetric ? O13 : sop[6][qx][qy];
1961  const double O32 = symmetric ? O23 : sop[7][qx][qy];
1962  const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1963 
1964  const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1965  (O13 * curl[qy][qx][2]);
1966  const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1967  (O23 * curl[qy][qx][2]);
1968  const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1969  (O33 * curl[qy][qx][2]);
1970 
1971  const double wcx = sBc[dx][qx];
1972  const double wDx = sGc[dx][qx];
1973 
1974  if (dx < D1D-1)
1975  {
1976  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1977  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1978  const double wx = sBo[dx][qx];
1979  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1980  }
1981 
1982  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1983  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1984  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1985 
1986  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1987  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1988  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1989  } // qx
1990  } // qy
1991  } // dx
1992  } // dy
1993  } // dz
1994 
      // curl and sop are rewritten in the next qz iteration; wait until every
      // thread has finished reading them.
1995  MFEM_SYNC_THREAD;
1996 
      // Add the plane contribution to Y. The three index expressions are the
      // x-, y- and z-component dof layouts with their component offsets
      // 0, (D1D-1)*D1D*D1D and 2*(D1D-1)*D1D*D1D written out explicitly.
1997  MFEM_FOREACH_THREAD(dz,z,D1D)
1998  {
1999  MFEM_FOREACH_THREAD(dy,y,D1D)
2000  {
2001  MFEM_FOREACH_THREAD(dx,x,D1D)
2002  {
2003  if (dx < D1D-1)
2004  {
2005  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
2006  }
2007  if (dy < D1D-1)
2008  {
2009  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
2010  }
2011  if (dz < D1D-1)
2012  {
2013  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
2014  }
2015  }
2016  }
2017  }
2018  } // qz
2019  }; // end of element loop
2020 
      // Host fallback: this kernel has no CPU implementation.
2021  auto host_kernel = [&] MFEM_LAMBDA (int)
2022  {
2023  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
2024  };
2025 
2026  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
2027 }
2028 
2029 static void PACurlL2Apply2D(const int D1D,
2030  const int D1Dtest,
2031  const int Q1D,
2032  const int NE,
2033  const Array<double> &bo,
2034  const Array<double> &bot,
2035  const Array<double> &bt,
2036  const Array<double> &gc,
2037  const Vector &pa_data,
2038  const Vector &x, // trial = H(curl)
2039  Vector &y) // test = L2 or H1
2040 {
2041  constexpr static int VDIM = 2;
2042  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2043  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2044  const int H1 = (D1Dtest == D1D);
2045 
2046  MFEM_VERIFY(y.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension");
2047 
2048  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2049  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2050  auto Bt = Reshape(bt.Read(), D1D, Q1D);
2051  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2052  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2053  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
2054  auto Y = Reshape(y.ReadWrite(), D1Dtest, D1Dtest, NE);
2055 
2056  MFEM_FORALL(e, NE,
2057  {
2058  double curl[MAX_Q1D][MAX_Q1D];
2059 
2060  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
2061 
2062  for (int qy = 0; qy < Q1D; ++qy)
2063  {
2064  for (int qx = 0; qx < Q1D; ++qx)
2065  {
2066  curl[qy][qx] = 0.0;
2067  }
2068  }
2069 
2070  int osc = 0;
2071 
2072  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2073  {
2074  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2075  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2076 
2077  for (int dy = 0; dy < D1Dy; ++dy)
2078  {
2079  double gradX[MAX_Q1D];
2080  for (int qx = 0; qx < Q1D; ++qx)
2081  {
2082  gradX[qx] = 0;
2083  }
2084 
2085  for (int dx = 0; dx < D1Dx; ++dx)
2086  {
2087  const double t = X(dx + (dy * D1Dx) + osc, e);
2088  for (int qx = 0; qx < Q1D; ++qx)
2089  {
2090  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2091  }
2092  }
2093 
2094  for (int qy = 0; qy < Q1D; ++qy)
2095  {
2096  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
2097  for (int qx = 0; qx < Q1D; ++qx)
2098  {
2099  curl[qy][qx] += gradX[qx] * wy;
2100  }
2101  }
2102  }
2103 
2104  osc += D1Dx * D1Dy;
2105  } // loop (c) over components
2106 
2107  // Apply D operator.
2108  for (int qy = 0; qy < Q1D; ++qy)
2109  {
2110  for (int qx = 0; qx < Q1D; ++qx)
2111  {
2112  curl[qy][qx] *= op(qx,qy,e);
2113  }
2114  }
2115 
2116  for (int qy = 0; qy < Q1D; ++qy)
2117  {
2118  double sol_x[MAX_D1D];
2119  for (int dx = 0; dx < D1Dtest; ++dx)
2120  {
2121  sol_x[dx] = 0.0;
2122  }
2123  for (int qx = 0; qx < Q1D; ++qx)
2124  {
2125  const double s = curl[qy][qx];
2126  for (int dx = 0; dx < D1Dtest; ++dx)
2127  {
2128  sol_x[dx] += s * ((H1 == 1) ? Bt(dx,qx) : Bot(dx,qx));
2129  }
2130  }
2131  for (int dy = 0; dy < D1Dtest; ++dy)
2132  {
2133  const double wy = (H1 == 1) ? Bt(dy,qy) : Bot(dy,qy);
2134 
2135  for (int dx = 0; dx < D1Dtest; ++dx)
2136  {
2137  Y(dx,dy,e) += sol_x[dx] * wy;
2138  }
2139  }
2140  } // loop qy
2141  }); // end of element loop
2142 }
2143 
2144 static void PACurlL2ApplyTranspose2D(const int D1D,
2145  const int D1Dtest,
2146  const int Q1D,
2147  const int NE,
2148  const Array<double> &bo,
2149  const Array<double> &bot,
2150  const Array<double> &b,
2151  const Array<double> &gct,
2152  const Vector &pa_data,
2153  const Vector &x, // trial = H(curl)
2154  Vector &y) // test = L2 or H1
2155 {
2156  constexpr static int VDIM = 2;
2157  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2158  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2159  const int H1 = (D1Dtest == D1D);
2160 
2161  MFEM_VERIFY(x.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension");
2162 
2163  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2164  auto B = Reshape(b.Read(), Q1D, D1D);
2165  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2166  auto Gct = Reshape(gct.Read(), D1D, Q1D);
2167  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2168  auto X = Reshape(x.Read(), D1Dtest, D1Dtest, NE);
2169  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2170 
2171  MFEM_FORALL(e, NE,
2172  {
2173  double mass[MAX_Q1D][MAX_Q1D];
2174 
2175  // Zero-order term in L2 or H1 test space
2176 
2177  for (int qy = 0; qy < Q1D; ++qy)
2178  {
2179  for (int qx = 0; qx < Q1D; ++qx)
2180  {
2181  mass[qy][qx] = 0.0;
2182  }
2183  }
2184 
2185  for (int dy = 0; dy < D1Dtest; ++dy)
2186  {
2187  double sol_x[MAX_Q1D];
2188  for (int qy = 0; qy < Q1D; ++qy)
2189  {
2190  sol_x[qy] = 0.0;
2191  }
2192  for (int dx = 0; dx < D1Dtest; ++dx)
2193  {
2194  const double s = X(dx,dy,e);
2195  for (int qx = 0; qx < Q1D; ++qx)
2196  {
2197  sol_x[qx] += s * ((H1 == 1) ? B(qx,dx) : Bo(qx,dx));
2198  }
2199  }
2200  for (int qy = 0; qy < Q1D; ++qy)
2201  {
2202  const double d2q = (H1 == 1) ? B(qy,dy) : Bo(qy,dy);
2203  for (int qx = 0; qx < Q1D; ++qx)
2204  {
2205  mass[qy][qx] += d2q * sol_x[qx];
2206  }
2207  }
2208  }
2209 
2210  // Apply D operator.
2211  for (int qy = 0; qy < Q1D; ++qy)
2212  {
2213  for (int qx = 0; qx < Q1D; ++qx)
2214  {
2215  mass[qy][qx] *= op(qx,qy,e);
2216  }
2217  }
2218 
2219  for (int qy = 0; qy < Q1D; ++qy)
2220  {
2221  int osc = 0;
2222 
2223  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2224  {
2225  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2226  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2227 
2228  double gradX[MAX_D1D];
2229  for (int dx = 0; dx < D1Dx; ++dx)
2230  {
2231  gradX[dx] = 0.0;
2232  }
2233  for (int qx = 0; qx < Q1D; ++qx)
2234  {
2235  for (int dx = 0; dx < D1Dx; ++dx)
2236  {
2237  gradX[dx] += mass[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
2238  }
2239  }
2240  for (int dy = 0; dy < D1Dy; ++dy)
2241  {
2242  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
2243 
2244  for (int dx = 0; dx < D1Dx; ++dx)
2245  {
2246  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
2247  }
2248  }
2249 
2250  osc += D1Dx * D1Dy;
2251  } // loop c
2252  } // loop qy
2253  }); // end of element loop
2254 }
2255 
2256 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
2257 {
2258  if (dim == 3)
2259  {
2260  if (Device::Allows(Backend::DEVICE_MASK))
2261  {
2262  const int ID = (dofs1D << 4) | quad1D;
2263  switch (ID)
2264  {
2265  case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2266  mapsO->B, mapsC->B, mapsO->Bt,
2267  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2268  case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2269  mapsO->B, mapsC->B, mapsO->Bt,
2270  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2271  case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2272  mapsO->B,
2273  mapsC->B, mapsO->Bt,
2274  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2275  case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2276  mapsO->B, mapsC->B, mapsO->Bt,
2277  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2278  default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2279  mapsC->B, mapsO->Bt, mapsC->Bt,
2280  mapsC->G, mapsC->Gt, pa_data, x, y);
2281  }
2282  }
2283  else
2284  PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2285  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2286  }
2287  else if (dim == 2)
2288  {
2289  PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2290  mapsC->G, mapsC->Gt, pa_data, x, y);
2291  }
2292  else
2293  {
2294  MFEM_ABORT("Unsupported dimension!");
2295  }
2296 }
2297 
2298 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2299  const int Q1D,
2300  const int NE,
2301  const Array<double> &bo,
2302  const Array<double> &gc,
2303  const Vector &pa_data,
2304  Vector &diag)
2305 {
2306  constexpr static int VDIM = 2;
2307  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2308 
2309  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2310  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2311  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2312  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2313 
2314  MFEM_FORALL(e, NE,
2315  {
2316  int osc = 0;
2317 
2318  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2319  {
2320  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2321  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2322 
2323  double t[MAX_Q1D];
2324 
2325  for (int dy = 0; dy < D1Dy; ++dy)
2326  {
2327  for (int qx = 0; qx < Q1D; ++qx)
2328  {
2329  t[qx] = 0.0;
2330  for (int qy = 0; qy < Q1D; ++qy)
2331  {
2332  const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2333  t[qx] += wy * wy * op(qx,qy,e);
2334  }
2335  }
2336 
2337  for (int dx = 0; dx < D1Dx; ++dx)
2338  {
2339  for (int qx = 0; qx < Q1D; ++qx)
2340  {
2341  const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2342  D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2343  }
2344  }
2345  }
2346 
2347  osc += D1Dx * D1Dy;
2348  } // loop c
2349  }); // end of element loop
2350 }
2351 
2352 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2353 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2354  const int Q1D,
2355  const bool symmetric,
2356  const int NE,
2357  const Array<double> &bo,
2358  const Array<double> &bc,
2359  const Array<double> &go,
2360  const Array<double> &gc,
2361  const Vector &pa_data,
2362  Vector &diag)
2363 {
2364  constexpr static int VDIM = 3;
2365  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2366  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2367 
2368  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2369  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2370  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2371  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2372  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2373  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2374 
2375  const int s = symmetric ? 6 : 9;
2376  const int i11 = 0;
2377  const int i12 = 1;
2378  const int i13 = 2;
2379  const int i21 = symmetric ? i12 : 3;
2380  const int i22 = symmetric ? 3 : 4;
2381  const int i23 = symmetric ? 4 : 5;
2382  const int i31 = symmetric ? i13 : 6;
2383  const int i32 = symmetric ? i23 : 7;
2384  const int i33 = symmetric ? 5 : 8;
2385 
2386  MFEM_FORALL(e, NE,
2387  {
2388  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2389  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2390  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2391  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2392  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2393 
2394  // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2395  // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2396 
2397  int osc = 0;
2398 
2399  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2400  {
2401  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2402  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2403  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2404 
2405  double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2406 
2407  // z contraction
2408  for (int qx = 0; qx < Q1D; ++qx)
2409  {
2410  for (int qy = 0; qy < Q1D; ++qy)
2411  {
2412  for (int dz = 0; dz < D1Dz; ++dz)
2413  {
2414  for (int i=0; i<s; ++i)
2415  {
2416  for (int d=0; d<3; ++d)
2417  {
2418  zt[qx][qy][dz][i][d] = 0.0;
2419  }
2420  }
2421 
2422  for (int qz = 0; qz < Q1D; ++qz)
2423  {
2424  const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2425  const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2426 
2427  for (int i=0; i<s; ++i)
2428  {
2429  zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2430  zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2431  zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2432  }
2433  }
2434  }
2435  }
2436  } // end of z contraction
2437 
2438  double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2439 
2440  // y contraction
2441  for (int qx = 0; qx < Q1D; ++qx)
2442  {
2443  for (int dz = 0; dz < D1Dz; ++dz)
2444  {
2445  for (int dy = 0; dy < D1Dy; ++dy)
2446  {
2447  for (int i=0; i<s; ++i)
2448  {
2449  for (int d=0; d<3; ++d)
2450  for (int j=0; j<3; ++j)
2451  {
2452  yt[qx][dy][dz][i][d][j] = 0.0;
2453  }
2454  }
2455 
2456  for (int qy = 0; qy < Q1D; ++qy)
2457  {
2458  const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2459  const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2460 
2461  for (int i=0; i<s; ++i)
2462  {
2463  for (int d=0; d<3; ++d)
2464  {
2465  yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2466  yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2467  yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2468  }
2469  }
2470  }
2471  }
2472  }
2473  } // end of y contraction
2474 
2475  // x contraction
2476  for (int dz = 0; dz < D1Dz; ++dz)
2477  {
2478  for (int dy = 0; dy < D1Dy; ++dy)
2479  {
2480  for (int dx = 0; dx < D1Dx; ++dx)
2481  {
2482  for (int qx = 0; qx < Q1D; ++qx)
2483  {
2484  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2485  const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2486 
2487  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2488  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2489  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2490  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2491  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2492 
2493  /*
2494  const double O11 = op(q,0,e);
2495  const double O12 = op(q,1,e);
2496  const double O13 = op(q,2,e);
2497  const double O22 = op(q,3,e);
2498  const double O23 = op(q,4,e);
2499  const double O33 = op(q,5,e);
2500  */
2501 
2502  if (c == 0)
2503  {
2504  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2505  const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2506  - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2507 
2508  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2509  }
2510  else if (c == 1)
2511  {
2512  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2513  const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2514  - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2515  + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2516 
2517  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2518  }
2519  else
2520  {
2521  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2522  const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2523  - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2524  + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2525 
2526  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2527  }
2528  }
2529  }
2530  }
2531  } // end of x contraction
2532 
2533  osc += D1Dx * D1Dy * D1Dz;
2534  } // loop c
2535  }); // end of element loop
2536 }
2537 
2538 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2539 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2540  const int Q1D,
2541  const bool symmetric,
2542  const int NE,
2543  const Array<double> &bo,
2544  const Array<double> &bc,
2545  const Array<double> &go,
2546  const Array<double> &gc,
2547  const Vector &pa_data,
2548  Vector &diag)
2549 {
2550  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2551  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2552 
2553  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2554  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2555  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2556  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2557  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2558  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2559 
2560  const int s = symmetric ? 6 : 9;
2561  const int i11 = 0;
2562  const int i12 = 1;
2563  const int i13 = 2;
2564  const int i21 = symmetric ? i12 : 3;
2565  const int i22 = symmetric ? 3 : 4;
2566  const int i23 = symmetric ? 4 : 5;
2567  const int i31 = symmetric ? i13 : 6;
2568  const int i32 = symmetric ? i23 : 7;
2569  const int i33 = symmetric ? 5 : 8;
2570 
2571  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2572  {
2573  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2574  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2575  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2576  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2577  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2578 
2579  constexpr int VDIM = 3;
2580 
2581  MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2582  MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2583  MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2584  MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2585 
2586  double ope[9];
2587  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2588 
2589  MFEM_FOREACH_THREAD(qx,x,Q1D)
2590  {
2591  MFEM_FOREACH_THREAD(qy,y,Q1D)
2592  {
2593  MFEM_FOREACH_THREAD(qz,z,Q1D)
2594  {
2595  for (int i=0; i<s; ++i)
2596  {
2597  ope[i] = op(qx,qy,qz,i,e);
2598  }
2599  }
2600  }
2601  }
2602 
2603  const int tidx = MFEM_THREAD_ID(x);
2604  const int tidy = MFEM_THREAD_ID(y);
2605  const int tidz = MFEM_THREAD_ID(z);
2606 
2607  if (tidz == 0)
2608  {
2609  MFEM_FOREACH_THREAD(d,y,D1D)
2610  {
2611  MFEM_FOREACH_THREAD(q,x,Q1D)
2612  {
2613  sBc[q][d] = Bc(q,d);
2614  sGc[q][d] = Gc(q,d);
2615  if (d < D1D-1)
2616  {
2617  sBo[q][d] = Bo(q,d);
2618  sGo[q][d] = Go(q,d);
2619  }
2620  }
2621  }
2622  }
2623  MFEM_SYNC_THREAD;
2624 
2625  int osc = 0;
2626  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2627  {
2628  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2629  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2630  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2631 
2632  double dxyz = 0.0;
2633 
2634  for (int qz=0; qz < Q1D; ++qz)
2635  {
2636  if (tidz == qz)
2637  {
2638  for (int i=0; i<s; ++i)
2639  {
2640  sop[i][tidx][tidy] = ope[i];
2641  }
2642  }
2643 
2644  MFEM_SYNC_THREAD;
2645 
2646  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2647  {
2648  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2649  const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2650 
2651  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2652  {
2653  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2654  {
2655  for (int qy = 0; qy < Q1D; ++qy)
2656  {
2657  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2658  const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2659 
2660  for (int qx = 0; qx < Q1D; ++qx)
2661  {
2662  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2663  const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2664 
2665  if (c == 0)
2666  {
2667  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2668 
2669  // (u_0)_{x_2} O22 (u_0)_{x_2}
2670  dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2671 
2672  // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2673  dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2674 
2675  // (u_0)_{x_1} O33 (u_0)_{x_1}
2676  dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2677  }
2678  else if (c == 1)
2679  {
2680  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2681 
2682  // (u_1)_{x_2} O11 (u_1)_{x_2}
2683  dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2684 
2685  // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2686  dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2687 
2688  // (u_1)_{x_0} O33 (u_1)_{x_0})
2689  dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2690  }
2691  else
2692  {
2693  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2694 
2695  // (u_2)_{x_1} O11 (u_2)_{x_1}
2696  dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2697 
2698  // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2699  dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2700 
2701  // (u_2)_{x_0} O22 (u_2)_{x_0}
2702  dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2703  }
2704  }
2705  }
2706  }
2707  }
2708  }
2709 
2710  MFEM_SYNC_THREAD;
2711  } // qz loop
2712 
2713  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2714  {
2715  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2716  {
2717  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2718  {
2719  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2720  }
2721  }
2722  }
2723 
2724  osc += D1Dx * D1Dy * D1Dz;
2725  } // c loop
2726  }); // end of element loop
2727 }
2728 
2729 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2730 {
2731  if (dim == 3)
2732  {
2733  if (Device::Allows(Backend::DEVICE_MASK))
2734  {
2735  const int ID = (dofs1D << 4) | quad1D;
2736  switch (ID)
2737  {
2738  case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2739  symmetric, ne,
2740  mapsO->B, mapsC->B,
2741  mapsO->G, mapsC->G,
2742  pa_data, diag);
2743  case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2744  symmetric, ne,
2745  mapsO->B, mapsC->B,
2746  mapsO->G, mapsC->G,
2747  pa_data, diag);
2748  case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2749  symmetric, ne,
2750  mapsO->B, mapsC->B,
2751  mapsO->G, mapsC->G,
2752  pa_data, diag);
2753  case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2754  symmetric, ne,
2755  mapsO->B, mapsC->B,
2756  mapsO->G, mapsC->G,
2757  pa_data, diag);
2758  default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2759  mapsO->B, mapsC->B,
2760  mapsO->G, mapsC->G,
2761  pa_data, diag);
2762  }
2763  }
2764  else
2765  PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2766  mapsO->B, mapsC->B,
2767  mapsO->G, mapsC->G,
2768  pa_data, diag);
2769  }
2770  else if (dim == 2)
2771  {
2772  PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2773  mapsO->B, mapsC->G, pa_data, diag);
2774  }
2775  else
2776  {
2777  MFEM_ABORT("Unsupported dimension!");
2778  }
2779 }
2780 
2781 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2782 // integrated against H(curl) test functions corresponding to y.
2783 void PAHcurlH1Apply3D(const int D1D,
2784  const int Q1D,
2785  const int NE,
2786  const Array<double> &bc,
2787  const Array<double> &gc,
2788  const Array<double> &bot,
2789  const Array<double> &bct,
2790  const Vector &pa_data,
2791  const Vector &x,
2792  Vector &y)
2793 {
2794  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2795  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2796 
2797  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2798  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2799 
2800  constexpr static int VDIM = 3;
2801 
2802  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2803  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2804  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2805  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2806  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2807  auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2808  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2809 
2810  MFEM_FORALL(e, NE,
2811  {
2812  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2813 
2814  for (int qz = 0; qz < Q1D; ++qz)
2815  {
2816  for (int qy = 0; qy < Q1D; ++qy)
2817  {
2818  for (int qx = 0; qx < Q1D; ++qx)
2819  {
2820  for (int c = 0; c < VDIM; ++c)
2821  {
2822  mass[qz][qy][qx][c] = 0.0;
2823  }
2824  }
2825  }
2826  }
2827 
2828  for (int dz = 0; dz < D1D; ++dz)
2829  {
2830  double gradXY[MAX_Q1D][MAX_Q1D][3];
2831  for (int qy = 0; qy < Q1D; ++qy)
2832  {
2833  for (int qx = 0; qx < Q1D; ++qx)
2834  {
2835  gradXY[qy][qx][0] = 0.0;
2836  gradXY[qy][qx][1] = 0.0;
2837  gradXY[qy][qx][2] = 0.0;
2838  }
2839  }
2840  for (int dy = 0; dy < D1D; ++dy)
2841  {
2842  double gradX[MAX_Q1D][2];
2843  for (int qx = 0; qx < Q1D; ++qx)
2844  {
2845  gradX[qx][0] = 0.0;
2846  gradX[qx][1] = 0.0;
2847  }
2848  for (int dx = 0; dx < D1D; ++dx)
2849  {
2850  const double s = X(dx,dy,dz,e);
2851  for (int qx = 0; qx < Q1D; ++qx)
2852  {
2853  gradX[qx][0] += s * Bc(qx,dx);
2854  gradX[qx][1] += s * Gc(qx,dx);
2855  }
2856  }
2857  for (int qy = 0; qy < Q1D; ++qy)
2858  {
2859  const double wy = Bc(qy,dy);
2860  const double wDy = Gc(qy,dy);
2861  for (int qx = 0; qx < Q1D; ++qx)
2862  {
2863  const double wx = gradX[qx][0];
2864  const double wDx = gradX[qx][1];
2865  gradXY[qy][qx][0] += wDx * wy;
2866  gradXY[qy][qx][1] += wx * wDy;
2867  gradXY[qy][qx][2] += wx * wy;
2868  }
2869  }
2870  }
2871  for (int qz = 0; qz < Q1D; ++qz)
2872  {
2873  const double wz = Bc(qz,dz);
2874  const double wDz = Gc(qz,dz);
2875  for (int qy = 0; qy < Q1D; ++qy)
2876  {
2877  for (int qx = 0; qx < Q1D; ++qx)
2878  {
2879  mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2880  mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2881  mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2882  }
2883  }
2884  }
2885  }
2886 
2887  // Apply D operator.
2888  for (int qz = 0; qz < Q1D; ++qz)
2889  {
2890  for (int qy = 0; qy < Q1D; ++qy)
2891  {
2892  for (int qx = 0; qx < Q1D; ++qx)
2893  {
2894  const double O11 = op(qx,qy,qz,0,e);
2895  const double O12 = op(qx,qy,qz,1,e);
2896  const double O13 = op(qx,qy,qz,2,e);
2897  const double O22 = op(qx,qy,qz,3,e);
2898  const double O23 = op(qx,qy,qz,4,e);
2899  const double O33 = op(qx,qy,qz,5,e);
2900  const double massX = mass[qz][qy][qx][0];
2901  const double massY = mass[qz][qy][qx][1];
2902  const double massZ = mass[qz][qy][qx][2];
2903  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2904  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2905  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2906  }
2907  }
2908  }
2909 
2910  for (int qz = 0; qz < Q1D; ++qz)
2911  {
2912  double massXY[MAX_D1D][MAX_D1D];
2913 
2914  int osc = 0;
2915 
2916  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2917  {
2918  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2919  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2920  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2921 
2922  for (int dy = 0; dy < D1Dy; ++dy)
2923  {
2924  for (int dx = 0; dx < D1Dx; ++dx)
2925  {
2926  massXY[dy][dx] = 0.0;
2927  }
2928  }
2929  for (int qy = 0; qy < Q1D; ++qy)
2930  {
2931  double massX[MAX_D1D];
2932  for (int dx = 0; dx < D1Dx; ++dx)
2933  {
2934  massX[dx] = 0;
2935  }
2936  for (int qx = 0; qx < Q1D; ++qx)
2937  {
2938  for (int dx = 0; dx < D1Dx; ++dx)
2939  {
2940  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2941  }
2942  }
2943  for (int dy = 0; dy < D1Dy; ++dy)
2944  {
2945  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2946  for (int dx = 0; dx < D1Dx; ++dx)
2947  {
2948  massXY[dy][dx] += massX[dx] * wy;
2949  }
2950  }
2951  }
2952 
2953  for (int dz = 0; dz < D1Dz; ++dz)
2954  {
2955  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2956  for (int dy = 0; dy < D1Dy; ++dy)
2957  {
2958  for (int dx = 0; dx < D1Dx; ++dx)
2959  {
2960  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2961  }
2962  }
2963  }
2964 
2965  osc += D1Dx * D1Dy * D1Dz;
2966  } // loop c
2967  } // loop qz
2968  }); // end of element loop
2969 }
2970 
2971 // Apply to x corresponding to DOF's in H(curl), integrated
2972 // against gradients of H^1 functions corresponding to y.
2973 void PAHcurlH1ApplyTranspose3D(const int D1D,
2974  const int Q1D,
2975  const int NE,
2976  const Array<double> &bc,
2977  const Array<double> &bo,
2978  const Array<double> &bct,
2979  const Array<double> &gct,
2980  const Vector &pa_data,
2981  const Vector &x,
2982  Vector &y)
2983 {
2984  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2985  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2986 
2987  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2988  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2989 
2990  constexpr static int VDIM = 3;
2991 
2992  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2993  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2994  auto Bt = Reshape(bct.Read(), D1D, Q1D);
2995  auto Gt = Reshape(gct.Read(), D1D, Q1D);
2996  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2997  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2998  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
2999 
3000  MFEM_FORALL(e, NE,
3001  {
3002  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3003 
3004  for (int qz = 0; qz < Q1D; ++qz)
3005  {
3006  for (int qy = 0; qy < Q1D; ++qy)
3007  {
3008  for (int qx = 0; qx < Q1D; ++qx)
3009  {
3010  for (int c = 0; c < VDIM; ++c)
3011  {
3012  mass[qz][qy][qx][c] = 0.0;
3013  }
3014  }
3015  }
3016  }
3017 
3018  int osc = 0;
3019 
3020  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3021  {
3022  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3023  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3024  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3025 
3026  for (int dz = 0; dz < D1Dz; ++dz)
3027  {
3028  double massXY[MAX_Q1D][MAX_Q1D];
3029  for (int qy = 0; qy < Q1D; ++qy)
3030  {
3031  for (int qx = 0; qx < Q1D; ++qx)
3032  {
3033  massXY[qy][qx] = 0.0;
3034  }
3035  }
3036 
3037  for (int dy = 0; dy < D1Dy; ++dy)
3038  {
3039  double massX[MAX_Q1D];
3040  for (int qx = 0; qx < Q1D; ++qx)
3041  {
3042  massX[qx] = 0.0;
3043  }
3044 
3045  for (int dx = 0; dx < D1Dx; ++dx)
3046  {
3047  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3048  for (int qx = 0; qx < Q1D; ++qx)
3049  {
3050  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
3051  }
3052  }
3053 
3054  for (int qy = 0; qy < Q1D; ++qy)
3055  {
3056  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
3057  for (int qx = 0; qx < Q1D; ++qx)
3058  {
3059  const double wx = massX[qx];
3060  massXY[qy][qx] += wx * wy;
3061  }
3062  }
3063  }
3064 
3065  for (int qz = 0; qz < Q1D; ++qz)
3066  {
3067  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
3068  for (int qy = 0; qy < Q1D; ++qy)
3069  {
3070  for (int qx = 0; qx < Q1D; ++qx)
3071  {
3072  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
3073  }
3074  }
3075  }
3076  }
3077 
3078  osc += D1Dx * D1Dy * D1Dz;
3079  } // loop (c) over components
3080 
3081  // Apply D operator.
3082  for (int qz = 0; qz < Q1D; ++qz)
3083  {
3084  for (int qy = 0; qy < Q1D; ++qy)
3085  {
3086  for (int qx = 0; qx < Q1D; ++qx)
3087  {
3088  const double O11 = op(qx,qy,qz,0,e);
3089  const double O12 = op(qx,qy,qz,1,e);
3090  const double O13 = op(qx,qy,qz,2,e);
3091  const double O22 = op(qx,qy,qz,3,e);
3092  const double O23 = op(qx,qy,qz,4,e);
3093  const double O33 = op(qx,qy,qz,5,e);
3094  const double massX = mass[qz][qy][qx][0];
3095  const double massY = mass[qz][qy][qx][1];
3096  const double massZ = mass[qz][qy][qx][2];
3097  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
3098  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
3099  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
3100  }
3101  }
3102  }
3103 
3104  for (int qz = 0; qz < Q1D; ++qz)
3105  {
3106  double gradXY[MAX_D1D][MAX_D1D][3];
3107  for (int dy = 0; dy < D1D; ++dy)
3108  {
3109  for (int dx = 0; dx < D1D; ++dx)
3110  {
3111  gradXY[dy][dx][0] = 0;
3112  gradXY[dy][dx][1] = 0;
3113  gradXY[dy][dx][2] = 0;
3114  }
3115  }
3116  for (int qy = 0; qy < Q1D; ++qy)
3117  {
3118  double gradX[MAX_D1D][3];
3119  for (int dx = 0; dx < D1D; ++dx)
3120  {
3121  gradX[dx][0] = 0;
3122  gradX[dx][1] = 0;
3123  gradX[dx][2] = 0;
3124  }
3125  for (int qx = 0; qx < Q1D; ++qx)
3126  {
3127  const double gX = mass[qz][qy][qx][0];
3128  const double gY = mass[qz][qy][qx][1];
3129  const double gZ = mass[qz][qy][qx][2];
3130  for (int dx = 0; dx < D1D; ++dx)
3131  {
3132  const double wx = Bt(dx,qx);
3133  const double wDx = Gt(dx,qx);
3134  gradX[dx][0] += gX * wDx;
3135  gradX[dx][1] += gY * wx;
3136  gradX[dx][2] += gZ * wx;
3137  }
3138  }
3139  for (int dy = 0; dy < D1D; ++dy)
3140  {
3141  const double wy = Bt(dy,qy);
3142  const double wDy = Gt(dy,qy);
3143  for (int dx = 0; dx < D1D; ++dx)
3144  {
3145  gradXY[dy][dx][0] += gradX[dx][0] * wy;
3146  gradXY[dy][dx][1] += gradX[dx][1] * wDy;
3147  gradXY[dy][dx][2] += gradX[dx][2] * wy;
3148  }
3149  }
3150  }
3151  for (int dz = 0; dz < D1D; ++dz)
3152  {
3153  const double wz = Bt(dz,qz);
3154  const double wDz = Gt(dz,qz);
3155  for (int dy = 0; dy < D1D; ++dy)
3156  {
3157  for (int dx = 0; dx < D1D; ++dx)
3158  {
3159  Y(dx,dy,dz,e) +=
3160  ((gradXY[dy][dx][0] * wz) +
3161  (gradXY[dy][dx][1] * wz) +
3162  (gradXY[dy][dx][2] * wDz));
3163  }
3164  }
3165  }
3166  } // loop qz
3167  }); // end of element loop
3168 }
3169 
3170 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
3171 // integrated against H(curl) test functions corresponding to y.
3172 void PAHcurlH1Apply2D(const int D1D,
3173  const int Q1D,
3174  const int NE,
3175  const Array<double> &bc,
3176  const Array<double> &gc,
3177  const Array<double> &bot,
3178  const Array<double> &bct,
3179  const Vector &pa_data,
3180  const Vector &x,
3181  Vector &y)
3182 {
3183  constexpr static int VDIM = 2;
3184  constexpr static int MAX_D1D = HCURL_MAX_D1D;
3185  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3186 
3187  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3188  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3189  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3190  auto Bct = Reshape(bct.Read(), D1D, Q1D);
3191  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3192  auto X = Reshape(x.Read(), D1D, D1D, NE);
3193  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
3194 
3195  MFEM_FORALL(e, NE,
3196  {
3197  double mass[MAX_Q1D][MAX_Q1D][VDIM];
3198 
3199  for (int qy = 0; qy < Q1D; ++qy)
3200  {
3201  for (int qx = 0; qx < Q1D; ++qx)
3202  {
3203  for (int c = 0; c < VDIM; ++c)
3204  {
3205  mass[qy][qx][c] = 0.0;
3206  }
3207  }
3208  }
3209 
3210  for (int dy = 0; dy < D1D; ++dy)
3211  {
3212  double gradX[MAX_Q1D][2];
3213  for (int qx = 0; qx < Q1D; ++qx)
3214  {
3215  gradX[qx][0] = 0.0;
3216  gradX[qx][1] = 0.0;
3217  }
3218  for (int dx = 0; dx < D1D; ++dx)
3219  {
3220  const double s = X(dx,dy,e);
3221  for (int qx = 0; qx < Q1D; ++qx)
3222  {
3223  gradX[qx][0] += s * Bc(qx,dx);
3224  gradX[qx][1] += s * Gc(qx,dx);
3225  }
3226  }
3227  for (int qy = 0; qy < Q1D; ++qy)
3228  {
3229  const double wy = Bc(qy,dy);
3230  const double wDy = Gc(qy,dy);
3231  for (int qx = 0; qx < Q1D; ++qx)
3232  {
3233  const double wx = gradX[qx][0];
3234  const double wDx = gradX[qx][1];
3235  mass[qy][qx][0] += wDx * wy;
3236  mass[qy][qx][1] += wx * wDy;
3237  }
3238  }
3239  }
3240 
3241  // Apply D operator.
3242  for (int qy = 0; qy < Q1D; ++qy)
3243  {
3244  for (int qx = 0; qx < Q1D; ++qx)
3245  {
3246  const double O11 = op(qx,qy,0,e);
3247  const double O12 = op(qx,qy,1,e);
3248  const double O22 = op(qx,qy,2,e);
3249  const double massX = mass[qy][qx][0];
3250  const double massY = mass[qy][qx][1];
3251  mass[qy][qx][0] = (O11*massX)+(O12*massY);
3252  mass[qy][qx][1] = (O12*massX)+(O22*massY);
3253  }
3254  }
3255 
3256  for (int qy = 0; qy < Q1D; ++qy)
3257  {
3258  int osc = 0;
3259 
3260  for (int c = 0; c < VDIM; ++c) // loop over x, y components
3261  {
3262  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3263  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3264 
3265  double massX[MAX_D1D];
3266  for (int dx = 0; dx < D1Dx; ++dx)
3267  {
3268  massX[dx] = 0;
3269  }
3270  for (int qx = 0; qx < Q1D; ++qx)
3271  {
3272  for (int dx = 0; dx < D1Dx; ++dx)
3273  {
3274  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3275  }
3276  }
3277 
3278  for (int dy = 0; dy < D1Dy; ++dy)
3279  {
3280  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3281 
3282  for (int dx = 0; dx < D1Dx; ++dx)
3283  {
3284  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
3285  }
3286  }
3287 
3288  osc += D1Dx * D1Dy;
3289  } // loop c
3290  }
3291  }); // end of element loop
3292 }
3293 
3294 // Apply to x corresponding to DOF's in H(curl), integrated
3295 // against gradients of H^1 functions corresponding to y.
3296 void PAHcurlH1ApplyTranspose2D(const int D1D,
3297  const int Q1D,
3298  const int NE,
3299  const Array<double> &bc,
3300  const Array<double> &bo,
3301  const Array<double> &bct,
3302  const Array<double> &gct,
3303  const Vector &pa_data,
3304  const Vector &x,
3305  Vector &y)
3306 {
3307  constexpr static int VDIM = 2;
3308  constexpr static int MAX_D1D = HCURL_MAX_D1D;
3309  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3310 
3311  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3312  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3313  auto Bt = Reshape(bct.Read(), D1D, Q1D);
3314  auto Gt = Reshape(gct.Read(), D1D, Q1D);
3315  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3316  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
3317  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
3318 
3319  MFEM_FORALL(e, NE,
3320  {
3321  double mass[MAX_Q1D][MAX_Q1D][VDIM];
3322 
3323  for (int qy = 0; qy < Q1D; ++qy)
3324  {
3325  for (int qx = 0; qx < Q1D; ++qx)
3326  {
3327  for (int c = 0; c < VDIM; ++c)
3328  {
3329  mass[qy][qx][c] = 0.0;
3330  }
3331  }
3332  }
3333 
3334  int osc = 0;
3335 
3336  for (int c = 0; c < VDIM; ++c) // loop over x, y components
3337  {
3338  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3339  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3340 
3341  for (int dy = 0; dy < D1Dy; ++dy)
3342  {
3343  double massX[MAX_Q1D];
3344  for (int qx = 0; qx < Q1D; ++qx)
3345  {
3346  massX[qx] = 0.0;
3347  }
3348 
3349  for (int dx = 0; dx < D1Dx; ++dx)
3350  {
3351  const double t = X(dx + (dy * D1Dx) + osc, e);
3352  for (int qx = 0; qx < Q1D; ++qx)
3353  {
3354  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
3355  }
3356  }
3357 
3358  for (int qy = 0; qy < Q1D; ++qy)
3359  {
3360  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
3361  for (int qx = 0; qx < Q1D; ++qx)
3362  {
3363  mass[qy][qx][c] += massX[qx] * wy;
3364  }
3365  }
3366  }
3367 
3368  osc += D1Dx * D1Dy;
3369  } // loop (c) over components
3370 
3371  // Apply D operator.
3372  for (int qy = 0; qy < Q1D; ++qy)
3373  {
3374  for (int qx = 0; qx < Q1D; ++qx)
3375  {
3376  const double O11 = op(qx,qy,0,e);
3377  const double O12 = op(qx,qy,1,e);
3378  const double O22 = op(qx,qy,2,e);
3379  const double massX = mass[qy][qx][0];
3380  const double massY = mass[qy][qx][1];
3381  mass[qy][qx][0] = (O11*massX)+(O12*massY);
3382  mass[qy][qx][1] = (O12*massX)+(O22*massY);
3383  }
3384  }
3385 
3386  for (int qy = 0; qy < Q1D; ++qy)
3387  {
3388  double gradX[MAX_D1D][2];
3389  for (int dx = 0; dx < D1D; ++dx)
3390  {
3391  gradX[dx][0] = 0;
3392  gradX[dx][1] = 0;
3393  }
3394  for (int qx = 0; qx < Q1D; ++qx)
3395  {
3396  const double gX = mass[qy][qx][0];
3397  const double gY = mass[qy][qx][1];
3398  for (int dx = 0; dx < D1D; ++dx)
3399  {
3400  const double wx = Bt(dx,qx);
3401  const double wDx = Gt(dx,qx);
3402  gradX[dx][0] += gX * wDx;
3403  gradX[dx][1] += gY * wx;
3404  }
3405  }
3406  for (int dy = 0; dy < D1D; ++dy)
3407  {
3408  const double wy = Bt(dy,qy);
3409  const double wDy = Gt(dy,qy);
3410  for (int dx = 0; dx < D1D; ++dx)
3411  {
3412  Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
3413  }
3414  }
3415  }
3416  }); // end of element loop
3417 }
3418 
3419 // PA H(curl) Mass Assemble 3D kernel
3420 void PAHcurlL2Setup(const int NQ,
3421  const int coeffDim,
3422  const int NE,
3423  const Array<double> &w,
3424  Vector &coeff,
3425  Vector &op)
3426 {
3427  auto W = w.Read();
3428  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
3429  auto y = Reshape(op.Write(), coeffDim, NQ, NE);
3430 
3431  MFEM_FORALL(e, NE,
3432  {
3433  for (int q = 0; q < NQ; ++q)
3434  {
3435  for (int c=0; c<coeffDim; ++c)
3436  {
3437  y(c,q,e) = W[q] * C(c,q,e);
3438  }
3439  }
3440  });
3441 }
3442 
3443 void MixedScalarCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3444  const FiniteElementSpace &test_fes)
3445 {
3446  // Assumes tensor-product elements
3447  Mesh *mesh = trial_fes.GetMesh();
3448  const FiniteElement *fel = trial_fes.GetFE(0); // In H(curl)
3449  const FiniteElement *eltest = test_fes.GetFE(0); // In scalar space
3450 
3451  const VectorTensorFiniteElement *el =
3452  dynamic_cast<const VectorTensorFiniteElement*>(fel);
3453  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
3454 
3455  if (el->GetDerivType() != mfem::FiniteElement::CURL)
3456  {
3457  MFEM_ABORT("Unknown kernel.");
3458  }
3459 
3460  const IntegrationRule *ir
3461  = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest,
3462  *mesh->GetElementTransformation(0));
3463 
3464  const int dims = el->GetDim();
3465  MFEM_VERIFY(dims == 2, "");
3466 
3467  const int nq = ir->GetNPoints();
3468  dim = mesh->Dimension();
3469  MFEM_VERIFY(dim == 2, "");
3470 
3471  ne = test_fes.GetNE();
3472  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3473  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3474  dofs1D = mapsC->ndof;
3475  quad1D = mapsC->nqpt;
3476 
3477  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3478 
3479  if (el->GetOrder() == eltest->GetOrder())
3480  {
3481  dofs1Dtest = dofs1D;
3482  }
3483  else
3484  {
3485  dofs1Dtest = dofs1D - 1;
3486  }
3487 
3488  pa_data.SetSize(nq * ne, Device::GetMemoryType());
3489 
3490  Vector coeff(ne * nq);
3491  coeff = 1.0;
3492  auto coeffh = Reshape(coeff.HostWrite(), nq, ne);
3493  if (Q)
3494  {
3495  for (int e=0; e<ne; ++e)
3496  {
3497  ElementTransformation *tr = mesh->GetElementTransformation(e);
3498  for (int p=0; p<nq; ++p)
3499  {
3500  coeffh(p, e) = Q->Eval(*tr, ir->IntPoint(p));
3501  }
3502  }
3503  }
3504 
3505  if (dim == 2)
3506  {
3507  PACurlL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
3508  }
3509  else
3510  {
3511  MFEM_ABORT("Unsupported dimension!");
3512  }
3513 }
3514 
3515 void MixedScalarCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3516 {
3517  if (dim == 2)
3518  {
3519  PACurlL2Apply2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3520  mapsC->Bt, mapsC->G, pa_data, x, y);
3521  }
3522  else
3523  {
3524  MFEM_ABORT("Unsupported dimension!");
3525  }
3526 }
3527 
3528 void MixedScalarCurlIntegrator::AddMultTransposePA(const Vector &x,
3529  Vector &y) const
3530 {
3531  if (dim == 2)
3532  {
3533  PACurlL2ApplyTranspose2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3534  mapsC->B, mapsC->Gt, pa_data, x, y);
3535  }
3536  else
3537  {
3538  MFEM_ABORT("Unsupported dimension!");
3539  }
3540 }
3541 
3542 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3543  const FiniteElementSpace &test_fes)
3544 {
3545  // Assumes tensor-product elements, with vector test and trial spaces.
3546  Mesh *mesh = trial_fes.GetMesh();
3547  const FiniteElement *trial_fel = trial_fes.GetFE(0);
3548  const FiniteElement *test_fel = test_fes.GetFE(0);
3549 
3550  const VectorTensorFiniteElement *trial_el =
3551  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3552  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3553 
3554  const VectorTensorFiniteElement *test_el =
3555  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3556  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
3557 
3558  const IntegrationRule *ir
3559  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
3560  *mesh->GetElementTransformation(0));
3561  const int dims = trial_el->GetDim();
3562  MFEM_VERIFY(dims == 3, "");
3563 
3564  const int nq = ir->GetNPoints();
3565  dim = mesh->Dimension();
3566  MFEM_VERIFY(dim == 3, "");
3567 
3568  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
3569 
3570  ne = trial_fes.GetNE();
3571  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
3572  mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3573  mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3574  mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3575  mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3576  dofs1D = mapsC->ndof;
3577  quad1D = mapsC->nqpt;
3578  dofs1Dtest = mapsCtest->ndof;
3579 
3580  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3581 
3582  testType = test_el->GetDerivType();
3583  trialType = trial_el->GetDerivType();
3584 
3585  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
3586  coeffDim = (DQ ? 3 : 1);
3587 
3588  const bool curlSpaces = (testType == mfem::FiniteElement::CURL &&
3589  trialType == mfem::FiniteElement::CURL);
3590 
3591  const int ndata = curlSpaces ? (coeffDim == 1 ? 1 : 9) : symmDims;
3592  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
3593 
3594  Vector coeff(coeffDim * nq * ne);
3595  coeff = 1.0;
3596  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
3597  if (Q || DQ)
3598  {
3599  Vector V(coeffDim);
3600  if (DQ)
3601  {
3602  MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
3603  }
3604 
3605  for (int e=0; e<ne; ++e)
3606  {
3607  ElementTransformation *tr = mesh->GetElementTransformation(e);
3608 
3609  for (int p=0; p<nq; ++p)
3610  {
3611  if (DQ)
3612  {
3613  DQ->Eval(V, *tr, ir->IntPoint(p));
3614  for (int i=0; i<coeffDim; ++i)
3615  {
3616  coeffh(i, p, e) = V[i];
3617  }
3618  }
3619  else
3620  {
3621  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
3622  }
3623  }
3624  }
3625  }
3626 
3627  if (testType == mfem::FiniteElement::CURL &&
3628  trialType == mfem::FiniteElement::CURL && dim == 3)
3629  {
3630  if (coeffDim == 1)
3631  {
3632  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
3633  }
3634  else
3635  {
3636  PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), geom->J,
3637  coeff, pa_data);
3638  }
3639  }
3640  else if (testType == mfem::FiniteElement::DIV &&
3641  trialType == mfem::FiniteElement::CURL && dim == 3 &&
3642  test_fel->GetOrder() == trial_fel->GetOrder())
3643  {
3644  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
3645  pa_data);
3646  }
3647  else
3648  {
3649  MFEM_ABORT("Unknown kernel.");
3650  }
3651 }
3652 
3653 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3654 // integrated against H(curl) test functions corresponding to y.
//
// Parameters, as used in the body and at the call site in
// MixedVectorCurlIntegrator::AddMultPA:
//   D1D      - 1D size of the closed basis; the open basis has D1D-1 entries
//              (see the Reshape extents below). Must not exceed MAX_D1D.
//   Q1D      - number of 1D quadrature points. Must not exceed MAX_Q1D.
//   coeffDim - number of pa_data entries per quadrature point. 1 means a
//              single scalar factor; otherwise nine entries are read and
//              applied as a 3x3 matrix.
//   NE       - number of elements.
//   bo, bc   - B arrays of the open and closed dof-to-quad maps.
//   bot, bct - the corresponding Bt arrays, used for the test-side contraction.
//   gc       - G array of the closed dof-to-quad map (used as the 1D
//              derivative, cf. the (u_i)_{x_j} comments below).
//   pa_data  - quadrature-point data, viewed as (coeffDim, Q1D, Q1D, Q1D, NE).
//   x        - input dofs; per element three component blocks of
//              (D1D-1)*D1D*D1D entries each.
//   y        - output dofs with the same layout; results are ADDED to y.
3655 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3656 static void PAHcurlL2Apply3D(const int D1D,
3657  const int Q1D,
3658  const int coeffDim,
3659  const int NE,
3660  const Array<double> &bo,
3661  const Array<double> &bc,
3662  const Array<double> &bot,
3663  const Array<double> &bct,
3664  const Array<double> &gc,
3665  const Vector &pa_data,
3666  const Vector &x,
3667  Vector &y)
3668 {
3669  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3670  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3671  // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
3672  // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
3673  // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
3674  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3675  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3676  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3677 
3678  constexpr static int VDIM = 3;
3679 
 // Bo/Bc/Gc are indexed (quadrature point, dof); Bot/Bct are indexed
 // (dof, quadrature point). X and Y are flat per-element dof vectors.
3680  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3681  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3682  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3683  auto Bct = Reshape(bct.Read(), D1D, Q1D);
3684  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3685  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3686  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3687  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3688 
3689  MFEM_FORALL(e, NE,
3690  {
3691  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3692  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3693 
3694  for (int qz = 0; qz < Q1D; ++qz)
3695  {
3696  for (int qy = 0; qy < Q1D; ++qy)
3697  {
3698  for (int qx = 0; qx < Q1D; ++qx)
3699  {
3700  for (int c = 0; c < VDIM; ++c)
3701  {
3702  curl[qz][qy][qx][c] = 0.0;
3703  }
3704  }
3705  }
3706  }
3707 
3708  // We treat x, y, z components separately for optimization specific to each.
3709 
 // osc is the offset of the current component's dof block within X (and,
 // further below, within Y).
3710  int osc = 0;
3711 
3712  {
3713  // x component
3714  const int D1Dz = D1D;
3715  const int D1Dy = D1D;
3716  const int D1Dx = D1D - 1;
3717 
3718  for (int dz = 0; dz < D1Dz; ++dz)
3719  {
 // For this dz: gradXY[..][0] uses the y-derivative (Gc) and
 // gradXY[..][1] the y-value (Bc) of the basis.
3720  double gradXY[MAX_Q1D][MAX_Q1D][2];
3721  for (int qy = 0; qy < Q1D; ++qy)
3722  {
3723  for (int qx = 0; qx < Q1D; ++qx)
3724  {
3725  for (int d = 0; d < 2; ++d)
3726  {
3727  gradXY[qy][qx][d] = 0.0;
3728  }
3729  }
3730  }
3731 
3732  for (int dy = 0; dy < D1Dy; ++dy)
3733  {
 // Contract the x dof index with the open basis first.
3734  double massX[MAX_Q1D];
3735  for (int qx = 0; qx < Q1D; ++qx)
3736  {
3737  massX[qx] = 0.0;
3738  }
3739 
3740  for (int dx = 0; dx < D1Dx; ++dx)
3741  {
3742  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3743  for (int qx = 0; qx < Q1D; ++qx)
3744  {
3745  massX[qx] += t * Bo(qx,dx);
3746  }
3747  }
3748 
3749  for (int qy = 0; qy < Q1D; ++qy)
3750  {
3751  const double wy = Bc(qy,dy);
3752  const double wDy = Gc(qy,dy);
3753  for (int qx = 0; qx < Q1D; ++qx)
3754  {
3755  const double wx = massX[qx];
3756  gradXY[qy][qx][0] += wx * wDy;
3757  gradXY[qy][qx][1] += wx * wy;
3758  }
3759  }
3760  }
3761 
3762  for (int qz = 0; qz < Q1D; ++qz)
3763  {
3764  const double wz = Bc(qz,dz);
3765  const double wDz = Gc(qz,dz);
3766  for (int qy = 0; qy < Q1D; ++qy)
3767  {
3768  for (int qx = 0; qx < Q1D; ++qx)
3769  {
3770  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3771  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3772  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3773  }
3774  }
3775  }
3776  }
3777 
3778  osc += D1Dx * D1Dy * D1Dz;
3779  }
3780 
3781  {
3782  // y component
3783  const int D1Dz = D1D;
3784  const int D1Dy = D1D - 1;
3785  const int D1Dx = D1D;
3786 
3787  for (int dz = 0; dz < D1Dz; ++dz)
3788  {
 // For this dz: gradXY[..][0] uses the x-derivative (Gc) and
 // gradXY[..][1] the x-value (Bc) of the basis.
3789  double gradXY[MAX_Q1D][MAX_Q1D][2];
3790  for (int qy = 0; qy < Q1D; ++qy)
3791  {
3792  for (int qx = 0; qx < Q1D; ++qx)
3793  {
3794  for (int d = 0; d < 2; ++d)
3795  {
3796  gradXY[qy][qx][d] = 0.0;
3797  }
3798  }
3799  }
3800 
3801  for (int dx = 0; dx < D1Dx; ++dx)
3802  {
 // Contract the y dof index with the open basis first.
3803  double massY[MAX_Q1D];
3804  for (int qy = 0; qy < Q1D; ++qy)
3805  {
3806  massY[qy] = 0.0;
3807  }
3808 
3809  for (int dy = 0; dy < D1Dy; ++dy)
3810  {
3811  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3812  for (int qy = 0; qy < Q1D; ++qy)
3813  {
3814  massY[qy] += t * Bo(qy,dy);
3815  }
3816  }
3817 
3818  for (int qx = 0; qx < Q1D; ++qx)
3819  {
3820  const double wx = Bc(qx,dx);
3821  const double wDx = Gc(qx,dx);
3822  for (int qy = 0; qy < Q1D; ++qy)
3823  {
3824  const double wy = massY[qy];
3825  gradXY[qy][qx][0] += wDx * wy;
3826  gradXY[qy][qx][1] += wx * wy;
3827  }
3828  }
3829  }
3830 
3831  for (int qz = 0; qz < Q1D; ++qz)
3832  {
3833  const double wz = Bc(qz,dz);
3834  const double wDz = Gc(qz,dz);
3835  for (int qy = 0; qy < Q1D; ++qy)
3836  {
3837  for (int qx = 0; qx < Q1D; ++qx)
3838  {
3839  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3840  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3841  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3842  }
3843  }
3844  }
3845  }
3846 
3847  osc += D1Dx * D1Dy * D1Dz;
3848  }
3849 
3850  {
3851  // z component
3852  const int D1Dz = D1D - 1;
3853  const int D1Dy = D1D;
3854  const int D1Dx = D1D;
3855 
 // Unlike the x and y components, the outer loop here runs over dx and the
 // intermediate array is indexed [qz][qy].
3856  for (int dx = 0; dx < D1Dx; ++dx)
3857  {
 // For this dx: gradYZ[..][0] uses the y-value (Bc) and
 // gradYZ[..][1] the y-derivative (Gc) of the basis.
3858  double gradYZ[MAX_Q1D][MAX_Q1D][2];
3859  for (int qz = 0; qz < Q1D; ++qz)
3860  {
3861  for (int qy = 0; qy < Q1D; ++qy)
3862  {
3863  for (int d = 0; d < 2; ++d)
3864  {
3865  gradYZ[qz][qy][d] = 0.0;
3866  }
3867  }
3868  }
3869 
3870  for (int dy = 0; dy < D1Dy; ++dy)
3871  {
 // Contract the z dof index with the open basis first.
3872  double massZ[MAX_Q1D];
3873  for (int qz = 0; qz < Q1D; ++qz)
3874  {
3875  massZ[qz] = 0.0;
3876  }
3877 
3878  for (int dz = 0; dz < D1Dz; ++dz)
3879  {
3880  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3881  for (int qz = 0; qz < Q1D; ++qz)
3882  {
3883  massZ[qz] += t * Bo(qz,dz);
3884  }
3885  }
3886 
3887  for (int qy = 0; qy < Q1D; ++qy)
3888  {
3889  const double wy = Bc(qy,dy);
3890  const double wDy = Gc(qy,dy);
3891  for (int qz = 0; qz < Q1D; ++qz)
3892  {
3893  const double wz = massZ[qz];
3894  gradYZ[qz][qy][0] += wz * wy;
3895  gradYZ[qz][qy][1] += wz * wDy;
3896  }
3897  }
3898  }
3899 
3900  for (int qx = 0; qx < Q1D; ++qx)
3901  {
3902  const double wx = Bc(qx,dx);
3903  const double wDx = Gc(qx,dx);
3904 
3905  for (int qy = 0; qy < Q1D; ++qy)
3906  {
3907  for (int qz = 0; qz < Q1D; ++qz)
3908  {
3909  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3910  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3911  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3912  }
3913  }
3914  }
3915  }
3916  }
3917 
3918  // Apply D operator.
 // With coeffDim == 1 the curl is scaled by the single entry op(0,...).
 // Otherwise the nine entries op(0..8,...) are read in the order
 // O11,O21,O31,O12,O22,O32,O13,O23,O33 and applied as a 3x3 matrix.
3919  for (int qz = 0; qz < Q1D; ++qz)
3920  {
3921  for (int qy = 0; qy < Q1D; ++qy)
3922  {
3923  for (int qx = 0; qx < Q1D; ++qx)
3924  {
3925  const double O11 = op(0,qx,qy,qz,e);
3926  if (coeffDim == 1)
3927  {
3928  for (int c = 0; c < VDIM; ++c)
3929  {
3930  curl[qz][qy][qx][c] *= O11;
3931  }
3932  }
3933  else
3934  {
3935  const double O21 = op(1,qx,qy,qz,e);
3936  const double O31 = op(2,qx,qy,qz,e);
3937  const double O12 = op(3,qx,qy,qz,e);
3938  const double O22 = op(4,qx,qy,qz,e);
3939  const double O32 = op(5,qx,qy,qz,e);
3940  const double O13 = op(6,qx,qy,qz,e);
3941  const double O23 = op(7,qx,qy,qz,e);
3942  const double O33 = op(8,qx,qy,qz,e);
3943  const double curlX = curl[qz][qy][qx][0];
3944  const double curlY = curl[qz][qy][qx][1];
3945  const double curlZ = curl[qz][qy][qx][2];
3946  curl[qz][qy][qx][0] = (O11*curlX)+(O12*curlY)+(O13*curlZ);
3947  curl[qz][qy][qx][1] = (O21*curlX)+(O22*curlY)+(O23*curlZ);
3948  curl[qz][qy][qx][2] = (O31*curlX)+(O32*curlY)+(O33*curlZ);
3949  }
3950  }
3951  }
3952  }
3953 
 // Test-side contraction, one qz slice at a time: for each component c,
 // contract qx into massX, then qy into massXY, then add massXY * wz into Y
 // for every dz. The open basis (Bot) is used in direction c, the closed
 // basis (Bct) in the other two directions.
3954  for (int qz = 0; qz < Q1D; ++qz)
3955  {
3956  double massXY[MAX_D1D][MAX_D1D];
3957 
3958  osc = 0;
3959 
3960  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3961  {
3962  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3963  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3964  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3965 
3966  for (int dy = 0; dy < D1Dy; ++dy)
3967  {
3968  for (int dx = 0; dx < D1Dx; ++dx)
3969  {
3970  massXY[dy][dx] = 0;
3971  }
3972  }
3973  for (int qy = 0; qy < Q1D; ++qy)
3974  {
3975  double massX[MAX_D1D];
3976  for (int dx = 0; dx < D1Dx; ++dx)
3977  {
3978  massX[dx] = 0.0;
3979  }
3980  for (int qx = 0; qx < Q1D; ++qx)
3981  {
3982  for (int dx = 0; dx < D1Dx; ++dx)
3983  {
3984  massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3985  }
3986  }
3987 
3988  for (int dy = 0; dy < D1Dy; ++dy)
3989  {
3990  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3991  for (int dx = 0; dx < D1Dx; ++dx)
3992  {
3993  massXY[dy][dx] += massX[dx] * wy;
3994  }
3995  }
3996  }
3997 
3998  for (int dz = 0; dz < D1Dz; ++dz)
3999  {
4000  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
4001  for (int dy = 0; dy < D1Dy; ++dy)
4002  {
4003  for (int dx = 0; dx < D1Dx; ++dx)
4004  {
4005  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
4006  }
4007  }
4008  }
4009 
4010  osc += D1Dx * D1Dy * D1Dz;
4011  } // loop c
4012  } // loop qz
4013  }); // end of element loop
4014 }
4015 
4016 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
4017 // integrated against H(curl) test functions corresponding to y.
//
// Shared-memory variant of PAHcurlL2Apply3D. It is launched through
// ForallWrap<3> with a device kernel only; the host kernel aborts.
//
// Parameters, as used in the body and at the call site in
// MixedVectorCurlIntegrator::AddMultPA:
//   D1D      - 1D size of the closed basis; the open basis has D1D-1 entries.
//              Must not exceed MAX_D1D.
//   Q1D      - number of 1D quadrature points. Must not exceed MAX_Q1D.
//   coeffDim - number of pa_data entries per quadrature point. 1 means a
//              single scalar factor; otherwise nine entries are read and
//              applied as a 3x3 matrix.
//   NE       - number of elements.
//   bo, bc   - B arrays of the open and closed dof-to-quad maps.
//   gc       - G array of the closed dof-to-quad map.
//   pa_data  - quadrature-point data, viewed as (coeffDim, Q1D, Q1D, Q1D, NE).
//   x        - input dofs; per element three component blocks of
//              (D1D-1)*D1D*D1D entries each.
//   y        - output dofs with the same layout; results are ADDED to y.
// No transposed basis arrays are passed: the test-side contraction reuses
// the shared copies of Bo/Bc.
4018 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4019 static void SmemPAHcurlL2Apply3D(const int D1D,
4020  const int Q1D,
4021  const int coeffDim,
4022  const int NE,
4023  const Array<double> &bo,
4024  const Array<double> &bc,
4025  const Array<double> &gc,
4026  const Vector &pa_data,
4027  const Vector &x,
4028  Vector &y)
4029 {
4030  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4031  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4032 
4033  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4034  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4035  auto Gc = Reshape(gc.Read(), Q1D, D1D);
4036  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4037  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4038  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4039 
4040  auto device_kernel = [=] MFEM_DEVICE (int e)
4041  {
4042  constexpr int VDIM = 3;
4043  constexpr int maxCoeffDim = 9;
4044 
 // Shared copies of the 1D basis tables, stored transposed relative to
 // Bo/Bc/Gc, i.e. indexed [dof][quadrature point].
4045  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4046  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4047  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4048 
 // opc: thread-local copy of the coefficient entries at this thread's
 // (qx,qy,qz). sop and curl hold the coefficients and the reference curl
 // for one qz slice at a time. sX holds the dofs of one vector component.
4049  double opc[maxCoeffDim];
4050  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4051  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
4052 
4053  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4054 
4055  MFEM_FOREACH_THREAD(qx,x,Q1D)
4056  {
4057  MFEM_FOREACH_THREAD(qy,y,Q1D)
4058  {
4059  MFEM_FOREACH_THREAD(qz,z,Q1D)
4060  {
4061  for (int i=0; i<coeffDim; ++i)
4062  {
4063  opc[i] = op(i,qx,qy,qz,e);
4064  }
4065  }
4066  }
4067  }
4068 
4069  const int tidx = MFEM_THREAD_ID(x);
4070  const int tidy = MFEM_THREAD_ID(y);
4071  const int tidz = MFEM_THREAD_ID(z);
4072 
 // The tidz == 0 threads fill the shared basis tables; the barrier below
 // makes them visible before any thread reads them.
4073  if (tidz == 0)
4074  {
4075  MFEM_FOREACH_THREAD(d,y,D1D)
4076  {
4077  MFEM_FOREACH_THREAD(q,x,Q1D)
4078  {
4079  sBc[d][q] = Bc(q,d);
4080  sGc[d][q] = Gc(q,d);
4081  if (d < D1D-1)
4082  {
4083  sBo[d][q] = Bo(q,d);
4084  }
4085  }
4086  }
4087  }
4088  MFEM_SYNC_THREAD;
4089 
 // Process one qz slice per iteration. Only the tidz == qz threads compute
 // the curl for the slice; all threads take part in loading sX and in the
 // test-side accumulation further down.
4090  for (int qz=0; qz < Q1D; ++qz)
4091  {
4092  if (tidz == qz)
4093  {
4094  MFEM_FOREACH_THREAD(qy,y,Q1D)
4095  {
4096  MFEM_FOREACH_THREAD(qx,x,Q1D)
4097  {
4098  for (int i=0; i<3; ++i)
4099  {
4100  curl[qy][qx][i] = 0.0;
4101  }
4102  }
4103  }
4104  }
4105 
 // osc is the offset of the current component's dof block within X.
4106  int osc = 0;
4107  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4108  {
4109  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4110  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4111  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4112 
 // Stage the dofs of component c in shared memory.
4113  MFEM_FOREACH_THREAD(dz,z,D1Dz)
4114  {
4115  MFEM_FOREACH_THREAD(dy,y,D1Dy)
4116  {
4117  MFEM_FOREACH_THREAD(dx,x,D1Dx)
4118  {
4119  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4120  }
4121  }
4122  }
4123  MFEM_SYNC_THREAD;
4124 
4125  if (tidz == qz)
4126  {
 // On the first component, publish this slice's coefficients from the
 // thread-local copy to shared memory, indexed [entry][tidx][tidy].
4127  if (c == 0)
4128  {
4129  for (int i=0; i<coeffDim; ++i)
4130  {
4131  sop[i][tidx][tidy] = opc[i];
4132  }
4133  }
4134 
4135  MFEM_FOREACH_THREAD(qy,y,Q1D)
4136  {
4137  MFEM_FOREACH_THREAD(qx,x,Q1D)
4138  {
 // u and v accumulate the two nonzero derivative terms of component c
 // at (qx,qy,qz); see the per-branch comments for which is which.
4139  double u = 0.0;
4140  double v = 0.0;
4141 
4142  // We treat x, y, z components separately for optimization specific to each.
4143  if (c == 0) // x component
4144  {
4145  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4146 
4147  for (int dz = 0; dz < D1Dz; ++dz)
4148  {
4149  const double wz = sBc[dz][qz];
4150  const double wDz = sGc[dz][qz];
4151 
4152  for (int dy = 0; dy < D1Dy; ++dy)
4153  {
4154  const double wy = sBc[dy][qy];
4155  const double wDy = sGc[dy][qy];
4156 
4157  for (int dx = 0; dx < D1Dx; ++dx)
4158  {
4159  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
4160  u += wx * wDy * wz;
4161  v += wx * wy * wDz;
4162  }
4163  }
4164  }
4165 
4166  curl[qy][qx][1] += v; // (u_0)_{x_2}
4167  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
4168  }
4169  else if (c == 1) // y component
4170  {
4171  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4172 
4173  for (int dz = 0; dz < D1Dz; ++dz)
4174  {
4175  const double wz = sBc[dz][qz];
4176  const double wDz = sGc[dz][qz];
4177 
4178  for (int dy = 0; dy < D1Dy; ++dy)
4179  {
4180  const double wy = sBo[dy][qy];
4181 
4182  for (int dx = 0; dx < D1Dx; ++dx)
4183  {
4184  const double t = sX[dz][dy][dx];
4185  const double wx = t * sBc[dx][qx];
4186  const double wDx = t * sGc[dx][qx];
4187 
4188  u += wDx * wy * wz;
4189  v += wx * wy * wDz;
4190  }
4191  }
4192  }
4193 
4194  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
4195  curl[qy][qx][2] += u; // (u_1)_{x_0}
4196  }
4197  else // z component
4198  {
4199  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4200 
4201  for (int dz = 0; dz < D1Dz; ++dz)
4202  {
4203  const double wz = sBo[dz][qz];
4204 
4205  for (int dy = 0; dy < D1Dy; ++dy)
4206  {
4207  const double wy = sBc[dy][qy];
4208  const double wDy = sGc[dy][qy];
4209 
4210  for (int dx = 0; dx < D1Dx; ++dx)
4211  {
4212  const double t = sX[dz][dy][dx];
4213  const double wx = t * sBc[dx][qx];
4214  const double wDx = t * sGc[dx][qx];
4215 
4216  u += wDx * wy * wz;
4217  v += wx * wDy * wz;
4218  }
4219  }
4220  }
4221 
4222  curl[qy][qx][0] += v; // (u_2)_{x_1}
4223  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
4224  }
4225  } // qx
4226  } // qy
4227  } // tidz == qz
4228 
4229  osc += D1Dx * D1Dy * D1Dz;
 // Barrier: sX is overwritten for the next component, and after the last
 // component curl/sop are read by all threads.
4230  MFEM_SYNC_THREAD;
4231  } // c
4232 
 // Contributions of this qz slice to the x-, y- and z-component dofs at the
 // thread's (dx,dy,dz).
 // NOTE(review): the three accumulators are shared by every (dx,dy,dz) the
 // FOREACH loops below assign to a thread, so this appears to rely on each
 // thread handling at most one dof triple - confirm against the launch
 // configuration.
4233  double dxyz1 = 0.0;
4234  double dxyz2 = 0.0;
4235  double dxyz3 = 0.0;
4236 
4237  MFEM_FOREACH_THREAD(dz,z,D1D)
4238  {
 // Open-basis weights are set to 0 at index D1D-1, where the open basis
 // has no entry.
4239  const double wcz = sBc[dz][qz];
4240  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4241 
4242  MFEM_FOREACH_THREAD(dy,y,D1D)
4243  {
4244  MFEM_FOREACH_THREAD(dx,x,D1D)
4245  {
4246  for (int qy = 0; qy < Q1D; ++qy)
4247  {
4248  const double wcy = sBc[dy][qy];
4249  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4250 
4251  for (int qx = 0; qx < Q1D; ++qx)
4252  {
 // Apply the coefficient to the curl at (qx,qy): a scalar factor when
 // coeffDim == 1, otherwise a 3x3 matrix whose entries are stored in the
 // order O11,O21,O31,O12,O22,O32,O13,O23,O33.
4253  const double O11 = sop[0][qx][qy];
4254  double c1, c2, c3;
4255  if (coeffDim == 1)
4256  {
4257  c1 = O11 * curl[qy][qx][0];
4258  c2 = O11 * curl[qy][qx][1];
4259  c3 = O11 * curl[qy][qx][2];
4260  }
4261  else
4262  {
4263  const double O21 = sop[1][qx][qy];
4264  const double O31 = sop[2][qx][qy];
4265  const double O12 = sop[3][qx][qy];
4266  const double O22 = sop[4][qx][qy];
4267  const double O32 = sop[5][qx][qy];
4268  const double O13 = sop[6][qx][qy];
4269  const double O23 = sop[7][qx][qy];
4270  const double O33 = sop[8][qx][qy];
4271  c1 = (O11*curl[qy][qx][0])+(O12*curl[qy][qx][1])+(O13*curl[qy][qx][2]);
4272  c2 = (O21*curl[qy][qx][0])+(O22*curl[qy][qx][1])+(O23*curl[qy][qx][2]);
4273  c3 = (O31*curl[qy][qx][0])+(O32*curl[qy][qx][1])+(O33*curl[qy][qx][2]);
4274  }
4275 
4276  const double wcx = sBc[dx][qx];
4277 
4278  if (dx < D1D-1)
4279  {
4280  const double wx = sBo[dx][qx];
4281  dxyz1 += c1 * wx * wcy * wcz;
4282  }
4283 
4284  dxyz2 += c2 * wcx * wy * wcz;
4285  dxyz3 += c3 * wcx * wcy * wz;
4286  } // qx
4287  } // qy
4288  } // dx
4289  } // dy
4290  } // dz
4291 
4292  MFEM_SYNC_THREAD;
4293 
 // Add the slice contributions into Y. The guards skip the index D1D-1 in
 // the direction where the component's dof block has only D1D-1 entries.
4294  MFEM_FOREACH_THREAD(dz,z,D1D)
4295  {
4296  MFEM_FOREACH_THREAD(dy,y,D1D)
4297  {
4298  MFEM_FOREACH_THREAD(dx,x,D1D)
4299  {
4300  if (dx < D1D-1)
4301  {
4302  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4303  }
4304  if (dy < D1D-1)
4305  {
4306  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4307  }
4308  if (dz < D1D-1)
4309  {
4310  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4311  }
4312  }
4313  }
4314  }
4315  } // qz
4316  }; // end of element loop
4317 
 // Host fallback: this kernel has no CPU implementation.
4318  auto host_kernel = [&] MFEM_LAMBDA (int)
4319  {
4320  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
4321  };
4322 
 // NOTE(review): the three trailing Q1D arguments are presumably the thread
 // block extents in x, y, z - confirm against ForallWrap in forall.hpp.
4323  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
4324 }
4325 
4326 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
4327 // integrated against H(div) test functions corresponding to y.
//
// Parameters, as used in the body and at the call site in
// MixedVectorCurlIntegrator::AddMultPA:
//   D1D      - 1D size of the closed trial basis; the open trial basis has
//              D1D-1 entries. Must not exceed MAX_D1D.
//   D1Dtest  - 1D size of the closed test basis; the open test basis has
//              D1Dtest-1 entries.
//   Q1D      - number of 1D quadrature points. Must not exceed MAX_Q1D.
//   NE       - number of elements.
//   bo, bc   - B arrays of the open and closed trial dof-to-quad maps.
//   bot, bct - Bt arrays of the open and closed TEST dof-to-quad maps.
//   gc       - G array of the closed trial dof-to-quad map.
//   pa_data  - quadrature-point data, viewed as (Q1D, Q1D, Q1D, 6, NE); the
//              six entries are applied as a symmetric 3x3 matrix.
//   x        - input dofs; per element three component blocks of
//              (D1D-1)*D1D*D1D entries each.
//   y        - output dofs; results are ADDED to y.
4328 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4329 static void PAHcurlHdivApply3D(const int D1D,
4330  const int D1Dtest,
4331  const int Q1D,
4332  const int NE,
4333  const Array<double> &bo,
4334  const Array<double> &bc,
4335  const Array<double> &bot,
4336  const Array<double> &bct,
4337  const Array<double> &gc,
4338  const Vector &pa_data,
4339  const Vector &x,
4340  Vector &y)
4341 {
 // NOTE(review): D1Dtest is not checked here, although it bounds the loops
 // over massX/massXY below (sized with HCURL_MAX_D1D) - confirm the caller
 // guarantees D1Dtest <= HCURL_MAX_D1D.
4342  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4343  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4344  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
4345  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
4346  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
4347  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4348  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4349  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4350 
4351  constexpr static int VDIM = 3;
4352 
4353  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4354  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4355  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
4356  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
4357  auto Gc = Reshape(gc.Read(), Q1D, D1D);
4358  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
4359  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
 // NOTE(review): the per-element extent of Y uses D1D as its last factor,
 // while the test-side loops below write three blocks of
 // D1Dtest*(D1Dtest-1)*(D1Dtest-1) entries. The two agree only when
 // D1D == D1Dtest; MixedVectorCurlIntegrator::AssemblePA verifies equal trial
 // and test orders, which presumably guarantees that - confirm.
4360  auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
4361 
4362  MFEM_FORALL(e, NE,
4363  {
4364  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4365  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
4366 
4367  for (int qz = 0; qz < Q1D; ++qz)
4368  {
4369  for (int qy = 0; qy < Q1D; ++qy)
4370  {
4371  for (int qx = 0; qx < Q1D; ++qx)
4372  {
4373  for (int c = 0; c < VDIM; ++c)
4374  {
4375  curl[qz][qy][qx][c] = 0.0;
4376  }
4377  }
4378  }
4379  }
4380 
4381  // We treat x, y, z components separately for optimization specific to each.
4382 
 // osc is the offset of the current component's dof block within X (and,
 // further below, within Y).
4383  int osc = 0;
4384 
4385  {
4386  // x component
4387  const int D1Dz = D1D;
4388  const int D1Dy = D1D;
4389  const int D1Dx = D1D - 1;
4390 
4391  for (int dz = 0; dz < D1Dz; ++dz)
4392  {
 // For this dz: gradXY[..][0] uses the y-derivative (Gc) and
 // gradXY[..][1] the y-value (Bc) of the basis.
4393  double gradXY[MAX_Q1D][MAX_Q1D][2];
4394  for (int qy = 0; qy < Q1D; ++qy)
4395  {
4396  for (int qx = 0; qx < Q1D; ++qx)
4397  {
4398  for (int d = 0; d < 2; ++d)
4399  {
4400  gradXY[qy][qx][d] = 0.0;
4401  }
4402  }
4403  }
4404 
4405  for (int dy = 0; dy < D1Dy; ++dy)
4406  {
 // Contract the x dof index with the open basis first.
4407  double massX[MAX_Q1D];
4408  for (int qx = 0; qx < Q1D; ++qx)
4409  {
4410  massX[qx] = 0.0;
4411  }
4412 
4413  for (int dx = 0; dx < D1Dx; ++dx)
4414  {
4415  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4416  for (int qx = 0; qx < Q1D; ++qx)
4417  {
4418  massX[qx] += t * Bo(qx,dx);
4419  }
4420  }
4421 
4422  for (int qy = 0; qy < Q1D; ++qy)
4423  {
4424  const double wy = Bc(qy,dy);
4425  const double wDy = Gc(qy,dy);
4426  for (int qx = 0; qx < Q1D; ++qx)
4427  {
4428  const double wx = massX[qx];
4429  gradXY[qy][qx][0] += wx * wDy;
4430  gradXY[qy][qx][1] += wx * wy;
4431  }
4432  }
4433  }
4434 
4435  for (int qz = 0; qz < Q1D; ++qz)
4436  {
4437  const double wz = Bc(qz,dz);
4438  const double wDz = Gc(qz,dz);
4439  for (int qy = 0; qy < Q1D; ++qy)
4440  {
4441  for (int qx = 0; qx < Q1D; ++qx)
4442  {
4443  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4444  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
4445  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
4446  }
4447  }
4448  }
4449  }
4450 
4451  osc += D1Dx * D1Dy * D1Dz;
4452  }
4453 
4454  {
4455  // y component
4456  const int D1Dz = D1D;
4457  const int D1Dy = D1D - 1;
4458  const int D1Dx = D1D;
4459 
4460  for (int dz = 0; dz < D1Dz; ++dz)
4461  {
 // For this dz: gradXY[..][0] uses the x-derivative (Gc) and
 // gradXY[..][1] the x-value (Bc) of the basis.
4462  double gradXY[MAX_Q1D][MAX_Q1D][2];
4463  for (int qy = 0; qy < Q1D; ++qy)
4464  {
4465  for (int qx = 0; qx < Q1D; ++qx)
4466  {
4467  for (int d = 0; d < 2; ++d)
4468  {
4469  gradXY[qy][qx][d] = 0.0;
4470  }
4471  }
4472  }
4473 
4474  for (int dx = 0; dx < D1Dx; ++dx)
4475  {
 // Contract the y dof index with the open basis first.
4476  double massY[MAX_Q1D];
4477  for (int qy = 0; qy < Q1D; ++qy)
4478  {
4479  massY[qy] = 0.0;
4480  }
4481 
4482  for (int dy = 0; dy < D1Dy; ++dy)
4483  {
4484  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4485  for (int qy = 0; qy < Q1D; ++qy)
4486  {
4487  massY[qy] += t * Bo(qy,dy);
4488  }
4489  }
4490 
4491  for (int qx = 0; qx < Q1D; ++qx)
4492  {
4493  const double wx = Bc(qx,dx);
4494  const double wDx = Gc(qx,dx);
4495  for (int qy = 0; qy < Q1D; ++qy)
4496  {
4497  const double wy = massY[qy];
4498  gradXY[qy][qx][0] += wDx * wy;
4499  gradXY[qy][qx][1] += wx * wy;
4500  }
4501  }
4502  }
4503 
4504  for (int qz = 0; qz < Q1D; ++qz)
4505  {
4506  const double wz = Bc(qz,dz);
4507  const double wDz = Gc(qz,dz);
4508  for (int qy = 0; qy < Q1D; ++qy)
4509  {
4510  for (int qx = 0; qx < Q1D; ++qx)
4511  {
4512  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4513  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
4514  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
4515  }
4516  }
4517  }
4518  }
4519 
4520  osc += D1Dx * D1Dy * D1Dz;
4521  }
4522 
4523  {
4524  // z component
4525  const int D1Dz = D1D - 1;
4526  const int D1Dy = D1D;
4527  const int D1Dx = D1D;
4528 
 // Unlike the x and y components, the outer loop here runs over dx and the
 // intermediate array is indexed [qz][qy].
4529  for (int dx = 0; dx < D1Dx; ++dx)
4530  {
 // For this dx: gradYZ[..][0] uses the y-value (Bc) and
 // gradYZ[..][1] the y-derivative (Gc) of the basis.
4531  double gradYZ[MAX_Q1D][MAX_Q1D][2];
4532  for (int qz = 0; qz < Q1D; ++qz)
4533  {
4534  for (int qy = 0; qy < Q1D; ++qy)
4535  {
4536  for (int d = 0; d < 2; ++d)
4537  {
4538  gradYZ[qz][qy][d] = 0.0;
4539  }
4540  }
4541  }
4542 
4543  for (int dy = 0; dy < D1Dy; ++dy)
4544  {
 // Contract the z dof index with the open basis first.
4545  double massZ[MAX_Q1D];
4546  for (int qz = 0; qz < Q1D; ++qz)
4547  {
4548  massZ[qz] = 0.0;
4549  }
4550 
4551  for (int dz = 0; dz < D1Dz; ++dz)
4552  {
4553  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4554  for (int qz = 0; qz < Q1D; ++qz)
4555  {
4556  massZ[qz] += t * Bo(qz,dz);
4557  }
4558  }
4559 
4560  for (int qy = 0; qy < Q1D; ++qy)
4561  {
4562  const double wy = Bc(qy,dy);
4563  const double wDy = Gc(qy,dy);
4564  for (int qz = 0; qz < Q1D; ++qz)
4565  {
4566  const double wz = massZ[qz];
4567  gradYZ[qz][qy][0] += wz * wy;
4568  gradYZ[qz][qy][1] += wz * wDy;
4569  }
4570  }
4571  }
4572 
4573  for (int qx = 0; qx < Q1D; ++qx)
4574  {
4575  const double wx = Bc(qx,dx);
4576  const double wDx = Gc(qx,dx);
4577 
4578  for (int qy = 0; qy < Q1D; ++qy)
4579  {
4580  for (int qz = 0; qz < Q1D; ++qz)
4581  {
4582  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4583  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
4584  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
4585  }
4586  }
4587  }
4588  }
4589  }
4590 
4591  // Apply D operator.
 // The six stored entries are O11,O12,O13,O22,O23,O33; the off-diagonal
 // entries are reused for both triangles, i.e. the matrix is symmetric.
4592  for (int qz = 0; qz < Q1D; ++qz)
4593  {
4594  for (int qy = 0; qy < Q1D; ++qy)
4595  {
4596  for (int qx = 0; qx < Q1D; ++qx)
4597  {
4598  const double O11 = op(qx,qy,qz,0,e);
4599  const double O12 = op(qx,qy,qz,1,e);
4600  const double O13 = op(qx,qy,qz,2,e);
4601  const double O22 = op(qx,qy,qz,3,e);
4602  const double O23 = op(qx,qy,qz,4,e);
4603  const double O33 = op(qx,qy,qz,5,e);
4604 
4605  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
4606  (O13 * curl[qz][qy][qx][2]);
4607  const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
4608  (O23 * curl[qz][qy][qx][2]);
4609  const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
4610  (O33 * curl[qz][qy][qx][2]);
4611 
4612  curl[qz][qy][qx][0] = c1;
4613  curl[qz][qy][qx][1] = c2;
4614  curl[qz][qy][qx][2] = c3;
4615  }
4616  }
4617  }
4618 
 // Test-side contraction, one qz slice at a time: for each component c,
 // contract qx into massX, then qy into massXY, then add massXY * wz into Y
 // for every dz. The closed test basis (Bct) is used in direction c, the
 // open test basis (Bot) in the other two directions.
4619  for (int qz = 0; qz < Q1D; ++qz)
4620  {
4621  double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
4622 
4623  osc = 0;
4624 
4625  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4626  {
4627  const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
4628  const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
4629  const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
4630 
4631  for (int dy = 0; dy < D1Dy; ++dy)
4632  {
4633  for (int dx = 0; dx < D1Dx; ++dx)
4634  {
4635  massXY[dy][dx] = 0;
4636  }
4637  }
4638  for (int qy = 0; qy < Q1D; ++qy)
4639  {
4640  double massX[HCURL_MAX_D1D];
4641  for (int dx = 0; dx < D1Dx; ++dx)
4642  {
4643  massX[dx] = 0;
4644  }
4645  for (int qx = 0; qx < Q1D; ++qx)
4646  {
4647  for (int dx = 0; dx < D1Dx; ++dx)
4648  {
4649  massX[dx] += curl[qz][qy][qx][c] *
4650  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
4651  }
4652  }
4653  for (int dy = 0; dy < D1Dy; ++dy)
4654  {
4655  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
4656  for (int dx = 0; dx < D1Dx; ++dx)
4657  {
4658  massXY[dy][dx] += massX[dx] * wy;
4659  }
4660  }
4661  }
4662 
4663  for (int dz = 0; dz < D1Dz; ++dz)
4664  {
4665  const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
4666  for (int dy = 0; dy < D1Dy; ++dy)
4667  {
4668  for (int dx = 0; dx < D1Dx; ++dx)
4669  {
4670  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
4671  massXY[dy][dx] * wz;
4672  }
4673  }
4674  }
4675 
4676  osc += D1Dx * D1Dy * D1Dz;
4677  } // loop c
4678  } // loop qz
4679  }); // end of element loop
4680 }
4681 
4682 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4683 {
4684  if (testType == mfem::FiniteElement::CURL &&
4685  trialType == mfem::FiniteElement::CURL && dim == 3)
4686  {
4687  const int ndata = coeffDim == 1 ? 1 : 9;
4688 
4689  if (Device::Allows(Backend::DEVICE_MASK))
4690  {
4691  const int ID = (dofs1D << 4) | quad1D;
4692  switch (ID)
4693  {
4694  case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, ndata, ne,
4695  mapsO->B, mapsC->B,
4696  mapsC->G, pa_data, x, y);
4697  case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, ndata, ne,
4698  mapsO->B, mapsC->B,
4699  mapsC->G, pa_data, x, y);
4700  case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, ndata, ne,
4701  mapsO->B, mapsC->B,
4702  mapsC->G, pa_data, x, y);
4703  case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, ndata, ne,
4704  mapsO->B, mapsC->B,
4705  mapsC->G, pa_data, x, y);
4706  default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne,
4707  mapsO->B, mapsC->B, mapsC->G,
4708  pa_data, x, y);
4709  }
4710  }
4711  else
4712  PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B,
4713  mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
4714  }
4715  else if (testType == mfem::FiniteElement::DIV &&
4716  trialType == mfem::FiniteElement::CURL && dim == 3)
4717  PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
4718  mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
4719  pa_data, x, y);
4720  else
4721  {
4722  MFEM_ABORT("Unsupported dimension or space!");
4723  }
4724 }
4725 
4726 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
4727  &trial_fes,
4728  const FiniteElementSpace &test_fes)
4729 {
4730  // Assumes tensor-product elements, with vector test and trial spaces.
4731  Mesh *mesh = trial_fes.GetMesh();
4732  const FiniteElement *trial_fel = trial_fes.GetFE(0);
4733  const FiniteElement *test_fel = test_fes.GetFE(0);
4734 
4735  const VectorTensorFiniteElement *trial_el =
4736  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
4737  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
4738 
4739  const VectorTensorFiniteElement *test_el =
4740  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
4741  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
4742 
4743  const IntegrationRule *ir
4744  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4745  *mesh->GetElementTransformation(0));
4746  const int dims = trial_el->GetDim();
4747  MFEM_VERIFY(dims == 3, "");
4748 
4749  const int nq = ir->GetNPoints();
4750  dim = mesh->Dimension();
4751  MFEM_VERIFY(dim == 3, "");
4752 
4753  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4754 
4755  ne = trial_fes.GetNE();
4756  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4757  mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4758  mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4759  dofs1D = mapsC->ndof;
4760  quad1D = mapsC->nqpt;
4761 
4762  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4763 
4764  coeffDim = DQ ? 3 : 1;
4765  const int ndata = DQ ? 9 : 1;
4766 
4767  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
4768 
4769  Vector coeff(coeffDim * nq * ne);
4770  coeff = 1.0;
4771  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4772  if (Q || DQ)
4773  {
4774  Vector V(coeffDim);
4775  if (DQ)
4776  {
4777  MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4778  }
4779 
4780  for (int e=0; e<ne; ++e)
4781  {
4782  ElementTransformation *tr = mesh->GetElementTransformation(e);
4783 
4784  for (int p=0; p<nq; ++p)
4785  {
4786  if (DQ)
4787  {
4788  DQ->Eval(V, *tr, ir->IntPoint(p));
4789  for (int i=0; i<coeffDim; ++i)
4790  {
4791  coeffh(i, p, e) = V[i];
4792  }
4793  }
4794  else
4795  {
4796  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4797  }
4798  }
4799  }
4800  }
4801 
4802  testType = test_el->GetDerivType();
4803  trialType = trial_el->GetDerivType();
4804 
4805  if (trialType == mfem::FiniteElement::CURL && dim == 3)
4806  {
4807  if (coeffDim == 1)
4808  {
4809  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4810  }
4811  else
4812  {
4813  PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), geom->J,
4814  coeff, pa_data);
4815  }
4816  }
4817  else
4818  {
4819  MFEM_ABORT("Unknown kernel.");
4820  }
4821 }
4822 
4823 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4824 // of H(curl) test functions corresponding to y.
     //
     // Arguments, as they are used in the body:
     //  D1D      - 1D size of the closed tables (bc, bct, gct); the open tables
     //             (bo, bot) use D1D-1.
     //  Q1D      - 1D number of quadrature points.
     //  coeffDim - leading dimension of pa_data per quadrature point. With
     //             coeffDim == 1 entry 0 scales all three components; otherwise
     //             entries 0..8 are read as a row-major 3x3 matrix.
     //  NE       - number of elements.
     //  bo, bc   - tables indexed (q,d); bot, bct, gct are indexed (d,q).
     //  x, y     - 3*(D1D-1)*D1D*D1D values per element; y is accumulated into
     //             (+=), never overwritten.
     // MAX_D1D and MAX_Q1D bound the fixed-size local arrays and are checked
     // against D1D and Q1D on entry.
4825 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4826 static void PAHcurlL2Apply3DTranspose(const int D1D,
4827  const int Q1D,
4828  const int coeffDim,
4829  const int NE,
4830  const Array<double> &bo,
4831  const Array<double> &bc,
4832  const Array<double> &bot,
4833  const Array<double> &bct,
4834  const Array<double> &gct,
4835  const Vector &pa_data,
4836  const Vector &x,
4837  Vector &y)
4838 {
4839  // See PAHcurlL2Apply3D for comments.
4840 
4841  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4842  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4843 
4844  constexpr static int VDIM = 3;
4845 
4846  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4847  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4848  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4849  auto Bct = Reshape(bct.Read(), D1D, Q1D);
4850  auto Gct = Reshape(gct.Read(), D1D, Q1D);
4851  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4852  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4853  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4854 
4855  MFEM_FORALL(e, NE,
4856  {
      // Values of the three components of x at every quadrature point.
4857  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4858 
4859  for (int qz = 0; qz < Q1D; ++qz)
4860  {
4861  for (int qy = 0; qy < Q1D; ++qy)
4862  {
4863  for (int qx = 0; qx < Q1D; ++qx)
4864  {
4865  for (int c = 0; c < VDIM; ++c)
4866  {
4867  mass[qz][qy][qx][c] = 0.0;
4868  }
4869  }
4870  }
4871  }
4872 
      // osc is the offset of the current component block inside X.
4873  int osc = 0;
4874 
      // Evaluate each component at the quadrature points: the open table Bo is
      // used along the component's own direction, Bc along the other two.
4875  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4876  {
4877  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4878  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4879  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4880 
4881  for (int dz = 0; dz < D1Dz; ++dz)
4882  {
4883  double massXY[MAX_Q1D][MAX_Q1D];
4884  for (int qy = 0; qy < Q1D; ++qy)
4885  {
4886  for (int qx = 0; qx < Q1D; ++qx)
4887  {
4888  massXY[qy][qx] = 0.0;
4889  }
4890  }
4891 
4892  for (int dy = 0; dy < D1Dy; ++dy)
4893  {
4894  double massX[MAX_Q1D];
4895  for (int qx = 0; qx < Q1D; ++qx)
4896  {
4897  massX[qx] = 0.0;
4898  }
4899 
4900  for (int dx = 0; dx < D1Dx; ++dx)
4901  {
4902  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4903  for (int qx = 0; qx < Q1D; ++qx)
4904  {
4905  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4906  }
4907  }
4908 
4909  for (int qy = 0; qy < Q1D; ++qy)
4910  {
4911  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4912  for (int qx = 0; qx < Q1D; ++qx)
4913  {
4914  const double wx = massX[qx];
4915  massXY[qy][qx] += wx * wy;
4916  }
4917  }
4918  }
4919 
4920  for (int qz = 0; qz < Q1D; ++qz)
4921  {
4922  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4923  for (int qy = 0; qy < Q1D; ++qy)
4924  {
4925  for (int qx = 0; qx < Q1D; ++qx)
4926  {
4927  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4928  }
4929  }
4930  }
4931  }
4932 
4933  osc += D1Dx * D1Dy * D1Dz;
4934  } // loop (c) over components
4935 
4936  // Apply D operator.
4937  for (int qz = 0; qz < Q1D; ++qz)
4938  {
4939  for (int qy = 0; qy < Q1D; ++qy)
4940  {
4941  for (int qx = 0; qx < Q1D; ++qx)
4942  {
4943  const double O11 = op(0,qx,qy,qz,e);
4944  if (coeffDim == 1)
4945  {
4946  for (int c = 0; c < VDIM; ++c)
4947  {
4948  mass[qz][qy][qx][c] *= O11;
4949  }
4950  }
4951  else
4952  {
4953  const double O12 = op(1,qx,qy,qz,e);
4954  const double O13 = op(2,qx,qy,qz,e);
4955  const double O21 = op(3,qx,qy,qz,e);
4956  const double O22 = op(4,qx,qy,qz,e);
4957  const double O23 = op(5,qx,qy,qz,e);
4958  const double O31 = op(6,qx,qy,qz,e);
4959  const double O32 = op(7,qx,qy,qz,e);
4960  const double O33 = op(8,qx,qy,qz,e);
4961  const double massX = mass[qz][qy][qx][0];
4962  const double massY = mass[qz][qy][qx][1];
4963  const double massZ = mass[qz][qy][qx][2];
4964  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
4965  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
4966  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
4967  }
4968  }
4969  }
4970  }
4971 
      // Contract the scaled point values back to dofs, one component block of
      // Y at a time; osc is reset and again tracks the current block offset.
4972  // x component
4973  osc = 0;
4974  {
4975  const int D1Dz = D1D;
4976  const int D1Dy = D1D;
4977  const int D1Dx = D1D - 1;
4978 
4979  for (int qz = 0; qz < Q1D; ++qz)
4980  {
4981  double gradXY12[MAX_D1D][MAX_D1D];
4982  double gradXY21[MAX_D1D][MAX_D1D];
4983 
4984  for (int dy = 0; dy < D1Dy; ++dy)
4985  {
4986  for (int dx = 0; dx < D1Dx; ++dx)
4987  {
4988  gradXY12[dy][dx] = 0.0;
4989  gradXY21[dy][dx] = 0.0;
4990  }
4991  }
4992  for (int qy = 0; qy < Q1D; ++qy)
4993  {
4994  double massX[MAX_D1D][2];
4995  for (int dx = 0; dx < D1Dx; ++dx)
4996  {
4997  for (int n = 0; n < 2; ++n)
4998  {
4999  massX[dx][n] = 0.0;
5000  }
5001  }
5002  for (int qx = 0; qx < Q1D; ++qx)
5003  {
5004  for (int dx = 0; dx < D1Dx; ++dx)
5005  {
5006  const double wx = Bot(dx,qx);
5007 
5008  massX[dx][0] += wx * mass[qz][qy][qx][1];
5009  massX[dx][1] += wx * mass[qz][qy][qx][2];
5010  }
5011  }
5012  for (int dy = 0; dy < D1Dy; ++dy)
5013  {
5014  const double wy = Bct(dy,qy);
5015  const double wDy = Gct(dy,qy);
5016 
5017  for (int dx = 0; dx < D1Dx; ++dx)
5018  {
5019  gradXY21[dy][dx] += massX[dx][0] * wy;
5020  gradXY12[dy][dx] += massX[dx][1] * wDy;
5021  }
5022  }
5023  }
5024 
5025  for (int dz = 0; dz < D1Dz; ++dz)
5026  {
5027  const double wz = Bct(dz,qz);
5028  const double wDz = Gct(dz,qz);
5029  for (int dy = 0; dy < D1Dy; ++dy)
5030  {
5031  for (int dx = 0; dx < D1Dx; ++dx)
5032  {
5033  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
5034  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
5035  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5036  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
5037  }
5038  }
5039  }
5040  } // loop qz
5041 
5042  osc += D1Dx * D1Dy * D1Dz;
5043  }
5044 
5045  // y component
5046  {
5047  const int D1Dz = D1D;
5048  const int D1Dy = D1D - 1;
5049  const int D1Dx = D1D;
5050 
5051  for (int qz = 0; qz < Q1D; ++qz)
5052  {
5053  double gradXY02[MAX_D1D][MAX_D1D];
5054  double gradXY20[MAX_D1D][MAX_D1D];
5055 
5056  for (int dy = 0; dy < D1Dy; ++dy)
5057  {
5058  for (int dx = 0; dx < D1Dx; ++dx)
5059  {
5060  gradXY02[dy][dx] = 0.0;
5061  gradXY20[dy][dx] = 0.0;
5062  }
5063  }
5064  for (int qx = 0; qx < Q1D; ++qx)
5065  {
5066  double massY[MAX_D1D][2];
5067  for (int dy = 0; dy < D1Dy; ++dy)
5068  {
5069  massY[dy][0] = 0.0;
5070  massY[dy][1] = 0.0;
5071  }
5072  for (int qy = 0; qy < Q1D; ++qy)
5073  {
5074  for (int dy = 0; dy < D1Dy; ++dy)
5075  {
5076  const double wy = Bot(dy,qy);
5077 
5078  massY[dy][0] += wy * mass[qz][qy][qx][2];
5079  massY[dy][1] += wy * mass[qz][qy][qx][0];
5080  }
5081  }
5082  for (int dx = 0; dx < D1Dx; ++dx)
5083  {
5084  const double wx = Bct(dx,qx);
5085  const double wDx = Gct(dx,qx);
5086 
5087  for (int dy = 0; dy < D1Dy; ++dy)
5088  {
5089  gradXY02[dy][dx] += massY[dy][0] * wDx;
5090  gradXY20[dy][dx] += massY[dy][1] * wx;
5091  }
5092  }
5093  }
5094 
5095  for (int dz = 0; dz < D1Dz; ++dz)
5096  {
5097  const double wz = Bct(dz,qz);
5098  const double wDz = Gct(dz,qz);
5099  for (int dy = 0; dy < D1Dy; ++dy)
5100  {
5101  for (int dx = 0; dx < D1Dx; ++dx)
5102  {
5103  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
5104  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
5105  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5106  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
5107  }
5108  }
5109  }
5110  } // loop qz
5111 
5112  osc += D1Dx * D1Dy * D1Dz;
5113  }
5114 
5115  // z component
5116  {
5117  const int D1Dz = D1D - 1;
5118  const int D1Dy = D1D;
5119  const int D1Dx = D1D;
5120 
      // Unlike the x and y blocks, the outer loop here runs over qx and the
      // temporaries are indexed [dz][dy].
5121  for (int qx = 0; qx < Q1D; ++qx)
5122  {
5123  double gradYZ01[MAX_D1D][MAX_D1D];
5124  double gradYZ10[MAX_D1D][MAX_D1D];
5125 
5126  for (int dy = 0; dy < D1Dy; ++dy)
5127  {
5128  for (int dz = 0; dz < D1Dz; ++dz)
5129  {
5130  gradYZ01[dz][dy] = 0.0;
5131  gradYZ10[dz][dy] = 0.0;
5132  }
5133  }
5134  for (int qy = 0; qy < Q1D; ++qy)
5135  {
5136  double massZ[MAX_D1D][2];
5137  for (int dz = 0; dz < D1Dz; ++dz)
5138  {
5139  for (int n = 0; n < 2; ++n)
5140  {
5141  massZ[dz][n] = 0.0;
5142  }
5143  }
5144  for (int qz = 0; qz < Q1D; ++qz)
5145  {
5146  for (int dz = 0; dz < D1Dz; ++dz)
5147  {
5148  const double wz = Bot(dz,qz);
5149 
5150  massZ[dz][0] += wz * mass[qz][qy][qx][0];
5151  massZ[dz][1] += wz * mass[qz][qy][qx][1];
5152  }
5153  }
5154  for (int dy = 0; dy < D1Dy; ++dy)
5155  {
5156  const double wy = Bct(dy,qy);
5157  const double wDy = Gct(dy,qy);
5158 
5159  for (int dz = 0; dz < D1Dz; ++dz)
5160  {
5161  gradYZ01[dz][dy] += wy * massZ[dz][1];
5162  gradYZ10[dz][dy] += wDy * massZ[dz][0];
5163  }
5164  }
5165  }
5166 
5167  for (int dx = 0; dx < D1Dx; ++dx)
5168  {
5169  const double wx = Bct(dx,qx);
5170  const double wDx = Gct(dx,qx);
5171 
5172  for (int dy = 0; dy < D1Dy; ++dy)
5173  {
5174  for (int dz = 0; dz < D1Dz; ++dz)
5175  {
5176  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
5177  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
5178  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5179  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
5180  }
5181  }
5182  }
5183  } // loop qx
5184  }
5185  });
5186 }
5187 
     // Shared-memory device counterpart of PAHcurlL2Apply3DTranspose. It takes
     // only the (q,d)-indexed tables bo, bc and gc and stages them in shared
     // arrays stored as [d][q]. The host lambda passed to ForallWrap aborts, so
     // this function is usable on device backends only.
     //
     //  D1D, Q1D - 1D closed dof count and 1D quadrature point count, checked
     //             against MAX_D1D and MAX_Q1D on entry.
     //  coeffDim - leading dimension of pa_data per point: 1 for a scalar
     //             scaling, otherwise entries 0..8 are read as a row-major 3x3
     //             matrix (at most maxCoeffDim = 9 entries are stored).
     //  x, y     - 3*(D1D-1)*D1D*D1D values per element; y is accumulated into.
5188 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
5189 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
5190  const int Q1D,
5191  const int coeffDim,
5192  const int NE,
5193  const Array<double> &bo,
5194  const Array<double> &bc,
5195  const Array<double> &gc,
5196  const Vector &pa_data,
5197  const Vector &x,
5198  Vector &y)
5199 {
5200  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
5201  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
5202 
5203  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
5204  auto Bc = Reshape(bc.Read(), Q1D, D1D);
5205  auto Gc = Reshape(gc.Read(), Q1D, D1D);
5206  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
5207  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
5208  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
5209 
5210  auto device_kernel = [=] MFEM_DEVICE (int e)
5211  {
5212  constexpr int VDIM = 3;
5213  constexpr int maxCoeffDim = 9;
5214 
5215  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
5216  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
5217  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
5218 
5219  double opc[maxCoeffDim];
5220  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
5221  MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
5222 
5223  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
5224 
      // Each thread copies the coefficient entries of its own quadrature point
      // (qx,qy,qz) into its thread-local array opc.
5225  MFEM_FOREACH_THREAD(qx,x,Q1D)
5226  {
5227  MFEM_FOREACH_THREAD(qy,y,Q1D)
5228  {
5229  MFEM_FOREACH_THREAD(qz,z,Q1D)
5230  {
5231  for (int i=0; i<coeffDim; ++i)
5232  {
5233  opc[i] = op(i,qx,qy,qz,e);
5234  }
5235  }
5236  }
5237  }
5238 
5239  const int tidx = MFEM_THREAD_ID(x);
5240  const int tidy = MFEM_THREAD_ID(y);
5241  const int tidz = MFEM_THREAD_ID(z);
5242 
      // Threads with z-index 0 stage the basis tables in shared memory, stored
      // as [dof][quad]; sBo has one row fewer than sBc and sGc.
5243  if (tidz == 0)
5244  {
5245  MFEM_FOREACH_THREAD(d,y,D1D)
5246  {
5247  MFEM_FOREACH_THREAD(q,x,Q1D)
5248  {
5249  sBc[d][q] = Bc(q,d);
5250  sGc[d][q] = Gc(q,d);
5251  if (d < D1D-1)
5252  {
5253  sBo[d][q] = Bo(q,d);
5254  }
5255  }
5256  }
5257  }
5258  MFEM_SYNC_THREAD;
5259 
      // One qz plane is processed per iteration; mass and sop hold the point
      // values and coefficient entries of the current plane only.
5260  for (int qz=0; qz < Q1D; ++qz)
5261  {
5262  if (tidz == qz)
5263  {
5264  MFEM_FOREACH_THREAD(qy,y,Q1D)
5265  {
5266  MFEM_FOREACH_THREAD(qx,x,Q1D)
5267  {
5268  for (int i=0; i<3; ++i)
5269  {
5270  mass[qy][qx][i] = 0.0;
5271  }
5272  }
5273  }
5274  }
5275 
5276  int osc = 0;
5277  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
5278  {
5279  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
5280  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
5281  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
5282 
      // Stage the dofs of component c in shared memory.
5283  MFEM_FOREACH_THREAD(dz,z,D1Dz)
5284  {
5285  MFEM_FOREACH_THREAD(dy,y,D1Dy)
5286  {
5287  MFEM_FOREACH_THREAD(dx,x,D1Dx)
5288  {
5289  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
5290  }
5291  }
5292  }
5293  MFEM_SYNC_THREAD;
5294 
5295  if (tidz == qz)
5296  {
      // On the first component, the threads of this plane publish their
      // coefficient entries to shared memory, indexed [entry][tidx][tidy].
5297  if (c == 0)
5298  {
5299  for (int i=0; i<coeffDim; ++i)
5300  {
5301  sop[i][tidx][tidy] = opc[i];
5302  }
5303  }
5304 
5305  MFEM_FOREACH_THREAD(qy,y,Q1D)
5306  {
5307  MFEM_FOREACH_THREAD(qx,x,Q1D)
5308  {
5309  double u = 0.0;
5310 
5311  for (int dz = 0; dz < D1Dz; ++dz)
5312  {
5313  const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
5314 
5315  for (int dy = 0; dy < D1Dy; ++dy)
5316  {
5317  const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
5318 
5319  for (int dx = 0; dx < D1Dx; ++dx)
5320  {
5321  const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
5322  u += wx * wy * wz;
5323  }
5324  }
5325  }
5326 
5327  mass[qy][qx][c] += u;
5328  } // qx
5329  } // qy
5330  } // tidz == qz
5331 
5332  osc += D1Dx * D1Dy * D1Dz;
5333  MFEM_SYNC_THREAD;
5334  } // c
5335 
      // Per-thread contributions of this qz plane to the x, y and z component
      // dofs at (dx,dy,dz); they are added to Y at the end of the iteration.
5336  double dxyz1 = 0.0;
5337  double dxyz2 = 0.0;
5338  double dxyz3 = 0.0;
5339 
5340  MFEM_FOREACH_THREAD(dz,z,D1D)
5341  {
5342  const double wcz = sBc[dz][qz];
5343  const double wcDz = sGc[dz][qz];
      // The open-table weight is set to 0 on the last closed index, where no
      // open dof exists.
5344  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
5345 
5346  MFEM_FOREACH_THREAD(dy,y,D1D)
5347  {
5348  MFEM_FOREACH_THREAD(dx,x,D1D)
5349  {
5350  for (int qy = 0; qy < Q1D; ++qy)
5351  {
5352  const double wcy = sBc[dy][qy];
5353  const double wcDy = sGc[dy][qy];
5354  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
5355 
5356  for (int qx = 0; qx < Q1D; ++qx)
5357  {
5358  const double O11 = sop[0][qx][qy];
5359  double c1, c2, c3;
5360  if (coeffDim == 1)
5361  {
5362  c1 = O11 * mass[qy][qx][0];
5363  c2 = O11 * mass[qy][qx][1];
5364  c3 = O11 * mass[qy][qx][2];
5365  }
5366  else
5367  {
5368  const double O12 = sop[1][qx][qy];
5369  const double O13 = sop[2][qx][qy];
5370  const double O21 = sop[3][qx][qy];
5371  const double O22 = sop[4][qx][qy];
5372  const double O23 = sop[5][qx][qy];
5373  const double O31 = sop[6][qx][qy];
5374  const double O32 = sop[7][qx][qy];
5375  const double O33 = sop[8][qx][qy];
5376 
5377  c1 = (O11*mass[qy][qx][0])+(O12*mass[qy][qx][1])+(O13*mass[qy][qx][2]);
5378  c2 = (O21*mass[qy][qx][0])+(O22*mass[qy][qx][1])+(O23*mass[qy][qx][2]);
5379  c3 = (O31*mass[qy][qx][0])+(O32*mass[qy][qx][1])+(O33*mass[qy][qx][2]);
5380  }
5381 
5382  const double wcx = sBc[dx][qx];
5383  const double wDx = sGc[dx][qx];
5384 
5385  if (dx < D1D-1)
5386  {
5387  const double wx = sBo[dx][qx];
5388  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
5389  }
5390 
5391  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
5392 
5393  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
5394  } // qx
5395  } // qy
5396  } // dx
5397  } // dy
5398  } // dz
5399 
5400  MFEM_SYNC_THREAD;
5401 
      // Add the contributions into the three component blocks of Y; the guards
      // skip indices beyond the open (D1D-1) extent of each block.
5402  MFEM_FOREACH_THREAD(dz,z,D1D)
5403  {
5404  MFEM_FOREACH_THREAD(dy,y,D1D)
5405  {
5406  MFEM_FOREACH_THREAD(dx,x,D1D)
5407  {
5408  if (dx < D1D-1)
5409  {
5410  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
5411  }
5412  if (dy < D1D-1)
5413  {
5414  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
5415  }
5416  if (dz < D1D-1)
5417  {
5418  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
5419  }
5420  }
5421  }
5422  }
5423  } // qz
5424  }; // end of element loop
5425 
5426  auto host_kernel = [&] MFEM_LAMBDA (int)
5427  {
5428  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
5429  };
5430 
      // NOTE(review): the trailing Q1D, Q1D, Q1D arguments are presumably the
      // thread-block extents in x, y, z - confirm against ForallWrap.
5431  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
5432 }
5433 
5434 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
5435 {
5436  if (testType == mfem::FiniteElement::CURL &&
5437  trialType == mfem::FiniteElement::CURL && dim == 3)
5438  {
5439  const int ndata = coeffDim == 1 ? 1 : 9;
5440  if (Device::Allows(Backend::DEVICE_MASK))
5441  {
5442  const int ID = (dofs1D << 4) | quad1D;
5443  switch (ID)
5444  {
5445  case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, ndata,
5446  ne, mapsO->B, mapsC->B,
5447  mapsC->G, pa_data, x, y);
5448  case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, ndata,
5449  ne, mapsO->B, mapsC->B,
5450  mapsC->G, pa_data, x, y);
5451  case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, ndata,
5452  ne, mapsO->B, mapsC->B,
5453  mapsC->G, pa_data, x, y);
5454  case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, ndata,
5455  ne, mapsO->B, mapsC->B,
5456  mapsC->G, pa_data, x, y);
5457  default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne,
5458  mapsO->B, mapsC->B,
5459  mapsC->G, pa_data, x, y);
5460  }
5461  }
5462  else
5463  PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B,
5464  mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
5465  }
5466  else
5467  {
5468  MFEM_ABORT("Unsupported dimension or space!");
5469  }
5470 }
5471 
5472 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
5473 // to get a dof in H(curl) (range). You can think of the range as the "test" space
5474 // and the domain as the "trial" space, but there's no integration.
5475 static void PAHcurlApplyGradient2D(const int c_dofs1D,
5476  const int o_dofs1D,
5477  const int NE,
5478  const Array<double> &B_,
5479  const Array<double> &G_,
5480  const Vector &x_,
5481  Vector &y_)
5482 {
5483  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5484  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5485 
5486  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5487  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5488 
5489  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5490  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5491 
5492  MFEM_FORALL(e, NE,
5493  {
5494  double w[MAX_D1D][MAX_D1D];
5495 
5496  // horizontal part
5497  for (int dx = 0; dx < c_dofs1D; ++dx)
5498  {
5499  for (int ey = 0; ey < c_dofs1D; ++ey)
5500  {
5501  w[dx][ey] = 0.0;
5502  for (int dy = 0; dy < c_dofs1D; ++dy)
5503  {
5504  w[dx][ey] += B(ey, dy) * x(dx, dy, e);
5505  }
5506  }
5507  }
5508 
5509  for (int ey = 0; ey < c_dofs1D; ++ey)
5510  {
5511  for (int ex = 0; ex < o_dofs1D; ++ex)
5512  {
5513  double s = 0.0;
5514  for (int dx = 0; dx < c_dofs1D; ++dx)
5515  {
5516  s += G(ex, dx) * w[dx][ey];
5517  }
5518  const int local_index = ey*o_dofs1D + ex;
5519  y(local_index, e) += s;
5520  }
5521  }
5522 
5523  // vertical part
5524  for (int dx = 0; dx < c_dofs1D; ++dx)
5525  {
5526  for (int ey = 0; ey < o_dofs1D; ++ey)
5527  {
5528  w[dx][ey] = 0.0;
5529  for (int dy = 0; dy < c_dofs1D; ++dy)
5530  {
5531  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5532  }
5533  }
5534  }
5535 
5536  for (int ey = 0; ey < o_dofs1D; ++ey)
5537  {
5538  for (int ex = 0; ex < c_dofs1D; ++ex)
5539  {
5540  double s = 0.0;
5541  for (int dx = 0; dx < c_dofs1D; ++dx)
5542  {
5543  s += B(ex, dx) * w[dx][ey];
5544  }
5545  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5546  y(local_index, e) += s;
5547  }
5548  }
5549  });
5550 }
5551 
5552 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
5553 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
5554  const int o_dofs1D,
5555  const int NE,
5556  const Array<double> &G_,
5557  const Vector &x_,
5558  Vector &y_)
5559 {
5560  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5561 
5562  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5563  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5564 
5565  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5566  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5567 
5568  MFEM_FORALL(e, NE,
5569  {
5570  double w[MAX_D1D][MAX_D1D];
5571 
5572  // horizontal part
5573  for (int dx = 0; dx < c_dofs1D; ++dx)
5574  {
5575  for (int ey = 0; ey < c_dofs1D; ++ey)
5576  {
5577  const int dy = ey;
5578  w[dx][ey] = x(dx, dy, e);
5579  }
5580  }
5581 
5582  for (int ey = 0; ey < c_dofs1D; ++ey)
5583  {
5584  for (int ex = 0; ex < o_dofs1D; ++ex)
5585  {
5586  double s = 0.0;
5587  for (int dx = 0; dx < c_dofs1D; ++dx)
5588  {
5589  s += G(ex, dx) * w[dx][ey];
5590  }
5591  const int local_index = ey*o_dofs1D + ex;
5592  y(local_index, e) += s;
5593  }
5594  }
5595 
5596  // vertical part
5597  for (int dx = 0; dx < c_dofs1D; ++dx)
5598  {
5599  for (int ey = 0; ey < o_dofs1D; ++ey)
5600  {
5601  w[dx][ey] = 0.0;
5602  for (int dy = 0; dy < c_dofs1D; ++dy)
5603  {
5604  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5605  }
5606  }
5607  }
5608 
5609  for (int ey = 0; ey < o_dofs1D; ++ey)
5610  {
5611  for (int ex = 0; ex < c_dofs1D; ++ex)
5612  {
5613  const int dx = ex;
5614  const double s = w[dx][ey];
5615  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5616  y(local_index, e) += s;
5617  }
5618  }
5619  });
5620 }
5621 
5622 static void PAHcurlApplyGradientTranspose2D(
5623  const int c_dofs1D, const int o_dofs1D, const int NE,
5624  const Array<double> &B_, const Array<double> &G_,
5625  const Vector &x_, Vector &y_)
5626 {
5627  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5628  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5629 
5630  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5631  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5632 
5633  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5634  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5635 
5636  MFEM_FORALL(e, NE,
5637  {
5638  double w[MAX_D1D][MAX_D1D];
5639 
5640  // horizontal part (open x, closed y)
5641  for (int dy = 0; dy < c_dofs1D; ++dy)
5642  {
5643  for (int ex = 0; ex < o_dofs1D; ++ex)
5644  {
5645  w[dy][ex] = 0.0;
5646  for (int ey = 0; ey < c_dofs1D; ++ey)
5647  {
5648  const int local_index = ey*o_dofs1D + ex;
5649  w[dy][ex] += B(ey, dy) * x(local_index, e);
5650  }
5651  }
5652  }
5653 
5654  for (int dy = 0; dy < c_dofs1D; ++dy)
5655  {
5656  for (int dx = 0; dx < c_dofs1D; ++dx)
5657  {
5658  double s = 0.0;
5659  for (int ex = 0; ex < o_dofs1D; ++ex)
5660  {
5661  s += G(ex, dx) * w[dy][ex];
5662  }
5663  y(dx, dy, e) += s;
5664  }
5665  }
5666 
5667  // vertical part (open y, closed x)
5668  for (int dy = 0; dy < c_dofs1D; ++dy)
5669  {
5670  for (int ex = 0; ex < c_dofs1D; ++ex)
5671  {
5672  w[dy][ex] = 0.0;
5673  for (int ey = 0; ey < o_dofs1D; ++ey)
5674  {
5675  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5676  w[dy][ex] += G(ey, dy) * x(local_index, e);
5677  }
5678  }
5679  }
5680 
5681  for (int dy = 0; dy < c_dofs1D; ++dy)
5682  {
5683  for (int dx = 0; dx < c_dofs1D; ++dx)
5684  {
5685  double s = 0.0;
5686  for (int ex = 0; ex < c_dofs1D; ++ex)
5687  {
5688  s += B(ex, dx) * w[dy][ex];
5689  }
5690  y(dx, dy, e) += s;
5691  }
5692  }
5693  });
5694 }
5695 
5696 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
5697 // B is identity
5698 static void PAHcurlApplyGradientTranspose2DBId(
5699  const int c_dofs1D, const int o_dofs1D, const int NE,
5700  const Array<double> &G_,
5701  const Vector &x_, Vector &y_)
5702 {
5703  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5704 
5705  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5706  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5707 
5708  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5709  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5710 
5711  MFEM_FORALL(e, NE,
5712  {
5713  double w[MAX_D1D][MAX_D1D];
5714 
5715  // horizontal part (open x, closed y)
5716  for (int dy = 0; dy < c_dofs1D; ++dy)
5717  {
5718  for (int ex = 0; ex < o_dofs1D; ++ex)
5719  {
5720  const int ey = dy;
5721  const int local_index = ey*o_dofs1D + ex;
5722  w[dy][ex] = x(local_index, e);
5723  }
5724  }
5725 
5726  for (int dy = 0; dy < c_dofs1D; ++dy)
5727  {
5728  for (int dx = 0; dx < c_dofs1D; ++dx)
5729  {
5730  double s = 0.0;
5731  for (int ex = 0; ex < o_dofs1D; ++ex)
5732  {
5733  s += G(ex, dx) * w[dy][ex];
5734  }
5735  y(dx, dy, e) += s;
5736  }
5737  }
5738 
5739  // vertical part (open y, closed x)
5740  for (int dy = 0; dy < c_dofs1D; ++dy)
5741  {
5742  for (int ex = 0; ex < c_dofs1D; ++ex)
5743  {
5744  w[dy][ex] = 0.0;
5745  for (int ey = 0; ey < o_dofs1D; ++ey)
5746  {
5747  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5748  w[dy][ex] += G(ey, dy) * x(local_index, e);
5749  }
5750  }
5751  }
5752 
5753  for (int dy = 0; dy < c_dofs1D; ++dy)
5754  {
5755  for (int dx = 0; dx < c_dofs1D; ++dx)
5756  {
5757  const int ex = dx;
5758  const double s = w[dy][ex];
5759  y(dx, dy, e) += s;
5760  }
5761  }
5762  });
5763 }
5764 
5765 static void PAHcurlApplyGradient3D(const int c_dofs1D,
5766  const int o_dofs1D,
5767  const int NE,
5768  const Array<double> &B_,
5769  const Array<double> &G_,
5770  const Vector &x_,
5771  Vector &y_)
5772 {
5773  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5774  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5775 
5776  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5777  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5778 
5779  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5780  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5781 
5782  MFEM_FORALL(e, NE,
5783  {
5784  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5785  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5786 
5787  // ---
5788  // dofs that point parallel to x-axis (open in x, closed in y, z)
5789  // ---
5790 
5791  // contract in z
5792  for (int ez = 0; ez < c_dofs1D; ++ez)
5793  {
5794  for (int dx = 0; dx < c_dofs1D; ++dx)
5795  {
5796  for (int dy = 0; dy < c_dofs1D; ++dy)
5797  {
5798  w1[dx][dy][ez] = 0.0;
5799  for (int dz = 0; dz < c_dofs1D; ++dz)
5800  {
5801  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5802  }
5803  }
5804  }
5805  }
5806 
5807  // contract in y
5808  for (int ez = 0; ez < c_dofs1D; ++ez)
5809  {
5810  for (int ey = 0; ey < c_dofs1D; ++ey)
5811  {
5812  for (int dx = 0; dx < c_dofs1D; ++dx)
5813  {
5814  w2[dx][ey][ez] = 0.0;
5815  for (int dy = 0; dy < c_dofs1D; ++dy)
5816  {
5817  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5818  }
5819  }
5820  }
5821  }
5822 
5823  // contract in x
5824  for (int ez = 0; ez < c_dofs1D; ++ez)
5825  {
5826  for (int ey = 0; ey < c_dofs1D; ++ey)
5827  {
5828  for (int ex = 0; ex < o_dofs1D; ++ex)
5829  {
5830  double s = 0.0;
5831  for (int dx = 0; dx < c_dofs1D; ++dx)
5832  {
5833  s += G(ex, dx) * w2[dx][ey][ez];
5834  }
5835  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5836  y(local_index, e) += s;
5837  }
5838  }
5839  }
5840 
5841  // ---
5842  // dofs that point parallel to y-axis (open in y, closed in x, z)
5843  // ---
5844 
5845  // contract in z
5846  for (int ez = 0; ez < c_dofs1D; ++ez)
5847  {
5848  for (int dx = 0; dx < c_dofs1D; ++dx)
5849  {
5850  for (int dy = 0; dy < c_dofs1D; ++dy)
5851  {
5852  w1[dx][dy][ez] = 0.0;
5853  for (int dz = 0; dz < c_dofs1D; ++dz)
5854  {
5855  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5856  }
5857  }
5858  }
5859  }
5860 
5861  // contract in y
5862  for (int ez = 0; ez < c_dofs1D; ++ez)
5863  {
5864  for (int ey = 0; ey < o_dofs1D; ++ey)
5865  {
5866  for (int dx = 0; dx < c_dofs1D; ++dx)
5867  {
5868  w2[dx][ey][ez] = 0.0;
5869  for (int dy = 0; dy < c_dofs1D; ++dy)
5870  {
5871  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5872  }
5873  }
5874  }
5875  }
5876 
5877  // contract in x
5878  for (int ez = 0; ez < c_dofs1D; ++ez)
5879  {
5880  for (int ey = 0; ey < o_dofs1D; ++ey)
5881  {
5882  for (int ex = 0; ex < c_dofs1D; ++ex)
5883  {
5884  double s = 0.0;
5885  for (int dx = 0; dx < c_dofs1D; ++dx)
5886  {
5887  s += B(ex, dx) * w2[dx][ey][ez];
5888  }
5889  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5890  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5891  y(local_index, e) += s;
5892  }
5893  }
5894  }
5895 
5896  // ---
5897  // dofs that point parallel to z-axis (open in z, closed in x, y)
5898  // ---
5899 
5900  // contract in z
5901  for (int ez = 0; ez < o_dofs1D; ++ez)
5902  {
5903  for (int dx = 0; dx < c_dofs1D; ++dx)
5904  {
5905  for (int dy = 0; dy < c_dofs1D; ++dy)
5906  {
5907  w1[dx][dy][ez] = 0.0;
5908  for (int dz = 0; dz < c_dofs1D; ++dz)
5909  {
5910  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5911  }
5912  }
5913  }
5914  }
5915 
5916  // contract in y
5917  for (int ez = 0; ez < o_dofs1D; ++ez)
5918  {
5919  for (int ey = 0; ey < c_dofs1D; ++ey)
5920  {
5921  for (int dx = 0; dx < c_dofs1D; ++dx)
5922  {
5923  w2[dx][ey][ez] = 0.0;
5924  for (int dy = 0; dy < c_dofs1D; ++dy)
5925  {
5926  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5927  }
5928  }
5929  }
5930  }
5931 
5932  // contract in x
5933  for (int ez = 0; ez < o_dofs1D; ++ez)
5934  {
5935  for (int ey = 0; ey < c_dofs1D; ++ey)
5936  {
5937  for (int ex = 0; ex < c_dofs1D; ++ex)
5938  {
5939  double s = 0.0;
5940  for (int dx = 0; dx < c_dofs1D; ++dx)
5941  {
5942  s += B(ex, dx) * w2[dx][ey][ez];
5943  }
5944  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5945  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5946  y(local_index, e) += s;
5947  }
5948  }
5949  }
5950  });
5951 }
5952 
5953 // Specialization of PAHcurlApplyGradient3D to the case where
5954 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
5955  const int o_dofs1D,
5956  const int NE,
5957  const Array<double> &G_,
5958  const Vector &x_,
5959  Vector &y_)
5960 {
5961  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5962 
5963  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5964  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5965 
5966  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5967  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5968 
5969  MFEM_FORALL(e, NE,
5970  {
5971  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5972  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5973 
5974  // ---
5975  // dofs that point parallel to x-axis (open in x, closed in y, z)
5976  // ---
5977 
5978  // contract in z
5979  for (int ez = 0; ez < c_dofs1D; ++ez)
5980  {
5981  for (int dx = 0; dx < c_dofs1D; ++dx)
5982  {
5983  for (int dy = 0; dy < c_dofs1D; ++dy)
5984  {
5985  const int dz = ez;
5986  w1[dx][dy][ez] = x(dx, dy, dz, e);
5987  }
5988  }
5989  }
5990 
5991  // contract in y
5992  for (int ez = 0; ez < c_dofs1D; ++ez)
5993  {
5994  for (int ey = 0; ey < c_dofs1D; ++ey)
5995  {
5996  for (int dx = 0; dx < c_dofs1D; ++dx)
5997  {
5998  const int dy = ey;
5999  w2[dx][ey][ez] = w1[dx][dy][ez];
6000  }
6001  }
6002  }
6003 
6004  // contract in x
6005  for (int ez = 0; ez < c_dofs1D; ++ez)
6006  {
6007  for (int ey = 0; ey < c_dofs1D; ++ey)
6008  {
6009  for (int ex = 0; ex < o_dofs1D; ++ex)
6010  {
6011  double s = 0.0;
6012  for (int dx = 0; dx < c_dofs1D; ++dx)
6013  {
6014  s += G(ex, dx) * w2[dx][ey][ez];
6015  }
6016  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6017  y(local_index, e) += s;
6018  }
6019  }
6020  }
6021 
6022  // ---
6023  // dofs that point parallel to y-axis (open in y, closed in x, z)
6024  // ---
6025 
6026  // contract in z
6027  for (int ez = 0; ez < c_dofs1D; ++ez)
6028  {
6029  for (int dx = 0; dx < c_dofs1D; ++dx)
6030  {
6031  for (int dy = 0; dy < c_dofs1D; ++dy)
6032  {
6033  const int dz = ez;
6034  w1[dx][dy][ez] = x(dx, dy, dz, e);
6035  }
6036  }
6037  }
6038 
6039  // contract in y
6040  for (int ez = 0; ez < c_dofs1D; ++ez)
6041  {
6042  for (int ey = 0; ey < o_dofs1D; ++ey)
6043  {
6044  for (int dx = 0; dx < c_dofs1D; ++dx)
6045  {
6046  w2[dx][ey][ez] = 0.0;
6047  for (int dy = 0; dy < c_dofs1D; ++dy)
6048  {
6049  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
6050  }
6051  }
6052  }
6053  }
6054 
6055  // contract in x
6056  for (int ez = 0; ez < c_dofs1D; ++ez)
6057  {
6058  for (int ey = 0; ey < o_dofs1D; ++ey)
6059  {
6060  for (int ex = 0; ex < c_dofs1D; ++ex)
6061  {
6062  const int dx = ex;
6063  const double s = w2[dx][ey][ez];
6064  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6065  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6066  y(local_index, e) += s;
6067  }
6068  }
6069  }
6070 
6071  // ---
6072  // dofs that point parallel to z-axis (open in z, closed in x, y)
6073  // ---
6074 
6075  // contract in z
6076  for (int ez = 0; ez < o_dofs1D; ++ez)
6077  {
6078  for (int dx = 0; dx < c_dofs1D; ++dx)
6079  {
6080  for (int dy = 0; dy < c_dofs1D; ++dy)
6081  {
6082  w1[dx][dy][ez] = 0.0;
6083  for (int dz = 0; dz < c_dofs1D; ++dz)
6084  {
6085  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
6086  }
6087  }
6088  }
6089  }
6090 
6091  // contract in y
6092  for (int ez = 0; ez < o_dofs1D; ++ez)
6093  {
6094  for (int ey = 0; ey < c_dofs1D; ++ey)
6095  {
6096  for (int dx = 0; dx < c_dofs1D; ++dx)
6097  {
6098  const int dy = ey;
6099  w2[dx][ey][ez] = w1[dx][dy][ez];
6100  }
6101  }
6102  }
6103 
6104  // contract in x
6105  for (int ez = 0; ez < o_dofs1D; ++ez)
6106  {
6107  for (int ey = 0; ey < c_dofs1D; ++ey)
6108  {
6109  for (int ex = 0; ex < c_dofs1D; ++ex)
6110  {
6111  const int dx = ex;
6112  const double s = w2[dx][ey][ez];
6113  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6114  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6115  y(local_index, e) += s;
6116  }
6117  }
6118  }
6119  });
6120 }
6121 
6122 static void PAHcurlApplyGradientTranspose3D(
6123  const int c_dofs1D, const int o_dofs1D, const int NE,
6124  const Array<double> &B_, const Array<double> &G_,
6125  const Vector &x_, Vector &y_)
6126 {
6127  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
6128  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6129 
6130  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6131  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6132 
6133  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6134  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6135 
6136  MFEM_FORALL(e, NE,
6137  {
6138  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6139  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6140  // ---
6141  // dofs that point parallel to x-axis (open in x, closed in y, z)
6142  // ---
6143 
6144  // contract in z
6145  for (int dz = 0; dz < c_dofs1D; ++dz)
6146  {
6147  for (int ex = 0; ex < o_dofs1D; ++ex)
6148  {
6149  for (int ey = 0; ey < c_dofs1D; ++ey)
6150  {
6151  w1[ex][ey][dz] = 0.0;
6152  for (int ez = 0; ez < c_dofs1D; ++ez)
6153  {
6154  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6155  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6156  }
6157  }
6158  }
6159  }
6160 
6161  // contract in y
6162  for (int dz = 0; dz < c_dofs1D; ++dz)
6163  {
6164  for (int dy = 0; dy < c_dofs1D; ++dy)
6165  {
6166  for (int ex = 0; ex < o_dofs1D; ++ex)
6167  {
6168  w2[ex][dy][dz] = 0.0;
6169  for (int ey = 0; ey < c_dofs1D; ++ey)
6170  {
6171  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6172  }
6173  }
6174  }
6175  }
6176 
6177  // contract in x
6178  for (int dz = 0; dz < c_dofs1D; ++dz)
6179  {
6180  for (int dy = 0; dy < c_dofs1D; ++dy)
6181  {
6182  for (int dx = 0; dx < c_dofs1D; ++dx)
6183  {
6184  double s = 0.0;
6185  for (int ex = 0; ex < o_dofs1D; ++ex)
6186  {
6187  s += G(ex, dx) * w2[ex][dy][dz];
6188  }
6189  y(dx, dy, dz, e) += s;
6190  }
6191  }
6192  }
6193 
6194  // ---
6195  // dofs that point parallel to y-axis (open in y, closed in x, z)
6196  // ---
6197 
6198  // contract in z
6199  for (int dz = 0; dz < c_dofs1D; ++dz)
6200  {
6201  for (int ex = 0; ex < c_dofs1D; ++ex)
6202  {
6203  for (int ey = 0; ey < o_dofs1D; ++ey)
6204  {
6205  w1[ex][ey][dz] = 0.0;
6206  for (int ez = 0; ez < c_dofs1D; ++ez)
6207  {
6208  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6209  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6210  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6211  }
6212  }
6213  }
6214  }
6215 
6216  // contract in y
6217  for (int dz = 0; dz < c_dofs1D; ++dz)
6218  {
6219  for (int dy = 0; dy < c_dofs1D; ++dy)
6220  {
6221  for (int ex = 0; ex < c_dofs1D; ++ex)
6222  {
6223  w2[ex][dy][dz] = 0.0;
6224  for (int ey = 0; ey < o_dofs1D; ++ey)
6225  {
6226  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6227  }
6228  }
6229  }
6230  }
6231 
6232  // contract in x
6233  for (int dz = 0; dz < c_dofs1D; ++dz)
6234  {
6235  for (int dy = 0; dy < c_dofs1D; ++dy)
6236  {
6237  for (int dx = 0; dx < c_dofs1D; ++dx)
6238  {
6239  double s = 0.0;
6240  for (int ex = 0; ex < c_dofs1D; ++ex)
6241  {
6242  s += B(ex, dx) * w2[ex][dy][dz];
6243  }
6244  y(dx, dy, dz, e) += s;
6245  }
6246  }
6247  }
6248 
6249  // ---
6250  // dofs that point parallel to z-axis (open in z, closed in x, y)
6251  // ---
6252 
6253  // contract in z
6254  for (int dz = 0; dz < c_dofs1D; ++dz)
6255  {
6256  for (int ex = 0; ex < c_dofs1D; ++ex)
6257  {
6258  for (int ey = 0; ey < c_dofs1D; ++ey)
6259  {
6260  w1[ex][ey][dz] = 0.0;
6261  for (int ez = 0; ez < o_dofs1D; ++ez)
6262  {
6263  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6264  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6265  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6266  }
6267  }
6268  }
6269  }
6270 
6271  // contract in y
6272  for (int dz = 0; dz < c_dofs1D; ++dz)
6273  {
6274  for (int dy = 0; dy < c_dofs1D; ++dy)
6275  {
6276  for (int ex = 0; ex < c_dofs1D; ++ex)
6277  {
6278  w2[ex][dy][dz] = 0.0;
6279  for (int ey = 0; ey < c_dofs1D; ++ey)
6280  {
6281  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6282  }
6283  }
6284  }
6285  }
6286 
6287  // contract in x
6288  for (int dz = 0; dz < c_dofs1D; ++dz)
6289  {
6290  for (int dy = 0; dy < c_dofs1D; ++dy)
6291  {
6292  for (int dx = 0; dx < c_dofs1D; ++dx)
6293  {
6294  double s = 0.0;
6295  for (int ex = 0; ex < c_dofs1D; ++ex)
6296  {
6297  s += B(ex, dx) * w2[ex][dy][dz];
6298  }
6299  y(dx, dy, dz, e) += s;
6300  }
6301  }
6302  }
6303  });
6304 }
6305 
6306 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
6307 static void PAHcurlApplyGradientTranspose3DBId(
6308  const int c_dofs1D, const int o_dofs1D, const int NE,
6309  const Array<double> &G_,
6310  const Vector &x_, Vector &y_)
6311 {
6312  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6313 
6314  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6315  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6316 
6317  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6318  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6319 
6320  MFEM_FORALL(e, NE,
6321  {
6322  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6323  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6324  // ---
6325  // dofs that point parallel to x-axis (open in x, closed in y, z)
6326  // ---
6327 
6328  // contract in z
6329  for (int dz = 0; dz < c_dofs1D; ++dz)
6330  {
6331  for (int ex = 0; ex < o_dofs1D; ++ex)
6332  {
6333  for (int ey = 0; ey < c_dofs1D; ++ey)
6334  {
6335  const int ez = dz;
6336  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6337  w1[ex][ey][dz] = x(local_index, e);
6338  }
6339  }
6340  }
6341 
6342  // contract in y
6343  for (int dz = 0; dz < c_dofs1D; ++dz)
6344  {
6345  for (int dy = 0; dy < c_dofs1D; ++dy)
6346  {
6347  for (int ex = 0; ex < o_dofs1D; ++ex)
6348  {
6349  const int ey = dy;
6350  w2[ex][dy][dz] = w1[ex][ey][dz];
6351  }
6352  }
6353  }
6354 
6355  // contract in x
6356  for (int dz = 0; dz < c_dofs1D; ++dz)
6357  {
6358  for (int dy = 0; dy < c_dofs1D; ++dy)
6359  {
6360  for (int dx = 0; dx < c_dofs1D; ++dx)
6361  {
6362  double s = 0.0;
6363  for (int ex = 0; ex < o_dofs1D; ++ex)
6364  {
6365  s += G(ex, dx) * w2[ex][dy][dz];
6366  }
6367  y(dx, dy, dz, e) += s;
6368  }
6369  }
6370  }
6371 
6372  // ---
6373  // dofs that point parallel to y-axis (open in y, closed in x, z)
6374  // ---
6375 
6376  // contract in z
6377  for (int dz = 0; dz < c_dofs1D; ++dz)
6378  {
6379  for (int ex = 0; ex < c_dofs1D; ++ex)
6380  {
6381  for (int ey = 0; ey < o_dofs1D; ++ey)
6382  {
6383  const int ez = dz;
6384  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6385  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6386  w1[ex][ey][dz] = x(local_index, e);
6387  }
6388  }
6389  }
6390 
6391  // contract in y
6392  for (int dz = 0; dz < c_dofs1D; ++dz)
6393  {
6394  for (int dy = 0; dy < c_dofs1D; ++dy)
6395  {
6396  for (int ex = 0; ex < c_dofs1D; ++ex)
6397  {
6398  w2[ex][dy][dz] = 0.0;
6399  for (int ey = 0; ey < o_dofs1D; ++ey)
6400  {
6401  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6402  }
6403  }
6404  }
6405  }
6406 
6407  // contract in x
6408  for (int dz = 0; dz < c_dofs1D; ++dz)
6409  {
6410  for (int dy = 0; dy < c_dofs1D; ++dy)
6411  {
6412  for (int dx = 0; dx < c_dofs1D; ++dx)
6413  {
6414  const int ex = dx;
6415  double s = w2[ex][dy][dz];
6416  y(dx, dy, dz, e) += s;
6417  }
6418  }
6419  }
6420 
6421  // ---
6422  // dofs that point parallel to z-axis (open in z, closed in x, y)
6423  // ---
6424 
6425  // contract in z
6426  for (int dz = 0; dz < c_dofs1D; ++dz)
6427  {
6428  for (int ex = 0; ex < c_dofs1D; ++ex)
6429  {
6430  for (int ey = 0; ey < c_dofs1D; ++ey)
6431  {
6432  w1[ex][ey][dz] = 0.0;
6433  for (int ez = 0; ez < o_dofs1D; ++ez)
6434  {
6435  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6436  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6437  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6438  }
6439  }
6440  }
6441  }
6442 
6443  // contract in y
6444  for (int dz = 0; dz < c_dofs1D; ++dz)
6445  {
6446  for (int dy = 0; dy < c_dofs1D; ++dy)
6447  {
6448  for (int ex = 0; ex < c_dofs1D; ++ex)
6449  {
6450  const int ey = dy;
6451  w2[ex][dy][dz] = w1[ex][ey][dz];
6452  }
6453  }
6454  }
6455 
6456  // contract in x
6457  for (int dz = 0; dz < c_dofs1D; ++dz)
6458  {
6459  for (int dy = 0; dy < c_dofs1D; ++dy)
6460  {
6461  for (int dx = 0; dx < c_dofs1D; ++dx)
6462  {
6463  const int ex = dx;
6464  double s = w2[ex][dy][dz];
6465  y(dx, dy, dz, e) += s;
6466  }
6467  }
6468  }
6469  });
6470 }
6471 
6472 void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
6473  const FiniteElementSpace &test_fes)
6474 {
6475  // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6476  Mesh *mesh = trial_fes.GetMesh();
6477  const FiniteElement *trial_fel = trial_fes.GetFE(0);
6478  const FiniteElement *test_fel = test_fes.GetFE(0);
6479 
6480  const NodalTensorFiniteElement *trial_el =
6481  dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6482  MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
6483 
6484  const VectorTensorFiniteElement *test_el =
6485  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6486  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
6487 
6488  const int dims = trial_el->GetDim();
6489  MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!");
6490  dim = mesh->Dimension();
6491  MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!");
6492  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
6493  "Orders do not match!");
6494  ne = trial_fes.GetNE();
6495 
6496  const int order = trial_el->GetOrder();
6497  dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());
6498  mfem::QuadratureFunctions1D qf1d;
6499  mfem::IntegrationRule closed_ir;
6500  closed_ir.SetSize(order + 1);
6501  qf1d.GaussLobatto(order + 1, &closed_ir);
6502  mfem::IntegrationRule open_ir;
6503  open_ir.SetSize(order);
6504  qf1d.GaussLegendre(order, &open_ir);
6505 
6506  maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
6507  o_dofs1D = maps_O_C->nqpt;
6508  if (trial_el->GetBasisType() == BasisType::GaussLobatto)
6509  {
6510  B_id = true;
6511  c_dofs1D = maps_O_C->ndof;
6512  }
6513  else
6514  {
6515  B_id = false;
6516  maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
6517  c_dofs1D = maps_C_C->nqpt;
6518  }
6519 }
6520 
6521 void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const
6522 {
6523  if (dim == 3)
6524  {
6525  if (B_id)
6526  {
6527  PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne,
6528  maps_O_C->G, x, y);
6529  }
6530  else
6531  {
6532  PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6533  maps_O_C->G, x, y);
6534  }
6535  }
6536  else if (dim == 2)
6537  {
6538  if (B_id)
6539  {
6540  PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne,
6541  maps_O_C->G, x, y);
6542  }
6543  else
6544  {
6545  PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G,
6546  x, y);
6547  }
6548  }
6549  else
6550  {
6551  mfem_error("Bad dimension!");
6552  }
6553 }
6554 
6555 void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
6556 {
6557  if (dim == 3)
6558  {
6559  if (B_id)
6560  {
6561  PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne,
6562  maps_O_C->G, x, y);
6563  }
6564  else
6565  {
6566  PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6567  maps_O_C->G, x, y);
6568  }
6569  }
6570  else if (dim == 2)
6571  {
6572  if (B_id)
6573  {
6574  PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne,
6575  maps_O_C->G, x, y);
6576  }
6577  else
6578  {
6579  PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6580  maps_O_C->G, x, y);
6581  }
6582  }
6583  else
6584  {
6585  mfem_error("Bad dimension!");
6586  }
6587 }
6588 
6589 static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D,
6590  const int o_dofs1D,
6591  const int NE,
6592  const Array<double> &Bclosed,
6593  const Array<double> &Bopen,
6594  const Vector &pa_data,
6595  const Vector &x_,
6596  Vector &y_)
6597 {
6598  auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6599  auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6600 
6601  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
6602  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6603 
6604  auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
6605  NE);
6606 
6607  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6608  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6609 
6610  MFEM_FORALL(e, NE,
6611  {
6612  double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
6613  double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
6614 
6615  // dofs that point parallel to x-axis (open in x, closed in y, z)
6616 
6617  // contract in z
6618  for (int ez = 0; ez < c_dofs1D; ++ez)
6619  {
6620  for (int dx = 0; dx < c_dofs1D; ++dx)
6621  {
6622  for (int dy = 0; dy < c_dofs1D; ++dy)
6623  {
6624  for (int j=0; j<3; ++j)
6625  {
6626  w1[j][dx][dy][ez] = 0.0;
6627  for (int dz = 0; dz < c_dofs1D; ++dz)
6628  {
6629  w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
6630  }
6631  }
6632  }
6633  }
6634  }
6635 
6636  // contract in y
6637  for (int ez = 0; ez < c_dofs1D; ++ez)
6638  {
6639  for (int ey = 0; ey < c_dofs1D; ++ey)
6640  {
6641  for (int dx = 0; dx < c_dofs1D; ++dx)
6642  {
6643  for (int j=0; j<3; ++j)
6644  {
6645  w2[j][dx][ey][ez] = 0.0;
6646  for (int dy = 0; dy < c_dofs1D; ++dy)
6647  {
6648  w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
6649  }
6650  }
6651  }
6652  }
6653  }
6654 
6655  // contract in x
6656  for (int ez = 0; ez < c_dofs1D; ++ez)
6657  {
6658  for (int ey = 0; ey < c_dofs1D; ++ey)
6659  {
6660  for (int ex = 0; ex < o_dofs1D; ++ex)
6661  {
6662  for (int j=0; j<3; ++j)
6663  {
6664  double s = 0.0;
6665  for (int dx = 0; dx < c_dofs1D; ++dx)
6666  {
6667  s += Bo(ex, dx) * w2[j][dx][ey][ez];
6668  }
6669  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6670  y(local_index, e) += s * vk(j, local_index, e);
6671  }
6672  }
6673  }
6674  }
6675 
6676  // dofs that point parallel to y-axis (open in y, closed in x, z)
6677 
6678  // contract in z
6679  for (int ez = 0; ez < c_dofs1D; ++ez)
6680  {
6681  for (int dx = 0; dx < c_dofs1D; ++dx)
6682  {
6683  for (int dy = 0; dy < c_dofs1D; ++dy)
6684  {
6685  for (int j=0; j<3; ++j)
6686  {
6687  w1[j][dx][dy][ez] = 0.0;
6688  for (int dz = 0; dz < c_dofs1D; ++dz)
6689  {
6690  w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
6691  }
6692  }
6693  }
6694  }
6695  }
6696 
6697  // contract in y
6698  for (int ez = 0; ez < c_dofs1D; ++ez)
6699  {
6700  for (int ey = 0; ey < o_dofs1D; ++ey)
6701  {
6702  for (int dx = 0; dx < c_dofs1D; ++dx)
6703  {
6704  for (int j=0; j<3; ++j)
6705  {
6706  w2[j][dx][ey][ez] = 0.0;
6707  for (int dy = 0; dy < c_dofs1D; ++dy)
6708  {
6709  w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez];
6710  }
6711  }
6712  }
6713  }
6714  }
6715 
6716  // contract in x
6717  for (int ez = 0; ez < c_dofs1D; ++ez)
6718  {
6719  for (int ey = 0; ey < o_dofs1D; ++ey)
6720  {
6721  for (int ex = 0; ex < c_dofs1D; ++ex)
6722  {
6723  for (int j=0; j<3; ++j)
6724  {
6725  double s = 0.0;
6726  for (int dx = 0; dx < c_dofs1D; ++dx)
6727  {
6728  s += Bc(ex, dx) * w2[j][dx][ey][ez];
6729  }
6730  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6731  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6732  y(local_index, e) += s * vk(j, local_index, e);
6733  }
6734  }
6735  }
6736  }
6737 
6738  // dofs that point parallel to z-axis (open in z, closed in x, y)
6739 
6740  // contract in z
6741  for (int ez = 0; ez < o_dofs1D; ++ez)
6742  {
6743  for (int dx = 0; dx < c_dofs1D; ++dx)
6744  {
6745  for (int dy = 0; dy < c_dofs1D; ++dy)
6746  {
6747  for (int j=0; j<3; ++j)
6748  {
6749  w1[j][dx][dy][ez] = 0.0;
6750  for (int dz = 0; dz < c_dofs1D; ++dz)
6751  {
6752  w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e);
6753  }
6754  }
6755  }
6756  }
6757  }
6758 
6759  // contract in y
6760  for (int ez = 0; ez < o_dofs1D; ++ez)
6761  {
6762  for (int ey = 0; ey < c_dofs1D; ++ey)
6763