MFEM  v4.5.2 Finite element discretization library
bilininteg_hcurl.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qspace.hpp"
16
17 using namespace std;
18
19 namespace mfem
20 {
21
22 void PAHcurlHdivSetup3D(const int Q1D,
23  const int coeffDim,
24  const int NE,
25  const bool transpose,
26  const Array<double> &w_,
27  const Vector &j,
28  Vector &coeff_,
29  Vector &op);
30
31 void PAHcurlMassApply2D(const int D1D,
32  const int Q1D,
33  const int NE,
34  const bool symmetric,
35  const Array<double> &bo,
36  const Array<double> &bc,
37  const Array<double> &bot,
38  const Array<double> &bct,
39  const Vector &pa_data,
40  const Vector &x,
41  Vector &y)
42 {
43  constexpr static int VDIM = 2;
44  constexpr static int MAX_D1D = HCURL_MAX_D1D;
45  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
46
47  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
48  auto Bc = Reshape(bc.Read(), Q1D, D1D);
49  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
50  auto Bct = Reshape(bct.Read(), D1D, Q1D);
51  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
52  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
53  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
54
55  MFEM_FORALL(e, NE,
56  {
57  double mass[MAX_Q1D][MAX_Q1D][VDIM];
58
59  for (int qy = 0; qy < Q1D; ++qy)
60  {
61  for (int qx = 0; qx < Q1D; ++qx)
62  {
63  for (int c = 0; c < VDIM; ++c)
64  {
65  mass[qy][qx][c] = 0.0;
66  }
67  }
68  }
69
70  int osc = 0;
71
72  for (int c = 0; c < VDIM; ++c) // loop over x, y components
73  {
74  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
75  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
76
77  for (int dy = 0; dy < D1Dy; ++dy)
78  {
79  double massX[MAX_Q1D];
80  for (int qx = 0; qx < Q1D; ++qx)
81  {
82  massX[qx] = 0.0;
83  }
84
85  for (int dx = 0; dx < D1Dx; ++dx)
86  {
87  const double t = X(dx + (dy * D1Dx) + osc, e);
88  for (int qx = 0; qx < Q1D; ++qx)
89  {
90  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
91  }
92  }
93
94  for (int qy = 0; qy < Q1D; ++qy)
95  {
96  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
97  for (int qx = 0; qx < Q1D; ++qx)
98  {
99  mass[qy][qx][c] += massX[qx] * wy;
100  }
101  }
102  }
103
104  osc += D1Dx * D1Dy;
105  } // loop (c) over components
106
107  // Apply D operator.
108  for (int qy = 0; qy < Q1D; ++qy)
109  {
110  for (int qx = 0; qx < Q1D; ++qx)
111  {
112  const double O11 = op(qx,qy,0,e);
113  const double O21 = op(qx,qy,1,e);
114  const double O12 = symmetric ? O21 : op(qx,qy,2,e);
115  const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
116  const double massX = mass[qy][qx][0];
117  const double massY = mass[qy][qx][1];
118  mass[qy][qx][0] = (O11*massX)+(O12*massY);
119  mass[qy][qx][1] = (O21*massX)+(O22*massY);
120  }
121  }
122
123  for (int qy = 0; qy < Q1D; ++qy)
124  {
125  osc = 0;
126
127  for (int c = 0; c < VDIM; ++c) // loop over x, y components
128  {
129  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
130  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
131
132  double massX[MAX_D1D];
133  for (int dx = 0; dx < D1Dx; ++dx)
134  {
135  massX[dx] = 0.0;
136  }
137  for (int qx = 0; qx < Q1D; ++qx)
138  {
139  for (int dx = 0; dx < D1Dx; ++dx)
140  {
141  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
142  }
143  }
144
145  for (int dy = 0; dy < D1Dy; ++dy)
146  {
147  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
148
149  for (int dx = 0; dx < D1Dx; ++dx)
150  {
151  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
152  }
153  }
154
155  osc += D1Dx * D1Dy;
156  } // loop c
157  } // loop qy
158  }); // end of element loop
159 }
160
161 void PAHcurlMassAssembleDiagonal2D(const int D1D,
162  const int Q1D,
163  const int NE,
164  const bool symmetric,
165  const Array<double> &bo,
166  const Array<double> &bc,
167  const Vector &pa_data,
168  Vector &diag)
169 {
170  constexpr static int VDIM = 2;
171  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
172
173  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
174  auto Bc = Reshape(bc.Read(), Q1D, D1D);
175  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
176  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
177
178  MFEM_FORALL(e, NE,
179  {
180  int osc = 0;
181
182  for (int c = 0; c < VDIM; ++c) // loop over x, y components
183  {
184  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
185  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
186
187  double mass[MAX_Q1D];
188
189  for (int dy = 0; dy < D1Dy; ++dy)
190  {
191  for (int qx = 0; qx < Q1D; ++qx)
192  {
193  mass[qx] = 0.0;
194  for (int qy = 0; qy < Q1D; ++qy)
195  {
196  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
197
198  mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
199  op(qx,qy,symmetric ? 2 : 3, e));
200  }
201  }
202
203  for (int dx = 0; dx < D1Dx; ++dx)
204  {
205  for (int qx = 0; qx < Q1D; ++qx)
206  {
207  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
208  D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
209  }
210  }
211  }
212
213  osc += D1Dx * D1Dy;
214  } // loop c
215  }); // end of element loop
216 }
217
218 void PAHcurlMassAssembleDiagonal3D(const int D1D,
219  const int Q1D,
220  const int NE,
221  const bool symmetric,
222  const Array<double> &bo,
223  const Array<double> &bc,
224  const Vector &pa_data,
225  Vector &diag)
226 {
227  constexpr static int MAX_D1D = HCURL_MAX_D1D;
228  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
229
230  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
231  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
232  constexpr static int VDIM = 3;
233
234  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
235  auto Bc = Reshape(bc.Read(), Q1D, D1D);
236  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
237  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
238
239  MFEM_FORALL(e, NE,
240  {
241  int osc = 0;
242
243  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
244  {
245  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
246  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
247  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
248
249  const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
250  (symmetric ? 5 : 8));
251
252  double mass[MAX_Q1D];
253
254  for (int dz = 0; dz < D1Dz; ++dz)
255  {
256  for (int dy = 0; dy < D1Dy; ++dy)
257  {
258  for (int qx = 0; qx < Q1D; ++qx)
259  {
260  mass[qx] = 0.0;
261  for (int qy = 0; qy < Q1D; ++qy)
262  {
263  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
264
265  for (int qz = 0; qz < Q1D; ++qz)
266  {
267  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
268
269  mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
270  }
271  }
272  }
273
274  for (int dx = 0; dx < D1Dx; ++dx)
275  {
276  for (int qx = 0; qx < Q1D; ++qx)
277  {
278  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
279  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
280  }
281  }
282  }
283  }
284
285  osc += D1Dx * D1Dy * D1Dz;
286  } // loop c
287  }); // end of element loop
288 }
289
290 template<int T_D1D, int T_Q1D>
291 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
292  const int Q1D,
293  const int NE,
294  const bool symmetric,
295  const Array<double> &bo,
296  const Array<double> &bc,
297  const Vector &pa_data,
298  Vector &diag)
299 {
300  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
301  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
302
303  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
304  auto Bc = Reshape(bc.Read(), Q1D, D1D);
305  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
306  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
307
308  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
309  {
310  constexpr int VDIM = 3;
311  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
312  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
313
314  MFEM_SHARED double sBo[tQ1D][tD1D];
315  MFEM_SHARED double sBc[tQ1D][tD1D];
316
317  double op3[3];
318  MFEM_SHARED double sop[3][tQ1D][tQ1D];
319
321  {
323  {
325  {
326  op3[0] = op(qx,qy,qz,0,e);
327  op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
328  op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
329  }
330  }
331  }
332
333  const int tidx = MFEM_THREAD_ID(x);
334  const int tidy = MFEM_THREAD_ID(y);
335  const int tidz = MFEM_THREAD_ID(z);
336
337  if (tidz == 0)
338  {
340  {
342  {
343  sBc[q][d] = Bc(q,d);
344  if (d < D1D-1)
345  {
346  sBo[q][d] = Bo(q,d);
347  }
348  }
349  }
350  }
352
353  int osc = 0;
354  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
355  {
356  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
357  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
358  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
359
360  double dxyz = 0.0;
361
362  for (int qz=0; qz < Q1D; ++qz)
363  {
364  if (tidz == qz)
365  {
366  for (int i=0; i<3; ++i)
367  {
368  sop[i][tidx][tidy] = op3[i];
369  }
370  }
371
373
375  {
376  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
377
379  {
381  {
382  for (int qy = 0; qy < Q1D; ++qy)
383  {
384  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
385
386  for (int qx = 0; qx < Q1D; ++qx)
387  {
388  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
389  dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
390  }
391  }
392  }
393  }
394  }
395
397  } // qz loop
398
400  {
402  {
404  {
405  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
406  }
407  }
408  }
409
410  osc += D1Dx * D1Dy * D1Dz;
411  } // c loop
412  }); // end of element loop
413 }
414
415 void PAHcurlMassApply3D(const int D1D,
416  const int Q1D,
417  const int NE,
418  const bool symmetric,
419  const Array<double> &bo,
420  const Array<double> &bc,
421  const Array<double> &bot,
422  const Array<double> &bct,
423  const Vector &pa_data,
424  const Vector &x,
425  Vector &y)
426 {
427  constexpr static int MAX_D1D = HCURL_MAX_D1D;
428  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
429
430  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
431  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
432  constexpr static int VDIM = 3;
433
434  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
435  auto Bc = Reshape(bc.Read(), Q1D, D1D);
436  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
437  auto Bct = Reshape(bct.Read(), D1D, Q1D);
438  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
439  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
440  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
441
442  MFEM_FORALL(e, NE,
443  {
444  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
445
446  for (int qz = 0; qz < Q1D; ++qz)
447  {
448  for (int qy = 0; qy < Q1D; ++qy)
449  {
450  for (int qx = 0; qx < Q1D; ++qx)
451  {
452  for (int c = 0; c < VDIM; ++c)
453  {
454  mass[qz][qy][qx][c] = 0.0;
455  }
456  }
457  }
458  }
459
460  int osc = 0;
461
462  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
463  {
464  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
465  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
466  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
467
468  for (int dz = 0; dz < D1Dz; ++dz)
469  {
470  double massXY[MAX_Q1D][MAX_Q1D];
471  for (int qy = 0; qy < Q1D; ++qy)
472  {
473  for (int qx = 0; qx < Q1D; ++qx)
474  {
475  massXY[qy][qx] = 0.0;
476  }
477  }
478
479  for (int dy = 0; dy < D1Dy; ++dy)
480  {
481  double massX[MAX_Q1D];
482  for (int qx = 0; qx < Q1D; ++qx)
483  {
484  massX[qx] = 0.0;
485  }
486
487  for (int dx = 0; dx < D1Dx; ++dx)
488  {
489  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
490  for (int qx = 0; qx < Q1D; ++qx)
491  {
492  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
493  }
494  }
495
496  for (int qy = 0; qy < Q1D; ++qy)
497  {
498  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
499  for (int qx = 0; qx < Q1D; ++qx)
500  {
501  const double wx = massX[qx];
502  massXY[qy][qx] += wx * wy;
503  }
504  }
505  }
506
507  for (int qz = 0; qz < Q1D; ++qz)
508  {
509  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
510  for (int qy = 0; qy < Q1D; ++qy)
511  {
512  for (int qx = 0; qx < Q1D; ++qx)
513  {
514  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
515  }
516  }
517  }
518  }
519
520  osc += D1Dx * D1Dy * D1Dz;
521  } // loop (c) over components
522
523  // Apply D operator.
524  for (int qz = 0; qz < Q1D; ++qz)
525  {
526  for (int qy = 0; qy < Q1D; ++qy)
527  {
528  for (int qx = 0; qx < Q1D; ++qx)
529  {
530  const double O11 = op(qx,qy,qz,0,e);
531  const double O12 = op(qx,qy,qz,1,e);
532  const double O13 = op(qx,qy,qz,2,e);
533  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
534  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
535  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
536  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
537  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
538  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
539  const double massX = mass[qz][qy][qx][0];
540  const double massY = mass[qz][qy][qx][1];
541  const double massZ = mass[qz][qy][qx][2];
542  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
543  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
544  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
545  }
546  }
547  }
548
549  for (int qz = 0; qz < Q1D; ++qz)
550  {
551  double massXY[MAX_D1D][MAX_D1D];
552
553  osc = 0;
554
555  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
556  {
557  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
558  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
559  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
560
561  for (int dy = 0; dy < D1Dy; ++dy)
562  {
563  for (int dx = 0; dx < D1Dx; ++dx)
564  {
565  massXY[dy][dx] = 0.0;
566  }
567  }
568  for (int qy = 0; qy < Q1D; ++qy)
569  {
570  double massX[MAX_D1D];
571  for (int dx = 0; dx < D1Dx; ++dx)
572  {
573  massX[dx] = 0;
574  }
575  for (int qx = 0; qx < Q1D; ++qx)
576  {
577  for (int dx = 0; dx < D1Dx; ++dx)
578  {
579  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
580  }
581  }
582  for (int dy = 0; dy < D1Dy; ++dy)
583  {
584  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
585  for (int dx = 0; dx < D1Dx; ++dx)
586  {
587  massXY[dy][dx] += massX[dx] * wy;
588  }
589  }
590  }
591
592  for (int dz = 0; dz < D1Dz; ++dz)
593  {
594  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
595  for (int dy = 0; dy < D1Dy; ++dy)
596  {
597  for (int dx = 0; dx < D1Dx; ++dx)
598  {
599  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
600  }
601  }
602  }
603
604  osc += D1Dx * D1Dy * D1Dz;
605  } // loop c
606  } // loop qz
607  }); // end of element loop
608 }
609
610 template<int T_D1D, int T_Q1D>
611 void SmemPAHcurlMassApply3D(const int D1D,
612  const int Q1D,
613  const int NE,
614  const bool symmetric,
615  const Array<double> &bo,
616  const Array<double> &bc,
617  const Array<double> &bot,
618  const Array<double> &bct,
619  const Vector &pa_data,
620  const Vector &x,
621  Vector &y)
622 {
623  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
624  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
625
626  const int dataSize = symmetric ? 6 : 9;
627
628  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
629  auto Bc = Reshape(bc.Read(), Q1D, D1D);
630  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
631  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
632  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
633
634  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
635  {
636  constexpr int VDIM = 3;
637  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
638  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
639
640  MFEM_SHARED double sBo[tQ1D][tD1D];
641  MFEM_SHARED double sBc[tQ1D][tD1D];
642
643  double op9[9];
644  MFEM_SHARED double sop[9*tQ1D*tQ1D];
645  MFEM_SHARED double mass[tQ1D][tQ1D][3];
646
647  MFEM_SHARED double sX[tD1D][tD1D][tD1D];
648
650  {
652  {
654  {
655  for (int i=0; i<dataSize; ++i)
656  {
657  op9[i] = op(qx,qy,qz,i,e);
658  }
659  }
660  }
661  }
662
663  const int tidx = MFEM_THREAD_ID(x);
664  const int tidy = MFEM_THREAD_ID(y);
665  const int tidz = MFEM_THREAD_ID(z);
666
667  if (tidz == 0)
668  {
670  {
672  {
673  sBc[q][d] = Bc(q,d);
674  if (d < D1D-1)
675  {
676  sBo[q][d] = Bo(q,d);
677  }
678  }
679  }
680  }
682
683  for (int qz=0; qz < Q1D; ++qz)
684  {
685  int osc = 0;
686  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
687  {
688  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
689  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
690  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
691
693  {
695  {
697  {
698  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
699  }
700  }
701  }
703
704  if (tidz == qz)
705  {
706  for (int i=0; i<dataSize; ++i)
707  {
708  sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
709  }
710
712  {
714  {
715  double u = 0.0;
716
717  for (int dz = 0; dz < D1Dz; ++dz)
718  {
719  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
720  for (int dy = 0; dy < D1Dy; ++dy)
721  {
722  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
723  for (int dx = 0; dx < D1Dx; ++dx)
724  {
725  const double t = sX[dz][dy][dx];
726  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
727  u += t * wx * wy * wz;
728  }
729  }
730  }
731
732  mass[qy][qx][c] = u;
733  } // qx
734  } // qy
735  } // tidz == qz
736
737  osc += D1Dx * D1Dy * D1Dz;
739  } // c
740
741  MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
742
743  osc = 0;
744  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
745  {
746  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
747  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
748  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
749
750  double dxyz = 0.0;
751
753  {
754  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
755
757  {
759  {
760  for (int qy = 0; qy < Q1D; ++qy)
761  {
762  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
763  for (int qx = 0; qx < Q1D; ++qx)
764  {
765  const int os = (dataSize*qx) + (dataSize*Q1D*qy);
766  const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
767  (symmetric ? 2 : 6))); // O11, O21, O31
768  const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
769  (symmetric ? 4 : 7))); // O12, O22, O32
770  const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
771  (symmetric ? 5 : 8))); // O13, O23, O33
772
773  const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
774  (sop[id3] * mass[qy][qx][2]);
775
776  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
777  dxyz += m_c * wx * wy * wz;
778  }
779  }
780  }
781  }
782  }
783
785
787  {
789  {
791  {
792  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
793  }
794  }
795  }
796
797  osc += D1Dx * D1Dy * D1Dz;
798  } // c loop
799  } // qz
800  }); // end of element loop
801 }
802
803 // PA H(curl) curl-curl assemble 2D kernel
804 static void PACurlCurlSetup2D(const int Q1D,
805  const int NE,
806  const Array<double> &w,
807  const Vector &j,
808  Vector &coeff,
809  Vector &op)
810 {
811  const int NQ = Q1D*Q1D;
813  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
814  auto C = Reshape(coeff.Read(), NQ, NE);
815  auto y = Reshape(op.Write(), NQ, NE);
816  MFEM_FORALL(e, NE,
817  {
818  for (int q = 0; q < NQ; ++q)
819  {
820  const double J11 = J(q,0,0,e);
821  const double J21 = J(q,1,0,e);
822  const double J12 = J(q,0,1,e);
823  const double J22 = J(q,1,1,e);
824  const double detJ = (J11*J22)-(J21*J12);
825  y(q,e) = W[q] * C(q,e) / detJ;
826  }
827  });
828 }
829
830 // PA H(curl) curl-curl assemble 3D kernel
831 static void PACurlCurlSetup3D(const int Q1D,
832  const int coeffDim,
833  const int NE,
834  const Array<double> &w,
835  const Vector &j,
836  Vector &coeff,
837  Vector &op)
838 {
839  const int NQ = Q1D*Q1D*Q1D;
840  const bool symmetric = (coeffDim != 9);
842  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
843  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
844  auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
845
846  MFEM_FORALL(e, NE,
847  {
848  for (int q = 0; q < NQ; ++q)
849  {
850  const double J11 = J(q,0,0,e);
851  const double J21 = J(q,1,0,e);
852  const double J31 = J(q,2,0,e);
853  const double J12 = J(q,0,1,e);
854  const double J22 = J(q,1,1,e);
855  const double J32 = J(q,2,1,e);
856  const double J13 = J(q,0,2,e);
857  const double J23 = J(q,1,2,e);
858  const double J33 = J(q,2,2,e);
859  const double detJ = J11 * (J22 * J33 - J32 * J23) -
860  /* */ J21 * (J12 * J33 - J32 * J13) +
861  /* */ J31 * (J12 * J23 - J22 * J13);
862
863  const double c_detJ = W[q] / detJ;
864
865  if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
866  {
867  // Set y to the 6 or 9 entries of J^T M J / det
868  const double M11 = C(0, q, e);
869  const double M12 = C(1, q, e);
870  const double M13 = C(2, q, e);
871  const double M21 = (!symmetric) ? C(3, q, e) : M12;
872  const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
873  const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
874  const double M31 = (!symmetric) ? C(6, q, e) : M13;
875  const double M32 = (!symmetric) ? C(7, q, e) : M23;
876  const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
877
878  // First compute R = MJ
879  const double R11 = M11*J11 + M12*J21 + M13*J31;
880  const double R12 = M11*J12 + M12*J22 + M13*J32;
881  const double R13 = M11*J13 + M12*J23 + M13*J33;
882  const double R21 = M21*J11 + M22*J21 + M23*J31;
883  const double R22 = M21*J12 + M22*J22 + M23*J32;
884  const double R23 = M21*J13 + M22*J23 + M23*J33;
885  const double R31 = M31*J11 + M32*J21 + M33*J31;
886  const double R32 = M31*J12 + M32*J22 + M33*J32;
887  const double R33 = M31*J13 + M32*J23 + M33*J33;
888
889  // Now set y to J^T R / det
890  y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
891  const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
892  y(q,1,e) = Y12; // 1,2
893  y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
894
895  const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
896  const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
897  const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
898
899  const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
900
901  y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
902  y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
903  y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
904
905  if (!symmetric)
906  {
907  y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
908  y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
909  y(q,8,e) = Y33; // 3,3
910  }
911  }
912  else // Vector or scalar coefficient version
913  {
914  // Set y to the 6 entries of J^T D J / det^2
915  const double D1 = C(0, q, e);
916  const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
917  const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
918
919  y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
920  y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
921  y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
922  y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
923  y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
924  y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
925  }
926  }
927  });
928 }
929
930 // PA H(curl)-L2 assemble 2D kernel
931 static void PACurlL2Setup2D(const int Q1D,
932  const int NE,
933  const Array<double> &w,
934  Vector &coeff,
935  Vector &op)
936 {
937  const int NQ = Q1D*Q1D;
939  auto C = Reshape(coeff.Read(), NQ, NE);
940  auto y = Reshape(op.Write(), NQ, NE);
941  MFEM_FORALL(e, NE,
942  {
943  for (int q = 0; q < NQ; ++q)
944  {
945  y(q,e) = W[q] * C(q,e);
946  }
947  });
948 }
949
950 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
951 {
952  // Assumes tensor-product elements
953  Mesh *mesh = fes.GetMesh();
954  const FiniteElement *fel = fes.GetFE(0);
955
956  const VectorTensorFiniteElement *el =
957  dynamic_cast<const VectorTensorFiniteElement*>(fel);
958  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
959
960  const IntegrationRule *ir
961  = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
962  *mesh->GetElementTransformation(0));
963
964  const int dims = el->GetDim();
965  MFEM_VERIFY(dims == 2 || dims == 3, "");
966
967  nq = ir->GetNPoints();
968  dim = mesh->Dimension();
969  MFEM_VERIFY(dim == 2 || dim == 3, "");
970
971  ne = fes.GetNE();
972  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
975  dofs1D = mapsC->ndof;
977
978  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
979
981  CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC);
982  if (Q) { coeff.Project(*Q); }
983  else if (MQ) { coeff.ProjectTranspose(*MQ); }
984  else if (DQ) { coeff.Project(*DQ); }
985  else { coeff.SetConstant(1.0); }
986
987  const int coeff_dim = coeff.GetVDim();
988  symmetric = (coeff_dim != dim*dim);
989  const int sym_dims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
990  const int ndata = (dim == 2) ? 1 : (symmetric ? sym_dims : dim*dim);
991  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
992
993  if (el->GetDerivType() != mfem::FiniteElement::CURL)
994  {
995  MFEM_ABORT("Unknown kernel.");
996  }
997
998  if (dim == 3)
999  {
1000  PACurlCurlSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, coeff,
1001  pa_data);
1002  }
1003  else
1004  {
1005  PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1006  }
1007 }
1008
1009 static void PACurlCurlApply2D(const int D1D,
1010  const int Q1D,
1011  const int NE,
1012  const Array<double> &bo,
1013  const Array<double> &bot,
1014  const Array<double> &gc,
1015  const Array<double> &gct,
1016  const Vector &pa_data,
1017  const Vector &x,
1018  Vector &y)
1019 {
1020  constexpr static int VDIM = 2;
1021  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1022  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1023
1024  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1025  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1026  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1027  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1028  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1029  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1030  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1031
1032  MFEM_FORALL(e, NE,
1033  {
1034  double curl[MAX_Q1D][MAX_Q1D];
1035
1036  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1037
1038  for (int qy = 0; qy < Q1D; ++qy)
1039  {
1040  for (int qx = 0; qx < Q1D; ++qx)
1041  {
1042  curl[qy][qx] = 0.0;
1043  }
1044  }
1045
1046  int osc = 0;
1047
1048  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1049  {
1050  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1051  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1052
1053  for (int dy = 0; dy < D1Dy; ++dy)
1054  {
1056  for (int qx = 0; qx < Q1D; ++qx)
1057  {
1059  }
1060
1061  for (int dx = 0; dx < D1Dx; ++dx)
1062  {
1063  const double t = X(dx + (dy * D1Dx) + osc, e);
1064  for (int qx = 0; qx < Q1D; ++qx)
1065  {
1066  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1067  }
1068  }
1069
1070  for (int qy = 0; qy < Q1D; ++qy)
1071  {
1072  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1073  for (int qx = 0; qx < Q1D; ++qx)
1074  {
1075  curl[qy][qx] += gradX[qx] * wy;
1076  }
1077  }
1078  }
1079
1080  osc += D1Dx * D1Dy;
1081  } // loop (c) over components
1082
1083  // Apply D operator.
1084  for (int qy = 0; qy < Q1D; ++qy)
1085  {
1086  for (int qx = 0; qx < Q1D; ++qx)
1087  {
1088  curl[qy][qx] *= op(qx,qy,e);
1089  }
1090  }
1091
1092  for (int qy = 0; qy < Q1D; ++qy)
1093  {
1094  osc = 0;
1095
1096  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1097  {
1098  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1099  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1100
1102  for (int dx = 0; dx < D1Dx; ++dx)
1103  {
1105  }
1106  for (int qx = 0; qx < Q1D; ++qx)
1107  {
1108  for (int dx = 0; dx < D1Dx; ++dx)
1109  {
1110  gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1111  }
1112  }
1113  for (int dy = 0; dy < D1Dy; ++dy)
1114  {
1115  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1116
1117  for (int dx = 0; dx < D1Dx; ++dx)
1118  {
1119  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1120  }
1121  }
1122
1123  osc += D1Dx * D1Dy;
1124  } // loop c
1125  } // loop qy
1126  }); // end of element loop
1127 }
1128
1129 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1130 static void PACurlCurlApply3D(const int D1D,
1131  const int Q1D,
1132  const bool symmetric,
1133  const int NE,
1134  const Array<double> &bo,
1135  const Array<double> &bc,
1136  const Array<double> &bot,
1137  const Array<double> &bct,
1138  const Array<double> &gc,
1139  const Array<double> &gct,
1140  const Vector &pa_data,
1141  const Vector &x,
1142  Vector &y)
1143 {
1144  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1145  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1146  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1147  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1148  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1149  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1150  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1151
1152  constexpr static int VDIM = 3;
1153
1154  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1155  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1156  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1157  auto Bct = Reshape(bct.Read(), D1D, Q1D);
1158  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1159  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1160  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1161  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1162  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1163
1164  MFEM_FORALL(e, NE,
1165  {
1166  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1167  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1168
1169  for (int qz = 0; qz < Q1D; ++qz)
1170  {
1171  for (int qy = 0; qy < Q1D; ++qy)
1172  {
1173  for (int qx = 0; qx < Q1D; ++qx)
1174  {
1175  for (int c = 0; c < VDIM; ++c)
1176  {
1177  curl[qz][qy][qx][c] = 0.0;
1178  }
1179  }
1180  }
1181  }
1182
1183  // We treat x, y, z components separately for optimization specific to each.
1184
1185  int osc = 0;
1186
1187  {
1188  // x component
1189  const int D1Dz = D1D;
1190  const int D1Dy = D1D;
1191  const int D1Dx = D1D - 1;
1192
1193  for (int dz = 0; dz < D1Dz; ++dz)
1194  {
1196  for (int qy = 0; qy < Q1D; ++qy)
1197  {
1198  for (int qx = 0; qx < Q1D; ++qx)
1199  {
1200  for (int d = 0; d < 2; ++d)
1201  {
1203  }
1204  }
1205  }
1206
1207  for (int dy = 0; dy < D1Dy; ++dy)
1208  {
1209  double massX[MAX_Q1D];
1210  for (int qx = 0; qx < Q1D; ++qx)
1211  {
1212  massX[qx] = 0.0;
1213  }
1214
1215  for (int dx = 0; dx < D1Dx; ++dx)
1216  {
1217  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1218  for (int qx = 0; qx < Q1D; ++qx)
1219  {
1220  massX[qx] += t * Bo(qx,dx);
1221  }
1222  }
1223
1224  for (int qy = 0; qy < Q1D; ++qy)
1225  {
1226  const double wy = Bc(qy,dy);
1227  const double wDy = Gc(qy,dy);
1228  for (int qx = 0; qx < Q1D; ++qx)
1229  {
1230  const double wx = massX[qx];
1231  gradXY[qy][qx][0] += wx * wDy;
1232  gradXY[qy][qx][1] += wx * wy;
1233  }
1234  }
1235  }
1236
1237  for (int qz = 0; qz < Q1D; ++qz)
1238  {
1239  const double wz = Bc(qz,dz);
1240  const double wDz = Gc(qz,dz);
1241  for (int qy = 0; qy < Q1D; ++qy)
1242  {
1243  for (int qx = 0; qx < Q1D; ++qx)
1244  {
1245  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1246  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1247  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1248  }
1249  }
1250  }
1251  }
1252
1253  osc += D1Dx * D1Dy * D1Dz;
1254  }
1255
1256  {
1257  // y component
1258  const int D1Dz = D1D;
1259  const int D1Dy = D1D - 1;
1260  const int D1Dx = D1D;
1261
1262  for (int dz = 0; dz < D1Dz; ++dz)
1263  {
1265  for (int qy = 0; qy < Q1D; ++qy)
1266  {
1267  for (int qx = 0; qx < Q1D; ++qx)
1268  {
1269  for (int d = 0; d < 2; ++d)
1270  {
1272  }
1273  }
1274  }
1275
1276  for (int dx = 0; dx < D1Dx; ++dx)
1277  {
1278  double massY[MAX_Q1D];
1279  for (int qy = 0; qy < Q1D; ++qy)
1280  {
1281  massY[qy] = 0.0;
1282  }
1283
1284  for (int dy = 0; dy < D1Dy; ++dy)
1285  {
1286  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1287  for (int qy = 0; qy < Q1D; ++qy)
1288  {
1289  massY[qy] += t * Bo(qy,dy);
1290  }
1291  }
1292
1293  for (int qx = 0; qx < Q1D; ++qx)
1294  {
1295  const double wx = Bc(qx,dx);
1296  const double wDx = Gc(qx,dx);
1297  for (int qy = 0; qy < Q1D; ++qy)
1298  {
1299  const double wy = massY[qy];
1300  gradXY[qy][qx][0] += wDx * wy;
1301  gradXY[qy][qx][1] += wx * wy;
1302  }
1303  }
1304  }
1305
1306  for (int qz = 0; qz < Q1D; ++qz)
1307  {
1308  const double wz = Bc(qz,dz);
1309  const double wDz = Gc(qz,dz);
1310  for (int qy = 0; qy < Q1D; ++qy)
1311  {
1312  for (int qx = 0; qx < Q1D; ++qx)
1313  {
1314  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1315  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1316  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1317  }
1318  }
1319  }
1320  }
1321
1322  osc += D1Dx * D1Dy * D1Dz;
1323  }
1324
1325  {
1326  // z component
1327  const int D1Dz = D1D - 1;
1328  const int D1Dy = D1D;
1329  const int D1Dx = D1D;
1330
1331  for (int dx = 0; dx < D1Dx; ++dx)
1332  {
1334  for (int qz = 0; qz < Q1D; ++qz)
1335  {
1336  for (int qy = 0; qy < Q1D; ++qy)
1337  {
1338  for (int d = 0; d < 2; ++d)
1339  {
1341  }
1342  }
1343  }
1344
1345  for (int dy = 0; dy < D1Dy; ++dy)
1346  {
1347  double massZ[MAX_Q1D];
1348  for (int qz = 0; qz < Q1D; ++qz)
1349  {
1350  massZ[qz] = 0.0;
1351  }
1352
1353  for (int dz = 0; dz < D1Dz; ++dz)
1354  {
1355  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1356  for (int qz = 0; qz < Q1D; ++qz)
1357  {
1358  massZ[qz] += t * Bo(qz,dz);
1359  }
1360  }
1361
1362  for (int qy = 0; qy < Q1D; ++qy)
1363  {
1364  const double wy = Bc(qy,dy);
1365  const double wDy = Gc(qy,dy);
1366  for (int qz = 0; qz < Q1D; ++qz)
1367  {
1368  const double wz = massZ[qz];
1369  gradYZ[qz][qy][0] += wz * wy;
1370  gradYZ[qz][qy][1] += wz * wDy;
1371  }
1372  }
1373  }
1374
1375  for (int qx = 0; qx < Q1D; ++qx)
1376  {
1377  const double wx = Bc(qx,dx);
1378  const double wDx = Gc(qx,dx);
1379
1380  for (int qy = 0; qy < Q1D; ++qy)
1381  {
1382  for (int qz = 0; qz < Q1D; ++qz)
1383  {
1384  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1385  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1386  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1387  }
1388  }
1389  }
1390  }
1391  }
1392
1393  // Apply D operator.
1394  for (int qz = 0; qz < Q1D; ++qz)
1395  {
1396  for (int qy = 0; qy < Q1D; ++qy)
1397  {
1398  for (int qx = 0; qx < Q1D; ++qx)
1399  {
1400  const double O11 = op(qx,qy,qz,0,e);
1401  const double O12 = op(qx,qy,qz,1,e);
1402  const double O13 = op(qx,qy,qz,2,e);
1403  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1404  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1405  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1406  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1407  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1408  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1409
1410  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1411  (O13 * curl[qz][qy][qx][2]);
1412  const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1413  (O23 * curl[qz][qy][qx][2]);
1414  const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1415  (O33 * curl[qz][qy][qx][2]);
1416
1417  curl[qz][qy][qx][0] = c1;
1418  curl[qz][qy][qx][1] = c2;
1419  curl[qz][qy][qx][2] = c3;
1420  }
1421  }
1422  }
1423
1424  // x component
1425  osc = 0;
1426  {
1427  const int D1Dz = D1D;
1428  const int D1Dy = D1D;
1429  const int D1Dx = D1D - 1;
1430
1431  for (int qz = 0; qz < Q1D; ++qz)
1432  {
1435
1436  for (int dy = 0; dy < D1Dy; ++dy)
1437  {
1438  for (int dx = 0; dx < D1Dx; ++dx)
1439  {
1442  }
1443  }
1444  for (int qy = 0; qy < Q1D; ++qy)
1445  {
1446  double massX[MAX_D1D][2];
1447  for (int dx = 0; dx < D1Dx; ++dx)
1448  {
1449  for (int n = 0; n < 2; ++n)
1450  {
1451  massX[dx][n] = 0.0;
1452  }
1453  }
1454  for (int qx = 0; qx < Q1D; ++qx)
1455  {
1456  for (int dx = 0; dx < D1Dx; ++dx)
1457  {
1458  const double wx = Bot(dx,qx);
1459
1460  massX[dx][0] += wx * curl[qz][qy][qx][1];
1461  massX[dx][1] += wx * curl[qz][qy][qx][2];
1462  }
1463  }
1464  for (int dy = 0; dy < D1Dy; ++dy)
1465  {
1466  const double wy = Bct(dy,qy);
1467  const double wDy = Gct(dy,qy);
1468
1469  for (int dx = 0; dx < D1Dx; ++dx)
1470  {
1471  gradXY21[dy][dx] += massX[dx][0] * wy;
1472  gradXY12[dy][dx] += massX[dx][1] * wDy;
1473  }
1474  }
1475  }
1476
1477  for (int dz = 0; dz < D1Dz; ++dz)
1478  {
1479  const double wz = Bct(dz,qz);
1480  const double wDz = Gct(dz,qz);
1481  for (int dy = 0; dy < D1Dy; ++dy)
1482  {
1483  for (int dx = 0; dx < D1Dx; ++dx)
1484  {
1485  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1486  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1487  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1489  }
1490  }
1491  }
1492  } // loop qz
1493
1494  osc += D1Dx * D1Dy * D1Dz;
1495  }
1496
1497  // y component
1498  {
1499  const int D1Dz = D1D;
1500  const int D1Dy = D1D - 1;
1501  const int D1Dx = D1D;
1502
1503  for (int qz = 0; qz < Q1D; ++qz)
1504  {
1507
1508  for (int dy = 0; dy < D1Dy; ++dy)
1509  {
1510  for (int dx = 0; dx < D1Dx; ++dx)
1511  {
1514  }
1515  }
1516  for (int qx = 0; qx < Q1D; ++qx)
1517  {
1518  double massY[MAX_D1D][2];
1519  for (int dy = 0; dy < D1Dy; ++dy)
1520  {
1521  massY[dy][0] = 0.0;
1522  massY[dy][1] = 0.0;
1523  }
1524  for (int qy = 0; qy < Q1D; ++qy)
1525  {
1526  for (int dy = 0; dy < D1Dy; ++dy)
1527  {
1528  const double wy = Bot(dy,qy);
1529
1530  massY[dy][0] += wy * curl[qz][qy][qx][2];
1531  massY[dy][1] += wy * curl[qz][qy][qx][0];
1532  }
1533  }
1534  for (int dx = 0; dx < D1Dx; ++dx)
1535  {
1536  const double wx = Bct(dx,qx);
1537  const double wDx = Gct(dx,qx);
1538
1539  for (int dy = 0; dy < D1Dy; ++dy)
1540  {
1541  gradXY02[dy][dx] += massY[dy][0] * wDx;
1542  gradXY20[dy][dx] += massY[dy][1] * wx;
1543  }
1544  }
1545  }
1546
1547  for (int dz = 0; dz < D1Dz; ++dz)
1548  {
1549  const double wz = Bct(dz,qz);
1550  const double wDz = Gct(dz,qz);
1551  for (int dy = 0; dy < D1Dy; ++dy)
1552  {
1553  for (int dx = 0; dx < D1Dx; ++dx)
1554  {
1555  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1556  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1557  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1559  }
1560  }
1561  }
1562  } // loop qz
1563
1564  osc += D1Dx * D1Dy * D1Dz;
1565  }
1566
1567  // z component
1568  {
1569  const int D1Dz = D1D - 1;
1570  const int D1Dy = D1D;
1571  const int D1Dx = D1D;
1572
1573  for (int qx = 0; qx < Q1D; ++qx)
1574  {
1577
1578  for (int dy = 0; dy < D1Dy; ++dy)
1579  {
1580  for (int dz = 0; dz < D1Dz; ++dz)
1581  {
1584  }
1585  }
1586  for (int qy = 0; qy < Q1D; ++qy)
1587  {
1588  double massZ[MAX_D1D][2];
1589  for (int dz = 0; dz < D1Dz; ++dz)
1590  {
1591  for (int n = 0; n < 2; ++n)
1592  {
1593  massZ[dz][n] = 0.0;
1594  }
1595  }
1596  for (int qz = 0; qz < Q1D; ++qz)
1597  {
1598  for (int dz = 0; dz < D1Dz; ++dz)
1599  {
1600  const double wz = Bot(dz,qz);
1601
1602  massZ[dz][0] += wz * curl[qz][qy][qx][0];
1603  massZ[dz][1] += wz * curl[qz][qy][qx][1];
1604  }
1605  }
1606  for (int dy = 0; dy < D1Dy; ++dy)
1607  {
1608  const double wy = Bct(dy,qy);
1609  const double wDy = Gct(dy,qy);
1610
1611  for (int dz = 0; dz < D1Dz; ++dz)
1612  {
1613  gradYZ01[dz][dy] += wy * massZ[dz][1];
1614  gradYZ10[dz][dy] += wDy * massZ[dz][0];
1615  }
1616  }
1617  }
1618
1619  for (int dx = 0; dx < D1Dx; ++dx)
1620  {
1621  const double wx = Bct(dx,qx);
1622  const double wDx = Gct(dx,qx);
1623
1624  for (int dy = 0; dy < D1Dy; ++dy)
1625  {
1626  for (int dz = 0; dz < D1Dz; ++dz)
1627  {
1628  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1629  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1630  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1632  }
1633  }
1634  }
1635  } // loop qx
1636  }
1637  }); // end of element loop
1638 }
1639
1640 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1641 static void SmemPACurlCurlApply3D(const int D1D,
1642  const int Q1D,
1643  const bool symmetric,
1644  const int NE,
1645  const Array<double> &bo,
1646  const Array<double> &bc,
1647  const Array<double> &bot,
1648  const Array<double> &bct,
1649  const Array<double> &gc,
1650  const Array<double> &gct,
1651  const Vector &pa_data,
1652  const Vector &x,
1653  Vector &y)
1654 {
1655  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1656  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1657  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1658  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1659  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1660  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1661  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1662
1663  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1664  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1665  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1666  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1667  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1668  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1669
1670  const int s = symmetric ? 6 : 9;
1671
1672  auto device_kernel = [=] MFEM_DEVICE (int e)
1673  {
1674  constexpr int VDIM = 3;
1675
1676  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1677  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1678  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1679
1680  double ope[9];
1681  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1682  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1683
1684  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1685
1687  {
1689  {
1691  {
1692  for (int i=0; i<s; ++i)
1693  {
1694  ope[i] = op(qx,qy,qz,i,e);
1695  }
1696  }
1697  }
1698  }
1699
1700  const int tidx = MFEM_THREAD_ID(x);
1701  const int tidy = MFEM_THREAD_ID(y);
1702  const int tidz = MFEM_THREAD_ID(z);
1703
1704  if (tidz == 0)
1705  {
1707  {
1709  {
1710  sBc[d][q] = Bc(q,d);
1711  sGc[d][q] = Gc(q,d);
1712  if (d < D1D-1)
1713  {
1714  sBo[d][q] = Bo(q,d);
1715  }
1716  }
1717  }
1718  }
1720
1721  for (int qz=0; qz < Q1D; ++qz)
1722  {
1723  if (tidz == qz)
1724  {
1726  {
1728  {
1729  for (int i=0; i<3; ++i)
1730  {
1731  curl[qy][qx][i] = 0.0;
1732  }
1733  }
1734  }
1735  }
1736
1737  int osc = 0;
1738  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1739  {
1740  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1741  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1742  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1743
1745  {
1747  {
1749  {
1750  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1751  }
1752  }
1753  }
1755
1756  if (tidz == qz)
1757  {
1758  if (c == 0)
1759  {
1760  for (int i=0; i<s; ++i)
1761  {
1762  sop[i][tidx][tidy] = ope[i];
1763  }
1764  }
1765
1767  {
1769  {
1770  double u = 0.0;
1771  double v = 0.0;
1772
1773  // We treat x, y, z components separately for optimization specific to each.
1774  if (c == 0) // x component
1775  {
1776  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1777
1778  for (int dz = 0; dz < D1Dz; ++dz)
1779  {
1780  const double wz = sBc[dz][qz];
1781  const double wDz = sGc[dz][qz];
1782
1783  for (int dy = 0; dy < D1Dy; ++dy)
1784  {
1785  const double wy = sBc[dy][qy];
1786  const double wDy = sGc[dy][qy];
1787
1788  for (int dx = 0; dx < D1Dx; ++dx)
1789  {
1790  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1791  u += wx * wDy * wz;
1792  v += wx * wy * wDz;
1793  }
1794  }
1795  }
1796
1797  curl[qy][qx][1] += v; // (u_0)_{x_2}
1798  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1799  }
1800  else if (c == 1) // y component
1801  {
1802  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1803
1804  for (int dz = 0; dz < D1Dz; ++dz)
1805  {
1806  const double wz = sBc[dz][qz];
1807  const double wDz = sGc[dz][qz];
1808
1809  for (int dy = 0; dy < D1Dy; ++dy)
1810  {
1811  const double wy = sBo[dy][qy];
1812
1813  for (int dx = 0; dx < D1Dx; ++dx)
1814  {
1815  const double t = sX[dz][dy][dx];
1816  const double wx = t * sBc[dx][qx];
1817  const double wDx = t * sGc[dx][qx];
1818
1819  u += wDx * wy * wz;
1820  v += wx * wy * wDz;
1821  }
1822  }
1823  }
1824
1825  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1826  curl[qy][qx][2] += u; // (u_1)_{x_0}
1827  }
1828  else // z component
1829  {
1830  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1831
1832  for (int dz = 0; dz < D1Dz; ++dz)
1833  {
1834  const double wz = sBo[dz][qz];
1835
1836  for (int dy = 0; dy < D1Dy; ++dy)
1837  {
1838  const double wy = sBc[dy][qy];
1839  const double wDy = sGc[dy][qy];
1840
1841  for (int dx = 0; dx < D1Dx; ++dx)
1842  {
1843  const double t = sX[dz][dy][dx];
1844  const double wx = t * sBc[dx][qx];
1845  const double wDx = t * sGc[dx][qx];
1846
1847  u += wDx * wy * wz;
1848  v += wx * wDy * wz;
1849  }
1850  }
1851  }
1852
1853  curl[qy][qx][0] += v; // (u_2)_{x_1}
1854  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1855  }
1856  } // qx
1857  } // qy
1858  } // tidz == qz
1859
1860  osc += D1Dx * D1Dy * D1Dz;
1862  } // c
1863
1864  double dxyz1 = 0.0;
1865  double dxyz2 = 0.0;
1866  double dxyz3 = 0.0;
1867
1869  {
1870  const double wcz = sBc[dz][qz];
1871  const double wcDz = sGc[dz][qz];
1872  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1873
1875  {
1877  {
1878  for (int qy = 0; qy < Q1D; ++qy)
1879  {
1880  const double wcy = sBc[dy][qy];
1881  const double wcDy = sGc[dy][qy];
1882  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1883
1884  for (int qx = 0; qx < Q1D; ++qx)
1885  {
1886  const double O11 = sop[0][qx][qy];
1887  const double O12 = sop[1][qx][qy];
1888  const double O13 = sop[2][qx][qy];
1889  const double O21 = symmetric ? O12 : sop[3][qx][qy];
1890  const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1891  const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1892  const double O31 = symmetric ? O13 : sop[6][qx][qy];
1893  const double O32 = symmetric ? O23 : sop[7][qx][qy];
1894  const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1895
1896  const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1897  (O13 * curl[qy][qx][2]);
1898  const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1899  (O23 * curl[qy][qx][2]);
1900  const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1901  (O33 * curl[qy][qx][2]);
1902
1903  const double wcx = sBc[dx][qx];
1904  const double wDx = sGc[dx][qx];
1905
1906  if (dx < D1D-1)
1907  {
1908  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1909  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1910  const double wx = sBo[dx][qx];
1911  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1912  }
1913
1914  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1915  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1916  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1917
1918  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1919  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1920  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1921  } // qx
1922  } // qy
1923  } // dx
1924  } // dy
1925  } // dz
1926
1928
1930  {
1932  {
1934  {
1935  if (dx < D1D-1)
1936  {
1937  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1938  }
1939  if (dy < D1D-1)
1940  {
1941  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1942  }
1943  if (dz < D1D-1)
1944  {
1945  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1946  }
1947  }
1948  }
1949  }
1950  } // qz
1951  }; // end of element loop
1952
1953  auto host_kernel = [&] MFEM_LAMBDA (int)
1954  {
1955  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
1956  };
1957
1958  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
1959 }
1960
1961 static void PACurlL2Apply2D(const int D1D,
1962  const int D1Dtest,
1963  const int Q1D,
1964  const int NE,
1965  const Array<double> &bo,
1966  const Array<double> &bot,
1967  const Array<double> &bt,
1968  const Array<double> &gc,
1969  const Vector &pa_data,
1970  const Vector &x, // trial = H(curl)
1971  Vector &y) // test = L2 or H1
1972 {
1973  constexpr static int VDIM = 2;
1974  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1975  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1976  const int H1 = (D1Dtest == D1D);
1977
1978  MFEM_VERIFY(y.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension");
1979
1980  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1981  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1982  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1983  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1984  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1985  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1986  auto Y = Reshape(y.ReadWrite(), D1Dtest, D1Dtest, NE);
1987
1988  MFEM_FORALL(e, NE,
1989  {
1990  double curl[MAX_Q1D][MAX_Q1D];
1991
1992  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1993
1994  for (int qy = 0; qy < Q1D; ++qy)
1995  {
1996  for (int qx = 0; qx < Q1D; ++qx)
1997  {
1998  curl[qy][qx] = 0.0;
1999  }
2000  }
2001
2002  int osc = 0;
2003
2004  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2005  {
2006  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2007  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2008
2009  for (int dy = 0; dy < D1Dy; ++dy)
2010  {
2012  for (int qx = 0; qx < Q1D; ++qx)
2013  {
2015  }
2016
2017  for (int dx = 0; dx < D1Dx; ++dx)
2018  {
2019  const double t = X(dx + (dy * D1Dx) + osc, e);
2020  for (int qx = 0; qx < Q1D; ++qx)
2021  {
2022  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2023  }
2024  }
2025
2026  for (int qy = 0; qy < Q1D; ++qy)
2027  {
2028  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
2029  for (int qx = 0; qx < Q1D; ++qx)
2030  {
2031  curl[qy][qx] += gradX[qx] * wy;
2032  }
2033  }
2034  }
2035
2036  osc += D1Dx * D1Dy;
2037  } // loop (c) over components
2038
2039  // Apply D operator.
2040  for (int qy = 0; qy < Q1D; ++qy)
2041  {
2042  for (int qx = 0; qx < Q1D; ++qx)
2043  {
2044  curl[qy][qx] *= op(qx,qy,e);
2045  }
2046  }
2047
2048  for (int qy = 0; qy < Q1D; ++qy)
2049  {
2050  double sol_x[MAX_D1D];
2051  for (int dx = 0; dx < D1Dtest; ++dx)
2052  {
2053  sol_x[dx] = 0.0;
2054  }
2055  for (int qx = 0; qx < Q1D; ++qx)
2056  {
2057  const double s = curl[qy][qx];
2058  for (int dx = 0; dx < D1Dtest; ++dx)
2059  {
2060  sol_x[dx] += s * ((H1 == 1) ? Bt(dx,qx) : Bot(dx,qx));
2061  }
2062  }
2063  for (int dy = 0; dy < D1Dtest; ++dy)
2064  {
2065  const double wy = (H1 == 1) ? Bt(dy,qy) : Bot(dy,qy);
2066
2067  for (int dx = 0; dx < D1Dtest; ++dx)
2068  {
2069  Y(dx,dy,e) += sol_x[dx] * wy;
2070  }
2071  }
2072  } // loop qy
2073  }); // end of element loop
2074 }
2075
2076 static void PACurlL2ApplyTranspose2D(const int D1D,
2077  const int D1Dtest,
2078  const int Q1D,
2079  const int NE,
2080  const Array<double> &bo,
2081  const Array<double> &bot,
2082  const Array<double> &b,
2083  const Array<double> &gct,
2084  const Vector &pa_data,
2085  const Vector &x, // trial = H(curl)
2086  Vector &y) // test = L2 or H1
2087 {
2088  constexpr static int VDIM = 2;
2089  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2090  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2091  const int H1 = (D1Dtest == D1D);
2092
2093  MFEM_VERIFY(x.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension");
2094
2095  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2096  auto B = Reshape(b.Read(), Q1D, D1D);
2097  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2098  auto Gct = Reshape(gct.Read(), D1D, Q1D);
2099  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2100  auto X = Reshape(x.Read(), D1Dtest, D1Dtest, NE);
2101  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2102
2103  MFEM_FORALL(e, NE,
2104  {
2105  double mass[MAX_Q1D][MAX_Q1D];
2106
2107  // Zero-order term in L2 or H1 test space
2108
2109  for (int qy = 0; qy < Q1D; ++qy)
2110  {
2111  for (int qx = 0; qx < Q1D; ++qx)
2112  {
2113  mass[qy][qx] = 0.0;
2114  }
2115  }
2116
2117  for (int dy = 0; dy < D1Dtest; ++dy)
2118  {
2119  double sol_x[MAX_Q1D];
2120  for (int qy = 0; qy < Q1D; ++qy)
2121  {
2122  sol_x[qy] = 0.0;
2123  }
2124  for (int dx = 0; dx < D1Dtest; ++dx)
2125  {
2126  const double s = X(dx,dy,e);
2127  for (int qx = 0; qx < Q1D; ++qx)
2128  {
2129  sol_x[qx] += s * ((H1 == 1) ? B(qx,dx) : Bo(qx,dx));
2130  }
2131  }
2132  for (int qy = 0; qy < Q1D; ++qy)
2133  {
2134  const double d2q = (H1 == 1) ? B(qy,dy) : Bo(qy,dy);
2135  for (int qx = 0; qx < Q1D; ++qx)
2136  {
2137  mass[qy][qx] += d2q * sol_x[qx];
2138  }
2139  }
2140  }
2141
2142  // Apply D operator.
2143  for (int qy = 0; qy < Q1D; ++qy)
2144  {
2145  for (int qx = 0; qx < Q1D; ++qx)
2146  {
2147  mass[qy][qx] *= op(qx,qy,e);
2148  }
2149  }
2150
2151  for (int qy = 0; qy < Q1D; ++qy)
2152  {
2153  int osc = 0;
2154
2155  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2156  {
2157  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2158  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2159
2161  for (int dx = 0; dx < D1Dx; ++dx)
2162  {
2164  }
2165  for (int qx = 0; qx < Q1D; ++qx)
2166  {
2167  for (int dx = 0; dx < D1Dx; ++dx)
2168  {
2169  gradX[dx] += mass[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
2170  }
2171  }
2172  for (int dy = 0; dy < D1Dy; ++dy)
2173  {
2174  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
2175
2176  for (int dx = 0; dx < D1Dx; ++dx)
2177  {
2178  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
2179  }
2180  }
2181
2182  osc += D1Dx * D1Dy;
2183  } // loop c
2184  } // loop qy
2185  }); // end of element loop
2186 }
2187
2188 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
2189 {
2190  if (dim == 3)
2191  {
2193  {
2194  const int ID = (dofs1D << 4) | quad1D;
2195  switch (ID)
2196  {
2197  case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2198  mapsO->B, mapsC->B, mapsO->Bt,
2199  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2200  case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2201  mapsO->B, mapsC->B, mapsO->Bt,
2202  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2203  case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2204  mapsO->B,
2205  mapsC->B, mapsO->Bt,
2206  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2207  case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2208  mapsO->B, mapsC->B, mapsO->Bt,
2209  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2210  default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2211  mapsC->B, mapsO->Bt, mapsC->Bt,
2212  mapsC->G, mapsC->Gt, pa_data, x, y);
2213  }
2214  }
2215  else
2216  PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2217  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2218  }
2219  else if (dim == 2)
2220  {
2221  PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2222  mapsC->G, mapsC->Gt, pa_data, x, y);
2223  }
2224  else
2225  {
2226  MFEM_ABORT("Unsupported dimension!");
2227  }
2228 }
2229
2230 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2231  const int Q1D,
2232  const int NE,
2233  const Array<double> &bo,
2234  const Array<double> &gc,
2235  const Vector &pa_data,
2236  Vector &diag)
2237 {
2238  constexpr static int VDIM = 2;
2239  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2240
2241  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2242  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2243  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2244  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2245
2246  MFEM_FORALL(e, NE,
2247  {
2248  int osc = 0;
2249
2250  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2251  {
2252  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2253  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2254
2255  double t[MAX_Q1D];
2256
2257  for (int dy = 0; dy < D1Dy; ++dy)
2258  {
2259  for (int qx = 0; qx < Q1D; ++qx)
2260  {
2261  t[qx] = 0.0;
2262  for (int qy = 0; qy < Q1D; ++qy)
2263  {
2264  const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2265  t[qx] += wy * wy * op(qx,qy,e);
2266  }
2267  }
2268
2269  for (int dx = 0; dx < D1Dx; ++dx)
2270  {
2271  for (int qx = 0; qx < Q1D; ++qx)
2272  {
2273  const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2274  D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2275  }
2276  }
2277  }
2278
2279  osc += D1Dx * D1Dy;
2280  } // loop c
2281  }); // end of element loop
2282 }
2283
2284 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2285 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2286  const int Q1D,
2287  const bool symmetric,
2288  const int NE,
2289  const Array<double> &bo,
2290  const Array<double> &bc,
2291  const Array<double> &go,
2292  const Array<double> &gc,
2293  const Vector &pa_data,
2294  Vector &diag)
2295 {
2296  constexpr static int VDIM = 3;
2297  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2298  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2299
2300  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2301  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2302  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2303  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2304  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2305  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2306
2307  const int s = symmetric ? 6 : 9;
2308  const int i11 = 0;
2309  const int i12 = 1;
2310  const int i13 = 2;
2311  const int i21 = symmetric ? i12 : 3;
2312  const int i22 = symmetric ? 3 : 4;
2313  const int i23 = symmetric ? 4 : 5;
2314  const int i31 = symmetric ? i13 : 6;
2315  const int i32 = symmetric ? i23 : 7;
2316  const int i33 = symmetric ? 5 : 8;
2317
2318  MFEM_FORALL(e, NE,
2319  {
2320  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2321  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2322  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2323  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2324  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2325
2326  // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2327  // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2328
2329  int osc = 0;
2330
2331  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2332  {
2333  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2334  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2335  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2336
2337  double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2338
2339  // z contraction
2340  for (int qx = 0; qx < Q1D; ++qx)
2341  {
2342  for (int qy = 0; qy < Q1D; ++qy)
2343  {
2344  for (int dz = 0; dz < D1Dz; ++dz)
2345  {
2346  for (int i=0; i<s; ++i)
2347  {
2348  for (int d=0; d<3; ++d)
2349  {
2350  zt[qx][qy][dz][i][d] = 0.0;
2351  }
2352  }
2353
2354  for (int qz = 0; qz < Q1D; ++qz)
2355  {
2356  const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2357  const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2358
2359  for (int i=0; i<s; ++i)
2360  {
2361  zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2362  zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2363  zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2364  }
2365  }
2366  }
2367  }
2368  } // end of z contraction
2369
2370  double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2371
2372  // y contraction
2373  for (int qx = 0; qx < Q1D; ++qx)
2374  {
2375  for (int dz = 0; dz < D1Dz; ++dz)
2376  {
2377  for (int dy = 0; dy < D1Dy; ++dy)
2378  {
2379  for (int i=0; i<s; ++i)
2380  {
2381  for (int d=0; d<3; ++d)
2382  for (int j=0; j<3; ++j)
2383  {
2384  yt[qx][dy][dz][i][d][j] = 0.0;
2385  }
2386  }
2387
2388  for (int qy = 0; qy < Q1D; ++qy)
2389  {
2390  const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2391  const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2392
2393  for (int i=0; i<s; ++i)
2394  {
2395  for (int d=0; d<3; ++d)
2396  {
2397  yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2398  yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2399  yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2400  }
2401  }
2402  }
2403  }
2404  }
2405  } // end of y contraction
2406
2407  // x contraction
2408  for (int dz = 0; dz < D1Dz; ++dz)
2409  {
2410  for (int dy = 0; dy < D1Dy; ++dy)
2411  {
2412  for (int dx = 0; dx < D1Dx; ++dx)
2413  {
2414  for (int qx = 0; qx < Q1D; ++qx)
2415  {
2416  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2417  const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2418
2419  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2420  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2421  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2422  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2423  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2424
2425  /*
2426  const double O11 = op(q,0,e);
2427  const double O12 = op(q,1,e);
2428  const double O13 = op(q,2,e);
2429  const double O22 = op(q,3,e);
2430  const double O23 = op(q,4,e);
2431  const double O33 = op(q,5,e);
2432  */
2433
2434  if (c == 0)
2435  {
2436  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2437  const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2438  - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2439
2440  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2441  }
2442  else if (c == 1)
2443  {
2444  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2445  const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2446  - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2447  + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2448
2449  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2450  }
2451  else
2452  {
2453  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2454  const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2455  - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2456  + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2457
2458  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2459  }
2460  }
2461  }
2462  }
2463  } // end of x contraction
2464
2465  osc += D1Dx * D1Dy * D1Dz;
2466  } // loop c
2467  }); // end of element loop
2468 }
2469
2470 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2471 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2472  const int Q1D,
2473  const bool symmetric,
2474  const int NE,
2475  const Array<double> &bo,
2476  const Array<double> &bc,
2477  const Array<double> &go,
2478  const Array<double> &gc,
2479  const Vector &pa_data,
2480  Vector &diag)
2481 {
2482  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2483  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2484
2485  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2486  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2487  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2488  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2489  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2490  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2491
2492  const int s = symmetric ? 6 : 9;
2493  const int i11 = 0;
2494  const int i12 = 1;
2495  const int i13 = 2;
2496  const int i21 = symmetric ? i12 : 3;
2497  const int i22 = symmetric ? 3 : 4;
2498  const int i23 = symmetric ? 4 : 5;
2499  const int i31 = symmetric ? i13 : 6;
2500  const int i32 = symmetric ? i23 : 7;
2501  const int i33 = symmetric ? 5 : 8;
2502
2503  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2504  {
2505  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2506  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2507  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2508  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2509  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2510
2511  constexpr int VDIM = 3;
2512
2513  MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2514  MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2515  MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2516  MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2517
2518  double ope[9];
2519  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2520
2522  {
2524  {
2526  {
2527  for (int i=0; i<s; ++i)
2528  {
2529  ope[i] = op(qx,qy,qz,i,e);
2530  }
2531  }
2532  }
2533  }
2534
2535  const int tidx = MFEM_THREAD_ID(x);
2536  const int tidy = MFEM_THREAD_ID(y);
2537  const int tidz = MFEM_THREAD_ID(z);
2538
2539  if (tidz == 0)
2540  {
2542  {
2544  {
2545  sBc[q][d] = Bc(q,d);
2546  sGc[q][d] = Gc(q,d);
2547  if (d < D1D-1)
2548  {
2549  sBo[q][d] = Bo(q,d);
2550  sGo[q][d] = Go(q,d);
2551  }
2552  }
2553  }
2554  }
2556
2557  int osc = 0;
2558  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2559  {
2560  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2561  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2562  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2563
2564  double dxyz = 0.0;
2565
2566  for (int qz=0; qz < Q1D; ++qz)
2567  {
2568  if (tidz == qz)
2569  {
2570  for (int i=0; i<s; ++i)
2571  {
2572  sop[i][tidx][tidy] = ope[i];
2573  }
2574  }
2575
2577
2579  {
2580  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2581  const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2582
2584  {
2586  {
2587  for (int qy = 0; qy < Q1D; ++qy)
2588  {
2589  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2590  const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2591
2592  for (int qx = 0; qx < Q1D; ++qx)
2593  {
2594  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2595  const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2596
2597  if (c == 0)
2598  {
2599  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2600
2601  // (u_0)_{x_2} O22 (u_0)_{x_2}
2602  dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2603
2604  // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2605  dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2606
2607  // (u_0)_{x_1} O33 (u_0)_{x_1}
2608  dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2609  }
2610  else if (c == 1)
2611  {
2612  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2613
2614  // (u_1)_{x_2} O11 (u_1)_{x_2}
2615  dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2616
2617  // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2618  dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2619
2620  // (u_1)_{x_0} O33 (u_1)_{x_0})
2621  dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2622  }
2623  else
2624  {
2625  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2626
2627  // (u_2)_{x_1} O11 (u_2)_{x_1}
2628  dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2629
2630  // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2631  dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2632
2633  // (u_2)_{x_0} O22 (u_2)_{x_0}
2634  dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2635  }
2636  }
2637  }
2638  }
2639  }
2640  }
2641
2643  } // qz loop
2644
2646  {
2648  {
2650  {
2651  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2652  }
2653  }
2654  }
2655
2656  osc += D1Dx * D1Dy * D1Dz;
2657  } // c loop
2658  }); // end of element loop
2659 }
2660
2661 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2662 {
2663  if (dim == 3)
2664  {
2666  {
2667  const int ID = (dofs1D << 4) | quad1D;
2668  switch (ID)
2669  {
2670  case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2671  symmetric, ne,
2672  mapsO->B, mapsC->B,
2673  mapsO->G, mapsC->G,
2674  pa_data, diag);
2675  case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2676  symmetric, ne,
2677  mapsO->B, mapsC->B,
2678  mapsO->G, mapsC->G,
2679  pa_data, diag);
2680  case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2681  symmetric, ne,
2682  mapsO->B, mapsC->B,
2683  mapsO->G, mapsC->G,
2684  pa_data, diag);
2685  case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2686  symmetric, ne,
2687  mapsO->B, mapsC->B,
2688  mapsO->G, mapsC->G,
2689  pa_data, diag);
2690  default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2691  mapsO->B, mapsC->B,
2692  mapsO->G, mapsC->G,
2693  pa_data, diag);
2694  }
2695  }
2696  else
2698  mapsO->B, mapsC->B,
2699  mapsO->G, mapsC->G,
2700  pa_data, diag);
2701  }
2702  else if (dim == 2)
2703  {
2705  mapsO->B, mapsC->G, pa_data, diag);
2706  }
2707  else
2708  {
2709  MFEM_ABORT("Unsupported dimension!");
2710  }
2711 }
2712
2713 // Apply to x corresponding to DOFs in H^1 (trial), whose gradients are
2714 // integrated against H(curl) test functions corresponding to y.
2715 void PAHcurlH1Apply3D(const int D1D,
2716  const int Q1D,
2717  const int NE,
2718  const Array<double> &bc,
2719  const Array<double> &gc,
2720  const Array<double> &bot,
2721  const Array<double> &bct,
2722  const Vector &pa_data,
2723  const Vector &x,
2724  Vector &y)
2725 {
2726  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2727  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2728
2729  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2730  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2731
2732  constexpr static int VDIM = 3;
2733
2734  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2735  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2736  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2737  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2738  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2739  auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2740  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2741
2742  MFEM_FORALL(e, NE,
2743  {
2744  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2745
2746  for (int qz = 0; qz < Q1D; ++qz)
2747  {
2748  for (int qy = 0; qy < Q1D; ++qy)
2749  {
2750  for (int qx = 0; qx < Q1D; ++qx)
2751  {
2752  for (int c = 0; c < VDIM; ++c)
2753  {
2754  mass[qz][qy][qx][c] = 0.0;
2755  }
2756  }
2757  }
2758  }
2759
2760  for (int dz = 0; dz < D1D; ++dz)
2761  {
2763  for (int qy = 0; qy < Q1D; ++qy)
2764  {
2765  for (int qx = 0; qx < Q1D; ++qx)
2766  {
2770  }
2771  }
2772  for (int dy = 0; dy < D1D; ++dy)
2773  {
2775  for (int qx = 0; qx < Q1D; ++qx)
2776  {
2779  }
2780  for (int dx = 0; dx < D1D; ++dx)
2781  {
2782  const double s = X(dx,dy,dz,e);
2783  for (int qx = 0; qx < Q1D; ++qx)
2784  {
2785  gradX[qx][0] += s * Bc(qx,dx);
2786  gradX[qx][1] += s * Gc(qx,dx);
2787  }
2788  }
2789  for (int qy = 0; qy < Q1D; ++qy)
2790  {
2791  const double wy = Bc(qy,dy);
2792  const double wDy = Gc(qy,dy);
2793  for (int qx = 0; qx < Q1D; ++qx)
2794  {
2795  const double wx = gradX[qx][0];
2796  const double wDx = gradX[qx][1];
2797  gradXY[qy][qx][0] += wDx * wy;
2798  gradXY[qy][qx][1] += wx * wDy;
2799  gradXY[qy][qx][2] += wx * wy;
2800  }
2801  }
2802  }
2803  for (int qz = 0; qz < Q1D; ++qz)
2804  {
2805  const double wz = Bc(qz,dz);
2806  const double wDz = Gc(qz,dz);
2807  for (int qy = 0; qy < Q1D; ++qy)
2808  {
2809  for (int qx = 0; qx < Q1D; ++qx)
2810  {
2811  mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2812  mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2813  mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2814  }
2815  }
2816  }
2817  }
2818
2819  // Apply D operator.
2820  for (int qz = 0; qz < Q1D; ++qz)
2821  {
2822  for (int qy = 0; qy < Q1D; ++qy)
2823  {
2824  for (int qx = 0; qx < Q1D; ++qx)
2825  {
2826  const double O11 = op(qx,qy,qz,0,e);
2827  const double O12 = op(qx,qy,qz,1,e);
2828  const double O13 = op(qx,qy,qz,2,e);
2829  const double O22 = op(qx,qy,qz,3,e);
2830  const double O23 = op(qx,qy,qz,4,e);
2831  const double O33 = op(qx,qy,qz,5,e);
2832  const double massX = mass[qz][qy][qx][0];
2833  const double massY = mass[qz][qy][qx][1];
2834  const double massZ = mass[qz][qy][qx][2];
2835  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2836  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2837  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2838  }
2839  }
2840  }
2841
2842  for (int qz = 0; qz < Q1D; ++qz)
2843  {
2844  double massXY[MAX_D1D][MAX_D1D];
2845
2846  int osc = 0;
2847
2848  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2849  {
2850  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2851  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2852  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2853
2854  for (int dy = 0; dy < D1Dy; ++dy)
2855  {
2856  for (int dx = 0; dx < D1Dx; ++dx)
2857  {
2858  massXY[dy][dx] = 0.0;
2859  }
2860  }
2861  for (int qy = 0; qy < Q1D; ++qy)
2862  {
2863  double massX[MAX_D1D];
2864  for (int dx = 0; dx < D1Dx; ++dx)
2865  {
2866  massX[dx] = 0;
2867  }
2868  for (int qx = 0; qx < Q1D; ++qx)
2869  {
2870  for (int dx = 0; dx < D1Dx; ++dx)
2871  {
2872  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2873  }
2874  }
2875  for (int dy = 0; dy < D1Dy; ++dy)
2876  {
2877  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2878  for (int dx = 0; dx < D1Dx; ++dx)
2879  {
2880  massXY[dy][dx] += massX[dx] * wy;
2881  }
2882  }
2883  }
2884
2885  for (int dz = 0; dz < D1Dz; ++dz)
2886  {
2887  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2888  for (int dy = 0; dy < D1Dy; ++dy)
2889  {
2890  for (int dx = 0; dx < D1Dx; ++dx)
2891  {
2892  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2893  }
2894  }
2895  }
2896
2897  osc += D1Dx * D1Dy * D1Dz;
2898  } // loop c
2899  } // loop qz
2900  }); // end of element loop
2901 }
2902
2903 // Apply to x corresponding to DOFs in H(curl), integrated
2904 // against gradients of H^1 functions corresponding to y.
2905 void PAHcurlH1ApplyTranspose3D(const int D1D,
2906  const int Q1D,
2907  const int NE,
2908  const Array<double> &bc,
2909  const Array<double> &bo,
2910  const Array<double> &bct,
2911  const Array<double> &gct,
2912  const Vector &pa_data,
2913  const Vector &x,
2914  Vector &y)
2915 {
2916  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2917  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2918
2919  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2920  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2921
2922  constexpr static int VDIM = 3;
2923
2924  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2925  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2926  auto Bt = Reshape(bct.Read(), D1D, Q1D);
2927  auto Gt = Reshape(gct.Read(), D1D, Q1D);
2928  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2929  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2930  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
2931
2932  MFEM_FORALL(e, NE,
2933  {
2934  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2935
2936  for (int qz = 0; qz < Q1D; ++qz)
2937  {
2938  for (int qy = 0; qy < Q1D; ++qy)
2939  {
2940  for (int qx = 0; qx < Q1D; ++qx)
2941  {
2942  for (int c = 0; c < VDIM; ++c)
2943  {
2944  mass[qz][qy][qx][c] = 0.0;
2945  }
2946  }
2947  }
2948  }
2949
2950  int osc = 0;
2951
2952  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2953  {
2954  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2955  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2956  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2957
2958  for (int dz = 0; dz < D1Dz; ++dz)
2959  {
2960  double massXY[MAX_Q1D][MAX_Q1D];
2961  for (int qy = 0; qy < Q1D; ++qy)
2962  {
2963  for (int qx = 0; qx < Q1D; ++qx)
2964  {
2965  massXY[qy][qx] = 0.0;
2966  }
2967  }
2968
2969  for (int dy = 0; dy < D1Dy; ++dy)
2970  {
2971  double massX[MAX_Q1D];
2972  for (int qx = 0; qx < Q1D; ++qx)
2973  {
2974  massX[qx] = 0.0;
2975  }
2976
2977  for (int dx = 0; dx < D1Dx; ++dx)
2978  {
2979  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
2980  for (int qx = 0; qx < Q1D; ++qx)
2981  {
2982  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2983  }
2984  }
2985
2986  for (int qy = 0; qy < Q1D; ++qy)
2987  {
2988  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
2989  for (int qx = 0; qx < Q1D; ++qx)
2990  {
2991  const double wx = massX[qx];
2992  massXY[qy][qx] += wx * wy;
2993  }
2994  }
2995  }
2996
2997  for (int qz = 0; qz < Q1D; ++qz)
2998  {
2999  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
3000  for (int qy = 0; qy < Q1D; ++qy)
3001  {
3002  for (int qx = 0; qx < Q1D; ++qx)
3003  {
3004  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
3005  }
3006  }
3007  }
3008  }
3009
3010  osc += D1Dx * D1Dy * D1Dz;
3011  } // loop (c) over components
3012
3013  // Apply D operator.
3014  for (int qz = 0; qz < Q1D; ++qz)
3015  {
3016  for (int qy = 0; qy < Q1D; ++qy)
3017  {
3018  for (int qx = 0; qx < Q1D; ++qx)
3019  {
3020  const double O11 = op(qx,qy,qz,0,e);
3021  const double O12 = op(qx,qy,qz,1,e);
3022  const double O13 = op(qx,qy,qz,2,e);
3023  const double O22 = op(qx,qy,qz,3,e);
3024  const double O23 = op(qx,qy,qz,4,e);
3025  const double O33 = op(qx,qy,qz,5,e);
3026  const double massX = mass[qz][qy][qx][0];
3027  const double massY = mass[qz][qy][qx][1];
3028  const double massZ = mass[qz][qy][qx][2];
3029  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
3030  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
3031  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
3032  }
3033  }
3034  }
3035
3036  for (int qz = 0; qz < Q1D; ++qz)
3037  {
3039  for (int dy = 0; dy < D1D; ++dy)
3040  {
3041  for (int dx = 0; dx < D1D; ++dx)
3042  {
3046  }
3047  }
3048  for (int qy = 0; qy < Q1D; ++qy)
3049  {
3051  for (int dx = 0; dx < D1D; ++dx)
3052  {
3056  }
3057  for (int qx = 0; qx < Q1D; ++qx)
3058  {
3059  const double gX = mass[qz][qy][qx][0];
3060  const double gY = mass[qz][qy][qx][1];
3061  const double gZ = mass[qz][qy][qx][2];
3062  for (int dx = 0; dx < D1D; ++dx)
3063  {
3064  const double wx = Bt(dx,qx);
3065  const double wDx = Gt(dx,qx);
3066  gradX[dx][0] += gX * wDx;
3067  gradX[dx][1] += gY * wx;
3068  gradX[dx][2] += gZ * wx;
3069  }
3070  }
3071  for (int dy = 0; dy < D1D; ++dy)
3072  {
3073  const double wy = Bt(dy,qy);
3074  const double wDy = Gt(dy,qy);
3075  for (int dx = 0; dx < D1D; ++dx)
3076  {
3080  }
3081  }
3082  }
3083  for (int dz = 0; dz < D1D; ++dz)
3084  {
3085  const double wz = Bt(dz,qz);
3086  const double wDz = Gt(dz,qz);
3087  for (int dy = 0; dy < D1D; ++dy)
3088  {
3089  for (int dx = 0; dx < D1D; ++dx)
3090  {
3091  Y(dx,dy,dz,e) +=
3095  }
3096  }
3097  }
3098  } // loop qz
3099  }); // end of element loop
3100 }
3101
3102 // Apply to x corresponding to DOFs in H^1 (trial), whose gradients are
3103 // integrated against H(curl) test functions corresponding to y.
3104 void PAHcurlH1Apply2D(const int D1D,
3105  const int Q1D,
3106  const int NE,
3107  const Array<double> &bc,
3108  const Array<double> &gc,
3109  const Array<double> &bot,
3110  const Array<double> &bct,
3111  const Vector &pa_data,
3112  const Vector &x,
3113  Vector &y)
3114 {
3115  constexpr static int VDIM = 2;
3116  constexpr static int MAX_D1D = HCURL_MAX_D1D;
3117  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3118
3119  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3120  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3121  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3122  auto Bct = Reshape(bct.Read(), D1D, Q1D);
3123  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3124  auto X = Reshape(x.Read(), D1D, D1D, NE);
3125  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
3126
3127  MFEM_FORALL(e, NE,
3128  {
3129  double mass[MAX_Q1D][MAX_Q1D][VDIM];
3130
3131  for (int qy = 0; qy < Q1D; ++qy)
3132  {
3133  for (int qx = 0; qx < Q1D; ++qx)
3134  {
3135  for (int c = 0; c < VDIM; ++c)
3136  {
3137  mass[qy][qx][c] = 0.0;
3138  }
3139  }
3140  }
3141
3142  for (int dy = 0; dy < D1D; ++dy)
3143  {
3145  for (int qx = 0; qx < Q1D; ++qx)
3146  {
3149  }
3150  for (int dx = 0; dx < D1D; ++dx)
3151  {
3152  const double s = X(dx,dy,e);
3153  for (int qx = 0; qx < Q1D; ++qx)
3154  {
3155  gradX[qx][0] += s * Bc(qx,dx);
3156  gradX[qx][1] += s * Gc(qx,dx);
3157  }
3158  }
3159  for (int qy = 0; qy < Q1D; ++qy)
3160  {
3161  const double wy = Bc(qy,dy);
3162  const double wDy = Gc(qy,dy);
3163  for (int qx = 0; qx < Q1D; ++qx)
3164  {
3165  const double wx = gradX[qx][0];
3166  const double wDx = gradX[qx][1];
3167  mass[qy][qx][0] += wDx * wy;
3168  mass[qy][qx][1] += wx * wDy;
3169  }
3170  }
3171  }
3172
3173  // Apply D operator.
3174  for (int qy = 0; qy < Q1D; ++qy)
3175  {
3176  for (int qx = 0; qx < Q1D; ++qx)
3177  {
3178  const double O11 = op(qx,qy,0,e);
3179  const double O12 = op(qx,qy,1,e);
3180  const double O22 = op(qx,qy,2,e);
3181  const double massX = mass[qy][qx][0];
3182  const double massY = mass[qy][qx][1];
3183  mass[qy][qx][0] = (O11*massX)+(O12*massY);
3184  mass[qy][qx][1] = (O12*massX)+(O22*massY);
3185  }
3186  }
3187
3188  for (int qy = 0; qy < Q1D; ++qy)
3189  {
3190  int osc = 0;
3191
3192  for (int c = 0; c < VDIM; ++c) // loop over x, y components
3193  {
3194  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3195  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3196
3197  double massX[MAX_D1D];
3198  for (int dx = 0; dx < D1Dx; ++dx)
3199  {
3200  massX[dx] = 0;
3201  }
3202  for (int qx = 0; qx < Q1D; ++qx)
3203  {
3204  for (int dx = 0; dx < D1Dx; ++dx)
3205  {
3206  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3207  }
3208  }
3209
3210  for (int dy = 0; dy < D1Dy; ++dy)
3211  {
3212  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3213
3214  for (int dx = 0; dx < D1Dx; ++dx)
3215  {
3216  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
3217  }
3218  }
3219
3220  osc += D1Dx * D1Dy;
3221  } // loop c
3222  }
3223  }); // end of element loop
3224 }
3225
3226 // Apply to x corresponding to DOFs in H(curl), integrated
3227 // against gradients of H^1 functions corresponding to y.
3228 void PAHcurlH1ApplyTranspose2D(const int D1D,
3229  const int Q1D,
3230  const int NE,
3231  const Array<double> &bc,
3232  const Array<double> &bo,
3233  const Array<double> &bct,
3234  const Array<double> &gct,
3235  const Vector &pa_data,
3236  const Vector &x,
3237  Vector &y)
3238 {
3239  constexpr static int VDIM = 2;
3240  constexpr static int MAX_D1D = HCURL_MAX_D1D;
3241  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3242
3243  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3244  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3245  auto Bt = Reshape(bct.Read(), D1D, Q1D);
3246  auto Gt = Reshape(gct.Read(), D1D, Q1D);
3247  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3248  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
3249  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
3250
3251  MFEM_FORALL(e, NE,
3252  {
3253  double mass[MAX_Q1D][MAX_Q1D][VDIM];
3254
3255  for (int qy = 0; qy < Q1D; ++qy)
3256  {
3257  for (int qx = 0; qx < Q1D; ++qx)
3258  {
3259  for (int c = 0; c < VDIM; ++c)
3260  {
3261  mass[qy][qx][c] = 0.0;
3262  }
3263  }
3264  }
3265
3266  int osc = 0;
3267
3268  for (int c = 0; c < VDIM; ++c) // loop over x, y components
3269  {
3270  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3271  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3272
3273  for (int dy = 0; dy < D1Dy; ++dy)
3274  {
3275  double massX[MAX_Q1D];
3276  for (int qx = 0; qx < Q1D; ++qx)
3277  {
3278  massX[qx] = 0.0;
3279  }
3280
3281  for (int dx = 0; dx < D1Dx; ++dx)
3282  {
3283  const double t = X(dx + (dy * D1Dx) + osc, e);
3284  for (int qx = 0; qx < Q1D; ++qx)
3285  {
3286  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
3287  }
3288  }
3289
3290  for (int qy = 0; qy < Q1D; ++qy)
3291  {
3292  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
3293  for (int qx = 0; qx < Q1D; ++qx)
3294  {
3295  mass[qy][qx][c] += massX[qx] * wy;
3296  }
3297  }
3298  }
3299
3300  osc += D1Dx * D1Dy;
3301  } // loop (c) over components
3302
3303  // Apply D operator.
3304  for (int qy = 0; qy < Q1D; ++qy)
3305  {
3306  for (int qx = 0; qx < Q1D; ++qx)
3307  {
3308  const double O11 = op(qx,qy,0,e);
3309  const double O12 = op(qx,qy,1,e);
3310  const double O22 = op(qx,qy,2,e);
3311  const double massX = mass[qy][qx][0];
3312  const double massY = mass[qy][qx][1];
3313  mass[qy][qx][0] = (O11*massX)+(O12*massY);
3314  mass[qy][qx][1] = (O12*massX)+(O22*massY);
3315  }
3316  }
3317
3318  for (int qy = 0; qy < Q1D; ++qy)
3319  {
3321  for (int dx = 0; dx < D1D; ++dx)
3322  {
3325  }
3326  for (int qx = 0; qx < Q1D; ++qx)
3327  {
3328  const double gX = mass[qy][qx][0];
3329  const double gY = mass[qy][qx][1];
3330  for (int dx = 0; dx < D1D; ++dx)
3331  {
3332  const double wx = Bt(dx,qx);
3333  const double wDx = Gt(dx,qx);
3334  gradX[dx][0] += gX * wDx;
3335  gradX[dx][1] += gY * wx;
3336  }
3337  }
3338  for (int dy = 0; dy < D1D; ++dy)
3339  {
3340  const double wy = Bt(dy,qy);
3341  const double wDy = Gt(dy,qy);
3342  for (int dx = 0; dx < D1D; ++dx)
3343  {
3345  }
3346  }
3347  }
3348  }); // end of element loop
3349 }
3350
3351 // PA H(curl) Mass Assemble 3D kernel
3352 void PAHcurlL2Setup(const int NQ,
3353  const int coeffDim,
3354  const int NE,
3355  const Array<double> &w,
3356  Vector &coeff,
3357  Vector &op)
3358 {
3360  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
3361  auto y = Reshape(op.Write(), coeffDim, NQ, NE);
3362
3363  MFEM_FORALL(e, NE,
3364  {
3365  for (int q = 0; q < NQ; ++q)
3366  {
3367  for (int c=0; c<coeffDim; ++c)
3368  {
3369  y(c,q,e) = W[q] * C(c,q,e);
3370  }
3371  }
3372  });
3373 }
3374
3375 void MixedScalarCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3376  const FiniteElementSpace &test_fes)
3377 {
3378  // Assumes tensor-product elements
3379  Mesh *mesh = trial_fes.GetMesh();
3380  const FiniteElement *fel = trial_fes.GetFE(0); // In H(curl)
3381  const FiniteElement *eltest = test_fes.GetFE(0); // In scalar space
3382
3383  const VectorTensorFiniteElement *el =
3384  dynamic_cast<const VectorTensorFiniteElement*>(fel);
3385  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
3386
3387  if (el->GetDerivType() != mfem::FiniteElement::CURL)
3388  {
3389  MFEM_ABORT("Unknown kernel.");
3390  }
3391
3392  const IntegrationRule *ir
3393  = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest,
3394  *mesh->GetElementTransformation(0));
3395
3396  const int dims = el->GetDim();
3397  MFEM_VERIFY(dims == 2, "");
3398
3399  const int nq = ir->GetNPoints();
3400  dim = mesh->Dimension();
3401  MFEM_VERIFY(dim == 2, "");
3402
3403  ne = test_fes.GetNE();
3406  dofs1D = mapsC->ndof;
3408
3409  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3410
3411  if (el->GetOrder() == eltest->GetOrder())
3412  {
3413  dofs1Dtest = dofs1D;
3414  }
3415  else
3416  {
3417  dofs1Dtest = dofs1D - 1;
3418  }
3419
3420  pa_data.SetSize(nq * ne, Device::GetMemoryType());
3421
3423  CoefficientVector coeff(Q, qs, CoefficientStorage::FULL);
3424
3425  if (dim == 2)
3426  {
3427  PACurlL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
3428  }
3429  else
3430  {
3431  MFEM_ABORT("Unsupported dimension!");
3432  }
3433 }
3434
3435 void MixedScalarCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3436 {
3437  if (dim == 2)
3438  {
3439  PACurlL2Apply2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3440  mapsC->Bt, mapsC->G, pa_data, x, y);
3441  }
3442  else
3443  {
3444  MFEM_ABORT("Unsupported dimension!");
3445  }
3446 }
3447
3449  Vector &y) const
3450 {
3451  if (dim == 2)
3452  {
3453  PACurlL2ApplyTranspose2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3454  mapsC->B, mapsC->Gt, pa_data, x, y);
3455  }
3456  else
3457  {
3458  MFEM_ABORT("Unsupported dimension!");
3459  }
3460 }
3461
3462 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3463  const FiniteElementSpace &test_fes)
3464 {
3465  // Assumes tensor-product elements, with vector test and trial spaces.
3466  Mesh *mesh = trial_fes.GetMesh();
3467  const FiniteElement *trial_fel = trial_fes.GetFE(0);
3468  const FiniteElement *test_fel = test_fes.GetFE(0);
3469
3470  const VectorTensorFiniteElement *trial_el =
3471  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3472  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3473
3474  const VectorTensorFiniteElement *test_el =
3475  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3476  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
3477
3478  const IntegrationRule *ir
3479  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
3480  *mesh->GetElementTransformation(0));
3481  const int dims = trial_el->GetDim();
3482  MFEM_VERIFY(dims == 3, "");
3483
3484  const int nq = ir->GetNPoints();
3485  dim = mesh->Dimension();
3486  MFEM_VERIFY(dim == 3, "");
3487
3488  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
3489
3490  ne = trial_fes.GetNE();
3491  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
3496  dofs1D = mapsC->ndof;
3498  dofs1Dtest = mapsCtest->ndof;
3499
3500  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3501
3502  testType = test_el->GetDerivType();
3503  trialType = trial_el->GetDerivType();
3504
3505  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
3506  coeffDim = (DQ ? 3 : 1);
3507
3508  const bool curlSpaces = (testType == mfem::FiniteElement::CURL &&
3509  trialType == mfem::FiniteElement::CURL);
3510
3511  const int ndata = curlSpaces ? (coeffDim == 1 ? 1 : 9) : symmDims;
3512  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
3513
3515  CoefficientVector coeff(qs, CoefficientStorage::FULL);
3516  if (Q) { coeff.Project(*Q); }
3517  else if (DQ) { coeff.Project(*DQ); }
3518  else { coeff.SetConstant(1.0); }
3519
3520  if (testType == mfem::FiniteElement::CURL &&
3521  trialType == mfem::FiniteElement::CURL && dim == 3)
3522  {
3523  if (coeffDim == 1)
3524  {
3525  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
3526  }
3527  else
3528  {
3529  PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(),
3530  geom->J, coeff, pa_data);
3531  }
3532  }
3533  else if (testType == mfem::FiniteElement::DIV &&
3534  trialType == mfem::FiniteElement::CURL && dim == 3 &&
3535  test_fel->GetOrder() == trial_fel->GetOrder())
3536  {
3537  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
3538  pa_data);
3539  }
3540  else
3541  {
3542  MFEM_ABORT("Unknown kernel.");
3543  }
3544 }
3545
3546 // Apply to x corresponding to DOFs in H(curl) (trial), whose curl is
3547 // integrated against H(curl) test functions corresponding to y.
3548 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3549 static void PAHcurlL2Apply3D(const int D1D,
3550  const int Q1D,
3551  const int coeffDim,
3552  const int NE,
3553  const Array<double> &bo,
3554  const Array<double> &bc,
3555  const Array<double> &bot,
3556  const Array<double> &bct,
3557  const Array<double> &gc,
3558  const Vector &pa_data,
3559  const Vector &x,
3560  Vector &y)
3561 {
3562  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3563  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3564  // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
3565  // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
3566  // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
3567  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3568  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3569  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3570
3571  constexpr static int VDIM = 3;
3572
3573  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3574  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3575  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3576  auto Bct = Reshape(bct.Read(), D1D, Q1D);
3577  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3578  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3579  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3580  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3581
3582  MFEM_FORALL(e, NE,
3583  {
3584  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3585  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3586
3587  for (int qz = 0; qz < Q1D; ++qz)
3588  {
3589  for (int qy = 0; qy < Q1D; ++qy)
3590  {
3591  for (int qx = 0; qx < Q1D; ++qx)
3592  {
3593  for (int c = 0; c < VDIM; ++c)
3594  {
3595  curl[qz][qy][qx][c] = 0.0;
3596  }
3597  }
3598  }
3599  }
3600
3601  // We treat x, y, z components separately for optimization specific to each.
3602
3603  int osc = 0;
3604
3605  {
3606  // x component
3607  const int D1Dz = D1D;
3608  const int D1Dy = D1D;
3609  const int D1Dx = D1D - 1;
3610
3611  for (int dz = 0; dz < D1Dz; ++dz)
3612  {
3614  for (int qy = 0; qy < Q1D; ++qy)
3615  {
3616  for (int qx = 0; qx < Q1D; ++qx)
3617  {
3618  for (int d = 0; d < 2; ++d)
3619  {
3621  }
3622  }
3623  }
3624
3625  for (int dy = 0; dy < D1Dy; ++dy)
3626  {
3627  double massX[MAX_Q1D];
3628  for (int qx = 0; qx < Q1D; ++qx)
3629  {
3630  massX[qx] = 0.0;
3631  }
3632
3633  for (int dx = 0; dx < D1Dx; ++dx)
3634  {
3635  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3636  for (int qx = 0; qx < Q1D; ++qx)
3637  {
3638  massX[qx] += t * Bo(qx,dx);
3639  }
3640  }
3641
3642  for (int qy = 0; qy < Q1D; ++qy)
3643  {
3644  const double wy = Bc(qy,dy);
3645  const double wDy = Gc(qy,dy);
3646  for (int qx = 0; qx < Q1D; ++qx)
3647  {
3648  const double wx = massX[qx];
3649  gradXY[qy][qx][0] += wx * wDy;
3650  gradXY[qy][qx][1] += wx * wy;
3651  }
3652  }
3653  }
3654
3655  for (int qz = 0; qz < Q1D; ++qz)
3656  {
3657  const double wz = Bc(qz,dz);
3658  const double wDz = Gc(qz,dz);
3659  for (int qy = 0; qy < Q1D; ++qy)
3660  {
3661  for (int qx = 0; qx < Q1D; ++qx)
3662  {
3663  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3664  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3665  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3666  }
3667  }
3668  }
3669  }
3670
3671  osc += D1Dx * D1Dy * D1Dz;
3672  }
3673
3674  {
3675  // y component
3676  const int D1Dz = D1D;
3677  const int D1Dy = D1D - 1;
3678  const int D1Dx = D1D;
3679
3680  for (int dz = 0; dz < D1Dz; ++dz)
3681  {
3683  for (int qy = 0; qy < Q1D; ++qy)
3684  {
3685  for (int qx = 0; qx < Q1D; ++qx)
3686  {
3687  for (int d = 0; d < 2; ++d)
3688  {
3690  }
3691  }
3692  }
3693
3694  for (int dx = 0; dx < D1Dx; ++dx)
3695  {
3696  double massY[MAX_Q1D];
3697  for (int qy = 0; qy < Q1D; ++qy)
3698  {
3699  massY[qy] = 0.0;
3700  }
3701
3702  for (int dy = 0; dy < D1Dy; ++dy)
3703  {
3704  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3705  for (int qy = 0; qy < Q1D; ++qy)
3706  {
3707  massY[qy] += t * Bo(qy,dy);
3708  }
3709  }
3710
3711  for (int qx = 0; qx < Q1D; ++qx)
3712  {
3713  const double wx = Bc(qx,dx);
3714  const double wDx = Gc(qx,dx);
3715  for (int qy = 0; qy < Q1D; ++qy)
3716  {
3717  const double wy = massY[qy];
3718  gradXY[qy][qx][0] += wDx * wy;
3719  gradXY[qy][qx][1] += wx * wy;
3720  }
3721  }
3722  }
3723
3724  for (int qz = 0; qz < Q1D; ++qz)
3725  {
3726  const double wz = Bc(qz,dz);
3727  const double wDz = Gc(qz,dz);
3728  for (int qy = 0; qy < Q1D; ++qy)
3729  {
3730  for (int qx = 0; qx < Q1D; ++qx)
3731  {
3732  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3733  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3734  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3735  }
3736  }
3737  }
3738  }
3739
3740  osc += D1Dx * D1Dy * D1Dz;
3741  }
3742
3743  {
3744  // z component
3745  const int D1Dz = D1D - 1;
3746  const int D1Dy = D1D;
3747  const int D1Dx = D1D;
3748
3749  for (int dx = 0; dx < D1Dx; ++dx)
3750  {
3752  for (int qz = 0; qz < Q1D; ++qz)
3753  {
3754  for (int qy = 0; qy < Q1D; ++qy)
3755  {
3756  for (int d = 0; d < 2; ++d)
3757  {
3759  }
3760  }
3761  }
3762
3763  for (int dy = 0; dy < D1Dy; ++dy)
3764  {
3765  double massZ[MAX_Q1D];
3766  for (int qz = 0; qz < Q1D; ++qz)
3767  {
3768  massZ[qz] = 0.0;
3769  }
3770
3771  for (int dz = 0; dz < D1Dz; ++dz)
3772  {
3773  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3774  for (int qz = 0; qz < Q1D; ++qz)
3775  {
3776  massZ[qz] += t * Bo(qz,dz);
3777  }
3778  }
3779
3780  for (int qy = 0; qy < Q1D; ++qy)
3781  {
3782  const double wy = Bc(qy,dy);
3783  const double wDy = Gc(qy,dy);
3784  for (int qz = 0; qz < Q1D; ++qz)
3785  {
3786  const double wz = massZ[qz];
3787  gradYZ[qz][qy][0] += wz * wy;
3788  gradYZ[qz][qy][1] += wz * wDy;
3789  }
3790  }
3791  }
3792
3793  for (int qx = 0; qx < Q1D; ++qx)
3794  {
3795  const double wx = Bc(qx,dx);
3796  const double wDx = Gc(qx,dx);
3797
3798  for (int qy = 0; qy < Q1D; ++qy)
3799  {
3800  for (int qz = 0; qz < Q1D; ++qz)
3801  {
3802  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3803  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3804  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3805  }
3806  }
3807  }
3808  }
3809  }
3810
3811  // Apply D operator.
3812  for (int qz = 0; qz < Q1D; ++qz)
3813  {
3814  for (int qy = 0; qy < Q1D; ++qy)
3815  {
3816  for (int qx = 0; qx < Q1D; ++qx)
3817  {
3818  const double O11 = op(0,qx,qy,qz,e);
3819  if (coeffDim == 1)
3820  {
3821  for (int c = 0; c < VDIM; ++c)
3822  {
3823  curl[qz][qy][qx][c] *= O11;
3824  }
3825  }
3826  else
3827  {
3828  const double O21 = op(1,qx,qy,qz,e);
3829  const double O31 = op(2,qx,qy,qz,e);
3830  const double O12 = op(3,qx,qy,qz,e);
3831  const double O22 = op(4,qx,qy,qz,e);
3832  const double O32 = op(5,qx,qy,qz,e);
3833  const double O13 = op(6,qx,qy,qz,e);
3834  const double O23 = op(7,qx,qy,qz,e);
3835  const double O33 = op(8,qx,qy,qz,e);
3836  const double curlX = curl[qz][qy][qx][0];
3837  const double curlY = curl[qz][qy][qx][1];
3838  const double curlZ = curl[qz][qy][qx][2];
3839  curl[qz][qy][qx][0] = (O11*curlX)+(O12*curlY)+(O13*curlZ);
3840  curl[qz][qy][qx][1] = (O21*curlX)+(O22*curlY)+(O23*curlZ);
3841  curl[qz][qy][qx][2] = (O31*curlX)+(O32*curlY)+(O33*curlZ);
3842  }
3843  }
3844  }
3845  }
3846
3847  for (int qz = 0; qz < Q1D; ++qz)
3848  {
3849  double massXY[MAX_D1D][MAX_D1D];
3850
3851  osc = 0;
3852
3853  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3854  {
3855  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3856  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3857  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3858
3859  for (int dy = 0; dy < D1Dy; ++dy)
3860  {
3861  for (int dx = 0; dx < D1Dx; ++dx)
3862  {
3863  massXY[dy][dx] = 0;
3864  }
3865  }
3866  for (int qy = 0; qy < Q1D; ++qy)
3867  {
3868  double massX[MAX_D1D];
3869  for (int dx = 0; dx < D1Dx; ++dx)
3870  {
3871  massX[dx] = 0.0;
3872  }
3873  for (int qx = 0; qx < Q1D; ++qx)
3874  {
3875  for (int dx = 0; dx < D1Dx; ++dx)
3876  {
3877  massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3878  }
3879  }
3880
3881  for (int dy = 0; dy < D1Dy; ++dy)
3882  {
3883  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3884  for (int dx = 0; dx < D1Dx; ++dx)
3885  {
3886  massXY[dy][dx] += massX[dx] * wy;
3887  }
3888  }
3889  }
3890
3891  for (int dz = 0; dz < D1Dz; ++dz)
3892  {
3893  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3894  for (int dy = 0; dy < D1Dy; ++dy)
3895  {
3896  for (int dx = 0; dx < D1Dx; ++dx)
3897  {
3898  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3899  }
3900  }
3901  }
3902
3903  osc += D1Dx * D1Dy * D1Dz;
3904  } // loop c
3905  } // loop qz
3906  }); // end of element loop
3907 }
3908
3909 // Apply to x corresponding to DOFs in H(curl) (trial), whose curl is
3910 // integrated against H(curl) test functions corresponding to y.
3911 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3912 static void SmemPAHcurlL2Apply3D(const int D1D,
3913  const int Q1D,
3914  const int coeffDim,
3915  const int NE,
3916  const Array<double> &bo,
3917  const Array<double> &bc,
3918  const Array<double> &gc,
3919  const Vector &pa_data,
3920  const Vector &x,
3921  Vector &y)
3922 {
3923  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3924  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3925
3926  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3927  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3928  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3929  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3930  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3931  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3932
3933  auto device_kernel = [=] MFEM_DEVICE (int e)
3934  {
3935  constexpr int VDIM = 3;
3936  constexpr int maxCoeffDim = 9;
3937
3938  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3939  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3940  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3941
3942  double opc[maxCoeffDim];
3943  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3944  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3945
3946  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3947
3949  {
3951  {
3953  {
3954  for (int i=0; i<coeffDim; ++i)
3955  {
3956  opc[i] = op(i,qx,qy,qz,e);
3957  }
3958  }
3959  }
3960  }
3961
3962  const int tidx = MFEM_THREAD_ID(x);
3963  const int tidy = MFEM_THREAD_ID(y);
3964  const int tidz = MFEM_THREAD_ID(z);
3965
3966  if (tidz == 0)
3967  {
3969  {
3971  {
3972  sBc[d][q] = Bc(q,d);
3973  sGc[d][q] = Gc(q,d);
3974  if (d < D1D-1)
3975  {
3976  sBo[d][q] = Bo(q,d);
3977  }
3978  }
3979  }
3980  }
3982
3983  for (int qz=0; qz < Q1D; ++qz)
3984  {
3985  if (tidz == qz)
3986  {
3988  {
3990  {
3991  for (int i=0; i<3; ++i)
3992  {
3993  curl[qy][qx][i] = 0.0;
3994  }
3995  }
3996  }
3997  }
3998
3999  int osc = 0;
4000  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4001  {
4002  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4003  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4004  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4005
4007  {
4009  {
4011  {
4012  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4013  }
4014  }
4015  }
4017
4018  if (tidz == qz)
4019  {
4020  if (c == 0)
4021  {
4022  for (int i=0; i<coeffDim; ++i)
4023  {
4024  sop[i][tidx][tidy] = opc[i];
4025  }
4026  }
4027
4029  {
4031  {
4032  double u = 0.0;
4033  double v = 0.0;
4034
4035  // We treat x, y, z components separately for optimization specific to each.
4036  if (c == 0) // x component
4037  {
4038  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4039
4040  for (int dz = 0; dz < D1Dz; ++dz)
4041  {
4042  const double wz = sBc[dz][qz];
4043  const double wDz = sGc[dz][qz];
4044
4045  for (int dy = 0; dy < D1Dy; ++dy)
4046  {
4047  const double wy = sBc[dy][qy];
4048  const double wDy = sGc[dy][qy];
4049
4050  for (int dx = 0; dx < D1Dx; ++dx)
4051  {
4052  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
4053  u += wx * wDy * wz;
4054  v += wx * wy * wDz;
4055  }
4056  }
4057  }
4058
4059  curl[qy][qx][1] += v; // (u_0)_{x_2}
4060  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
4061  }
4062  else if (c == 1) // y component
4063  {
4064  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4065
4066  for (int dz = 0; dz < D1Dz; ++dz)
4067  {
4068  const double wz = sBc[dz][qz];
4069  const double wDz = sGc[dz][qz];
4070
4071  for (int dy = 0; dy < D1Dy; ++dy)
4072  {
4073  const double wy = sBo[dy][qy];
4074
4075  for (int dx = 0; dx < D1Dx; ++dx)
4076  {
4077  const double t = sX[dz][dy][dx];
4078  const double wx = t * sBc[dx][qx];
4079  const double wDx = t * sGc[dx][qx];
4080
4081  u += wDx * wy * wz;
4082  v += wx * wy * wDz;
4083  }
4084  }
4085  }
4086
4087  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
4088  curl[qy][qx][2] += u; // (u_1)_{x_0}
4089  }
4090  else // z component
4091  {
4092  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4093
4094  for (int dz = 0; dz < D1Dz; ++dz)
4095  {
4096  const double wz = sBo[dz][qz];
4097
4098  for (int dy = 0; dy < D1Dy; ++dy)
4099  {
4100  const double wy = sBc[dy][qy];
4101  const double wDy = sGc[dy][qy];
4102
4103  for (int dx = 0; dx < D1Dx; ++dx)
4104  {
4105  const double t = sX[dz][dy][dx];
4106  const double wx = t * sBc[dx][qx];
4107  const double wDx = t * sGc[dx][qx];
4108
4109  u += wDx * wy * wz;
4110  v += wx * wDy * wz;
4111  }
4112  }
4113  }
4114
4115  curl[qy][qx][0] += v; // (u_2)_{x_1}
4116  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
4117  }
4118  } // qx
4119  } // qy
4120  } // tidz == qz
4121
4122  osc += D1Dx * D1Dy * D1Dz;
4124  } // c
4125
4126  double dxyz1 = 0.0;
4127  double dxyz2 = 0.0;
4128  double dxyz3 = 0.0;
4129
4131  {
4132  const double wcz = sBc[dz][qz];
4133  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4134
4136  {
4138  {
4139  for (int qy = 0; qy < Q1D; ++qy)
4140  {
4141  const double wcy = sBc[dy][qy];
4142  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4143
4144  for (int qx = 0; qx < Q1D; ++qx)
4145  {
4146  const double O11 = sop[0][qx][qy];
4147  double c1, c2, c3;
4148  if (coeffDim == 1)
4149  {
4150  c1 = O11 * curl[qy][qx][0];
4151  c2 = O11 * curl[qy][qx][1];
4152  c3 = O11 * curl[qy][qx][2];
4153  }
4154  else
4155  {
4156  const double O21 = sop[1][qx][qy];
4157  const double O31 = sop[2][qx][qy];
4158  const double O12 = sop[3][qx][qy];
4159  const double O22 = sop[4][qx][qy];
4160  const double O32 = sop[5][qx][qy];
4161  const double O13 = sop[6][qx][qy];
4162  const double O23 = sop[7][qx][qy];
4163  const double O33 = sop[8][qx][qy];
4164  c1 = (O11*curl[qy][qx][0])+(O12*curl[qy][qx][1])+(O13*curl[qy][qx][2]);
4165  c2 = (O21*curl[qy][qx][0])+(O22*curl[qy][qx][1])+(O23*curl[qy][qx][2]);
4166  c3 = (O31*curl[qy][qx][0])+(O32*curl[qy][qx][1])+(O33*curl[qy][qx][2]);
4167  }
4168
4169  const double wcx = sBc[dx][qx];
4170
4171  if (dx < D1D-1)
4172  {
4173  const double wx = sBo[dx][qx];
4174  dxyz1 += c1 * wx * wcy * wcz;
4175  }
4176
4177  dxyz2 += c2 * wcx * wy * wcz;
4178  dxyz3 += c3 * wcx * wcy * wz;
4179  } // qx
4180  } // qy
4181  } // dx
4182  } // dy
4183  } // dz
4184
4186
4188  {
4190  {
4192  {
4193  if (dx < D1D-1)
4194  {
4195  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4196  }
4197  if (dy < D1D-1)
4198  {
4199  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4200  }
4201  if (dz < D1D-1)
4202  {
4203  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4204  }
4205  }
4206  }
4207  }
4208  } // qz
4209  }; // end of element loop
4210
4211  auto host_kernel = [&] MFEM_LAMBDA (int)
4212  {
4213  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
4214  };
4215
4216  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
4217 }
4218
4219 // Apply to x corresponding to DOFs in H(curl) (trial), whose curl is
4220 // integrated against H(div) test functions corresponding to y.
4221 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4222 static void PAHcurlHdivApply3D(const int D1D,
4223  const int D1Dtest,
4224  const int Q1D,
4225  const int NE,
4226  const Array<double> &bo,
4227  const Array<double> &bc,
4228  const Array<double> &bot,
4229  const Array<double> &bct,
4230  const Array<double> &gc,
4231  const Vector &pa_data,
4232  const Vector &x,
4233  Vector &y)
4234 {
4235  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4236  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4237  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
4238  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
4239  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
4240  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4241  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4242  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4243
4244  constexpr static int VDIM = 3;
4245
4246  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4247  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4248  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
4249  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
4250  auto Gc = Reshape(gc.Read(), Q1D, D1D);
4251  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
4252  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4253  auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
4254
4255  MFEM_FORALL(e, NE,
4256  {
4257  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4258  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
4259
4260  for (int qz = 0; qz < Q1D; ++qz)
4261  {
4262  for (int qy = 0; qy < Q1D; ++qy)
4263  {
4264  for (int qx = 0; qx < Q1D; ++qx)
4265  {
4266  for (int c = 0; c < VDIM; ++c)
4267  {
4268  curl[qz][qy][qx][c] = 0.0;
4269  }
4270  }
4271  }
4272  }
4273
4274  // We treat x, y, z components separately for optimization specific to each.
4275
4276  int osc = 0;
4277
4278  {
4279  // x component
4280  const int D1Dz = D1D;
4281  const int D1Dy = D1D;
4282  const int D1Dx = D1D - 1;
4283
4284  for (int dz = 0; dz < D1Dz; ++dz)
4285  {
4287  for (int qy = 0; qy < Q1D; ++qy)
4288  {
4289  for (int qx = 0; qx < Q1D; ++qx)
4290  {
4291  for (int d = 0; d < 2; ++d)
4292  {
4294  }
4295  }
4296  }
4297
4298  for (int dy = 0; dy < D1Dy; ++dy)
4299  {
4300  double massX[MAX_Q1D];
4301  for (int qx = 0; qx < Q1D; ++qx)
4302  {
4303  massX[qx] = 0.0;
4304  }
4305
4306  for (int dx = 0; dx < D1Dx; ++dx)
4307  {
4308  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4309  for (int qx = 0; qx < Q1D; ++qx)
4310  {
4311  massX[qx] += t * Bo(qx,dx);
4312  }
4313  }
4314
4315  for (int qy = 0; qy < Q1D; ++qy)
4316  {
4317  const double wy = Bc(qy,dy);
4318  const double wDy = Gc(qy,dy);
4319  for (int qx = 0; qx < Q1D; ++qx)
4320  {
4321  const double wx = massX[qx];
4322  gradXY[qy][qx][0] += wx * wDy;
4323  gradXY[qy][qx][1] += wx * wy;
4324  }
4325  }
4326  }
4327
4328  for (int qz = 0; qz < Q1D; ++qz)
4329  {
4330  const double wz = Bc(qz,dz);
4331  const double wDz = Gc(qz,dz);
4332  for (int qy = 0; qy < Q1D; ++qy)
4333  {
4334  for (int qx = 0; qx < Q1D; ++qx)
4335  {
4336  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4337  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
4338  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
4339  }
4340  }
4341  }
4342  }
4343
4344  osc += D1Dx * D1Dy * D1Dz;
4345  }
4346
4347  {
4348  // y component
4349  const int D1Dz = D1D;
4350  const int D1Dy = D1D - 1;
4351  const int D1Dx = D1D;
4352
4353  for (int dz = 0; dz < D1Dz; ++dz)
4354  {
4356  for (int qy = 0; qy < Q1D; ++qy)
4357  {
4358  for (int qx = 0; qx < Q1D; ++qx)
4359  {
4360  for (int d = 0; d < 2; ++d)
4361  {
4363  }
4364  }
4365  }
4366
4367  for (int dx = 0; dx < D1Dx; ++dx)
4368  {
4369  double massY[MAX_Q1D];
4370  for (int qy = 0; qy < Q1D; ++qy)
4371  {
4372  massY[qy] = 0.0;
4373  }
4374
4375  for (int dy = 0; dy < D1Dy; ++dy)
4376  {
4377  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4378  for (int qy = 0; qy < Q1D; ++qy)
4379  {
4380  massY[qy] += t * Bo(qy,dy);
4381  }
4382  }
4383
4384  for (int qx = 0; qx < Q1D; ++qx)
4385  {
4386  const double wx = Bc(qx,dx);
4387  const double wDx = Gc(qx,dx);
4388  for (int qy = 0; qy < Q1D; ++qy)
4389  {
4390  const double wy = massY[qy];
4391  gradXY[qy][qx][0] += wDx * wy;
4392  gradXY[qy][qx][1] += wx * wy;
4393  }
4394  }
4395  }
4396
4397  for (int qz = 0; qz < Q1D; ++qz)
4398  {
4399  const double wz = Bc(qz,dz);
4400  const double wDz = Gc(qz,dz);
4401  for (int qy = 0; qy < Q1D; ++qy)
4402  {
4403  for (int qx = 0; qx < Q1D; ++qx)
4404  {
4405  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4406  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
4407  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
4408  }
4409  }
4410  }
4411  }
4412
4413  osc += D1Dx * D1Dy * D1Dz;
4414  }
4415
4416  {
4417  // z component
4418  const int D1Dz = D1D - 1;
4419  const int D1Dy = D1D;
4420  const int D1Dx = D1D;
4421
4422  for (int dx = 0; dx < D1Dx; ++dx)
4423  {
4425  for (int qz = 0; qz < Q1D; ++qz)
4426  {
4427  for (int qy = 0; qy < Q1D; ++qy)
4428  {
4429  for (int d = 0; d < 2; ++d)
4430  {
4432  }
4433  }
4434  }
4435
4436  for (int dy = 0; dy < D1Dy; ++dy)
4437  {
4438  double massZ[MAX_Q1D];
4439  for (int qz = 0; qz < Q1D; ++qz)
4440  {
4441  massZ[qz] = 0.0;
4442  }
4443
4444  for (int dz = 0; dz < D1Dz; ++dz)
4445  {
4446  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4447  for (int qz = 0; qz < Q1D; ++qz)
4448  {
4449  massZ[qz] += t * Bo(qz,dz);
4450  }
4451  }
4452
4453  for (int qy = 0; qy < Q1D; ++qy)
4454  {
4455  const double wy = Bc(qy,dy);
4456  const double wDy = Gc(qy,dy);
4457  for (int qz = 0; qz < Q1D; ++qz)
4458  {
4459  const double wz = massZ[qz];
4460  gradYZ[qz][qy][0] += wz * wy;
4461  gradYZ[qz][qy][1] += wz * wDy;
4462  }
4463  }
4464  }
4465
4466  for (int qx = 0; qx < Q1D; ++qx)
4467  {
4468  const double wx = Bc(qx,dx);
4469  const double wDx = Gc(qx,dx);
4470
4471  for (int qy = 0; qy < Q1D; ++qy)
4472  {
4473  for (int qz = 0; qz < Q1D; ++qz)
4474  {
4475  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4476  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
4477  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
4478  }
4479  }
4480  }
4481  }
4482  }
4483
4484  // Apply D operator.
4485  for (int qz = 0; qz < Q1D; ++qz)
4486  {
4487  for (int qy = 0; qy < Q1D; ++qy)
4488  {
4489  for (int qx = 0; qx < Q1D; ++qx)
4490  {
4491  const double O11 = op(qx,qy,qz,0,e);
4492  const double O12 = op(qx,qy,qz,1,e);
4493  const double O13 = op(qx,qy,qz,2,e);
4494  const double O22 = op(qx,qy,qz,3,e);
4495  const double O23 = op(qx,qy,qz,4,e);
4496  const double O33 = op(qx,qy,qz,5,e);
4497
4498  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
4499  (O13 * curl[qz][qy][qx][2]);
4500  const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
4501  (O23 * curl[qz][qy][qx][2]);
4502  const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
4503  (O33 * curl[qz][qy][qx][2]);
4504
4505  curl[qz][qy][qx][0] = c1;
4506  curl[qz][qy][qx][1] = c2;
4507  curl[qz][qy][qx][2] = c3;
4508  }
4509  }
4510  }
4511
4512  for (int qz = 0; qz < Q1D; ++qz)
4513  {
4514  double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
4515
4516  osc = 0;
4517
4518  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4519  {
4520  const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
4521  const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
4522  const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
4523
4524  for (int dy = 0; dy < D1Dy; ++dy)
4525  {
4526  for (int dx = 0; dx < D1Dx; ++dx)
4527  {
4528  massXY[dy][dx] = 0;
4529  }
4530  }
4531  for (int qy = 0; qy < Q1D; ++qy)
4532  {
4533  double massX[HCURL_MAX_D1D];
4534  for (int dx = 0; dx < D1Dx; ++dx)
4535  {
4536  massX[dx] = 0;
4537  }
4538  for (int qx = 0; qx < Q1D; ++qx)
4539  {
4540  for (int dx = 0; dx < D1Dx; ++dx)
4541  {
4542  massX[dx] += curl[qz][qy][qx][c] *
4543  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
4544  }
4545  }
4546  for (int dy = 0; dy < D1Dy; ++dy)
4547  {
4548  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
4549  for (int dx = 0; dx < D1Dx; ++dx)
4550  {
4551  massXY[dy][dx] += massX[dx] * wy;
4552  }
4553  }
4554  }
4555
4556  for (int dz = 0; dz < D1Dz; ++dz)
4557  {
4558  const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
4559  for (int dy = 0; dy < D1Dy; ++dy)
4560  {
4561  for (int dx = 0; dx < D1Dx; ++dx)
4562  {
4563  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
4564  massXY[dy][dx] * wz;
4565  }
4566  }
4567  }
4568
4569  osc += D1Dx * D1Dy * D1Dz;
4570  } // loop c
4571  } // loop qz
4572  }); // end of element loop
4573 }
4574
4575 // Apply to x corresponding to DOFs in H(div) (test), integrated against the
4576 // curl of H(curl) trial functions corresponding to y.
4577 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4578 static void PAHcurlHdivApply3DTranspose(const int D1D,
4579  const int D1Dtest,
4580  const int Q1D,
4581  const int NE,
4582  const Array<double> &bo,
4583  const Array<double> &bc,
4584  const Array<double> &bot,
4585  const Array<double> &bct,
4586  const Array<double> &gct,
4587  const Vector &pa_data,
4588  const Vector &x,
4589  Vector &y)
4590 {
4591  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4592  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4593  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
4594  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
4595  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
4596  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4597  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4598  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4599
4600  constexpr static int VDIM = 3;
4601
4602  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4603  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4604  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
4605  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
4606  auto Gct = Reshape(gct.Read(), D1D, Q1D);
4607  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
4608  auto X = Reshape(x.Read(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
4609  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4610
4611  MFEM_FORALL(e, NE,
4612  {
4613  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
4614
4615  for (int qz = 0; qz < Q1D; ++qz)
4616  {
4617  for (int qy = 0; qy < Q1D; ++qy)
4618  {
4619  for (int qx = 0; qx < Q1D; ++qx)
4620  {
4621  for (int c = 0; c < VDIM; ++c)
4622  {
4623  mass[qz][qy][qx][c] = 0.0;
4624  }
4625  }
4626  }
4627  }
4628
4629  int osc = 0;
4630
4631  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4632  {
4633  const int D1Dz = (c == 2) ? D1D : D1D - 1;
4634  const int D1Dy = (c == 1) ? D1D : D1D - 1;
4635  const int D1Dx = (c == 0) ? D1D : D1D - 1;
4636
4637  for (int dz = 0; dz < D1Dz; ++dz)
4638  {
4639  double massXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
4640  for (int qy = 0; qy < Q1D; ++qy)
4641  {
4642  for (int qx = 0; qx < Q1D; ++qx)
4643  {
4644  massXY[qy][qx] = 0.0;
4645  }
4646  }
4647
4648  for (int dy = 0; dy < D1Dy; ++dy)
4649  {
4650  double massX[HDIV_MAX_Q1D];
4651  for (int qx = 0; qx < Q1D; ++qx)
4652  {
4653  massX[qx] = 0.0;
4654  }
4655
4656  for (int dx = 0; dx < D1Dx; ++dx)
4657  {
4658  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4659  for (int qx = 0; qx < Q1D; ++qx)
4660  {
4661  massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
4662  }
4663  }
4664
4665  for (int qy = 0; qy < Q1D; ++qy)
4666  {
4667  const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
4668  for (int qx = 0; qx < Q1D; ++qx)
4669  {
4670  const double wx = massX[qx];
4671  massXY[qy][qx] += wx * wy;
4672  }
4673  }
4674  }
4675
4676  for (int qz = 0; qz < Q1D; ++qz)
4677  {
4678  const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
4679  for (int qy = 0; qy < Q1D; ++qy)
4680  {
4681  for (int qx = 0; qx < Q1D; ++qx)
4682  {
4683  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4684  }
4685  }
4686  }
4687  }
4688
4689  osc += D1Dx * D1Dy * D1Dz;
4690  } // loop (c) over components
4691
4692  // Apply D operator.
4693  for (int qz = 0; qz < Q1D; ++qz)
4694  {
4695  for (int qy = 0; qy < Q1D; ++qy)
4696  {
4697  for (int qx = 0; qx < Q1D; ++qx)
4698  {
4699  const double O11 = op(qx,qy,qz,0,e);
4700  const double O12 = op(qx,qy,qz,1,e);
4701  const double O13 = op(qx,qy,qz,2,e);
4702  const double O22 = op(qx,qy,qz,3,e);
4703  const double O23 = op(qx,qy,qz,4,e);
4704  const double O33 = op(qx,qy,qz,5,e);
4705  const double massX = mass[qz][qy][qx][0];
4706  const double massY = mass[qz][qy][qx][1];
4707  const double massZ = mass[qz][qy][qx][2];
4708  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
4709  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
4710  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
4711  }
4712  }
4713  }
4714
4715  // x component
4716  osc = 0;
4717  {
4718  const int D1Dz = D1D;
4719  const int D1Dy = D1D;
4720  const int D1Dx = D1D - 1;
4721
4722  for (int qz = 0; qz < Q1D; ++qz)
4723  {
4726
4727  for (int dy = 0; dy < D1Dy; ++dy)
4728  {
4729  for (int dx = 0; dx < D1Dx; ++dx)
4730  {
4733  }
4734  }
4735  for (int qy = 0; qy < Q1D; ++qy)
4736  {
4737  double massX[MAX_D1D][2];
4738  for (int dx = 0; dx < D1Dx; ++dx)
4739  {
4740  for (int n = 0; n < 2; ++n)
4741  {
4742  massX[dx][n] = 0.0;
4743  }
4744  }
4745  for (int qx = 0; qx < Q1D; ++qx)
4746  {
4747  for (int dx = 0; dx < D1Dx; ++dx)
4748  {
4749  const double wx = Bot(dx,qx);
4750
4751  massX[dx][0] += wx * mass[qz][qy][qx][1];
4752  massX[dx][1] += wx * mass[qz][qy][qx][2];
4753  }
4754  }
4755  for (int dy = 0; dy < D1Dy; ++dy)
4756  {
4757  const double wy = Bct(dy,qy);
4758  const double wDy = Gct(dy,qy);
4759
4760  for (int dx = 0; dx < D1Dx; ++dx)
4761  {
4762  gradXY21[dy][dx] += massX[dx][0] * wy;
4763  gradXY12[dy][dx] += massX[dx][1] * wDy;
4764  }
4765  }
4766  }
4767
4768  for (int dz = 0; dz < D1Dz; ++dz)
4769  {
4770  const double wz = Bct(dz,qz);
4771  const double wDz = Gct(dz,qz);
4772  for (int dy = 0; dy < D1Dy; ++dy)
4773  {
4774  for (int dx = 0; dx < D1Dx; ++dx)
4775  {
4776  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4777  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4778  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4780  }
4781  }
4782  }
4783  } // loop qz
4784
4785  osc += D1Dx * D1Dy * D1Dz;
4786  }
4787
4788  // y component
4789  {
4790  const int D1Dz = D1D;
4791  const int D1Dy = D1D - 1;
4792  const int D1Dx = D1D;
4793
4794  for (int qz = 0; qz < Q1D; ++qz)
4795  {
4798
4799  for (int dy = 0; dy < D1Dy; ++dy)
4800  {
4801  for (int dx = 0; dx < D1Dx; ++dx)
4802  {
4805  }
4806  }
4807  for (int qx = 0; qx < Q1D; ++qx)
4808  {
4809  double massY[MAX_D1D][2];
4810  for (int dy = 0; dy < D1Dy; ++dy)
4811  {
4812  massY[dy][0] = 0.0;
4813  massY[dy][1] = 0.0;
4814  }
4815  for (int qy = 0; qy < Q1D; ++qy)
4816  {
4817  for (int dy = 0; dy < D1Dy; ++dy)
4818  {
4819  const double wy = Bot(dy,qy);
4820
4821  massY[dy][0] += wy * mass[qz][qy][qx][2];
4822  massY[dy][1] += wy * mass[qz][qy][qx][0];
4823  }
4824  }
4825  for (int dx = 0; dx < D1Dx; ++dx)
4826  {
4827  const double wx = Bct(dx,qx);
4828  const double wDx = Gct(dx,qx);
4829
4830  for (int dy = 0; dy < D1Dy; ++dy)
4831  {
4832  gradXY02[dy][dx] += massY[dy][0] * wDx;
4833  gradXY20[dy][dx] += massY[dy][1] * wx;
4834  }
4835  }
4836  }
4837
4838  for (int dz = 0; dz < D1Dz; ++dz)
4839  {
4840  const double wz = Bct(dz,qz);
4841  const double wDz = Gct(dz,qz);
4842  for (int dy = 0; dy < D1Dy; ++dy)
4843  {
4844  for (int dx = 0; dx < D1Dx; ++dx)
4845  {
4846  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4847  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4848  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4850  }
4851  }
4852  }
4853  } // loop qz
4854
4855  osc += D1Dx * D1Dy * D1Dz;
4856  }
4857
4858  // z component
4859  {
4860  const int D1Dz = D1D - 1;
4861  const int D1Dy = D1D;
4862  const int D1Dx = D1D;
4863
4864  for (int qx = 0; qx < Q1D; ++qx)
4865  {
4868
4869  for (int dy = 0; dy < D1Dy; ++dy)
4870  {
4871  for (int dz = 0; dz < D1Dz; ++dz)
4872  {
4875  }
4876  }
4877  for (int qy = 0; qy < Q1D; ++qy)
4878  {
4879  double massZ[MAX_D1D][2];
4880  for (int dz = 0; dz < D1Dz; ++dz)
4881  {
4882  for (int n = 0; n < 2; ++n)
4883  {
4884  massZ[dz][n] = 0.0;
4885  }
4886  }
4887  for (int qz = 0; qz < Q1D; ++qz)
4888  {
4889  for (int dz = 0; dz < D1Dz; ++dz)
4890  {
4891  const double wz = Bot(dz,qz);
4892
4893  massZ[dz][0] += wz * mass[qz][qy][qx][0];
4894  massZ[dz][1] += wz * mass[qz][qy][qx][1];
4895  }
4896  }
4897  for (int dy = 0; dy < D1Dy; ++dy)
4898  {
4899  const double wy = Bct(dy,qy);
4900  const double wDy = Gct(dy,qy);
4901
4902  for (int dz = 0; dz < D1Dz; ++dz)
4903  {
4904  gradYZ01[dz][dy] += wy * massZ[dz][1];
4905  gradYZ10[dz][dy] += wDy * massZ[dz][0];
4906  }
4907  }
4908  }
4909
4910  for (int dx = 0; dx < D1Dx; ++dx)
4911  {
4912  const double wx = Bct(dx,qx);
4913  const double wDx = Gct(dx,qx);
4914
4915  for (int dy = 0; dy < D1Dy; ++dy)
4916  {
4917  for (int dz = 0; dz < D1Dz; ++dz)
4918  {
4919  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4920  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4921  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4923  }
4924  }
4925  }
4926  } // loop qx
4927  }
4928  }); // end of element loop
4929 }
4930
4931 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4932 {
4933  if (testType == mfem::FiniteElement::CURL &&
4934  trialType == mfem::FiniteElement::CURL && dim == 3)
4935  {
4936  const int ndata = coeffDim == 1 ? 1 : 9;
4937
4939  {
4940  const int ID = (dofs1D << 4) | quad1D;
4941  switch (ID)
4942  {
4943  case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, ndata, ne,
4944  mapsO->B, mapsC->B,
4945  mapsC->G, pa_data, x, y);
4946  case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, ndata, ne,
4947  mapsO->B, mapsC->B,
4948  mapsC->G, pa_data, x, y);
4949  case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, ndata, ne,
4950  mapsO->B, mapsC->B,
4951  mapsC->G, pa_data, x, y);
4952  case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, ndata, ne,
4953  mapsO->B, mapsC->B,
4954  mapsC->G, pa_data, x, y);
4955  default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne,
4956  mapsO->B, mapsC->B, mapsC->G,
4957  pa_data, x, y);
4958  }
4959  }
4960  else
4961  PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B,
4962  mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
4963  }
4964  else if (testType == mfem::FiniteElement::DIV &&
4965  trialType == mfem::FiniteElement::CURL && dim == 3)
4966  PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
4967  mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
4968  pa_data, x, y);
4969  else
4970  {
4971  MFEM_ABORT("Unsupported dimension or space!");
4972  }
4973 }
4974
4976  Vector &y) const
4977 {
4978  if (testType == mfem::FiniteElement::DIV &&
4979  trialType == mfem::FiniteElement::CURL && dim == 3)
4980  PAHcurlHdivApply3DTranspose(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
4981  mapsC->B, mapsOtest->Bt, mapsCtest->Bt,
4982  mapsC->Gt, pa_data, x, y);
4983  else
4984  {
4985  MFEM_ABORT("Unsupported dimension or space!");
4986  }
4987 }
4988
4989 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
4990  &trial_fes,
4991  const FiniteElementSpace &test_fes)
4992 {
4993  // Assumes tensor-product elements, with vector test and trial spaces.
4994  Mesh *mesh = trial_fes.GetMesh();
4995  const FiniteElement *trial_fel = trial_fes.GetFE(0);
4996  const FiniteElement *test_fel = test_fes.GetFE(0);
4997
4998  const VectorTensorFiniteElement *trial_el =
4999  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
5000  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
5001
5002  const VectorTensorFiniteElement *test_el =
5003  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
5004  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
5005
5006  const IntegrationRule *ir
5007  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
5008  *mesh->GetElementTransformation(0));
5009  const int dims = trial_el->GetDim();
5010  MFEM_VERIFY(dims == 3, "");
5011
5012  const int nq = ir->GetNPoints();
5013  dim = mesh->Dimension();
5014  MFEM_VERIFY(dim == 3, "");
5015
5016  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
5017
5018  ne = trial_fes.GetNE();
5019  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
5022  dofs1D = mapsC->ndof;
5024
5025  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
5026
5027  testType = test_el->GetDerivType();
5028  trialType = trial_el->GetDerivType();
5029
5030  const bool curlSpaces = (testType == mfem::FiniteElement::CURL &&
5031  trialType == mfem::FiniteElement::CURL);
5032
5033  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
5034
5035  coeffDim = DQ ? 3 : 1;
5036  const int ndata = curlSpaces ? (DQ ? 9 : 1) : symmDims;
5037
5038  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
5039
5041  CoefficientVector coeff(qs, CoefficientStorage::FULL);
5042  if (Q) { coeff.Project(*Q); }
5043  else if (DQ) { coeff.Project(*DQ); }
5044  else { coeff.SetConstant(1.0); }
5045
5046  if (trialType == mfem::FiniteElement::CURL && dim == 3)
5047  {
5048  if (coeffDim == 1)
5049  {
5050  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
5051  }
5052  else
5053  {
5054  PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(),
5055  geom->J, coeff, pa_data);
5056  }
5057  }
5058  else if (trialType == mfem::FiniteElement::DIV && dim == 3 &&
5059  test_el->GetOrder() == trial_el->GetOrder())
5060  {
5061  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
5062  pa_data);
5063  }
5064  else
5065  {
5066  MFEM_ABORT("Unknown kernel.");
5067  }
5068 }
5069
5070 // Apply to x corresponding to DOFs in H(curl) (trial), integrated against curl
5071 // of H(curl) test functions corresponding to y.
5072 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
5073 static void PAHcurlL2Apply3DTranspose(const int D1D,
5074  const int Q1D,
5075  const int coeffDim,
5076  const int NE,
5077  const Array<double> &bo,
5078  const Array<double> &bc,
5079  const Array<double> &bot,
5080  const Array<double> &bct,
5081  const Array<double> &gct,
5082  const Vector &pa_data,
5083  const Vector &x,
5084  Vector &y)
5085 {
5086  // See PAHcurlL2Apply3D for comments.
5087
5088  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
5089  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
5090
5091  constexpr static int VDIM = 3;
5092
5093  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
5094  auto Bc = Reshape(bc.Read(), Q1D, D1D);
5095  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
5096  auto Bct = Reshape(bct.Read(), D1D, Q1D);
5097  auto Gct = Reshape(gct.Read(), D1D, Q1D);
5098  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
5099  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
5100  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
5101
5102  MFEM_FORALL(e, NE,
5103  {
5104  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
5105
5106  for (int qz = 0; qz < Q1D; ++qz)
5107  {
5108  for (int qy = 0; qy < Q1D; ++qy)
5109  {
5110  for (int qx = 0; qx < Q1D; ++qx)
5111  {
5112  for (int c = 0; c < VDIM; ++c)
5113  {
5114  mass[qz][qy][qx][c] = 0.0;
5115  }
5116  }
5117  }
5118  }
5119
5120  int osc = 0;
5121
5122  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
5123  {
5124  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
5125  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
5126  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
5127
5128  for (int dz = 0; dz < D1Dz; ++dz)
5129  {
5130  double massXY[MAX_Q1D][MAX_Q1D];
5131  for (int qy = 0; qy < Q1D; ++qy)
5132  {
5133  for (int qx = 0; qx < Q1D; ++qx)
5134  {
5135  massXY[qy][qx] = 0.0;
5136  }
5137  }
5138
5139  for (int dy = 0; dy < D1Dy; ++dy)
5140  {
5141  double massX[MAX_Q1D];
5142  for (int qx = 0; qx < Q1D; ++qx)
5143  {
5144  massX[qx] = 0.0;
5145  }
5146
5147  for (int dx = 0; dx < D1Dx; ++dx)
5148  {
5149  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
5150  for (int qx = 0; qx < Q1D; ++qx)
5151  {
5152  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
5153  }
5154  }
5155
5156  for (int qy = 0; qy < Q1D; ++qy)
5157  {
5158  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
5159  for (int qx = 0; qx < Q1D; ++qx)
5160  {
5161  const double wx = massX[qx];
5162  massXY[qy][qx] += wx * wy;
5163  }
5164  }
5165  }
5166
5167  for (int qz = 0; qz < Q1D; ++qz)
5168  {
5169  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
5170  for (int qy = 0; qy < Q1D; ++qy)
5171  {
5172  for (int qx = 0; qx < Q1D; ++qx)
5173  {
5174  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
5175  }
5176  }
5177  }
5178  }
5179
5180  osc += D1Dx * D1Dy * D1Dz;
5181  } // loop (c) over components
5182
5183  // Apply D operator.
5184  for (int qz = 0; qz < Q1D; ++qz)
5185  {
5186  for (int qy = 0; qy < Q1D; ++qy)
5187  {
5188  for (int qx = 0; qx < Q1D; ++qx)
5189  {
5190  const double O11 = op(0,qx,qy,qz,e);
5191  if (coeffDim == 1)
5192  {
5193  for (int c = 0; c < VDIM; ++c)
5194  {
5195  mass[qz][qy][qx][c] *= O11;
5196  }
5197  }
5198  else
5199  {
5200  const double O12 = op(1,qx,qy,qz,e);
5201  const double O13 = op(2,qx,qy,qz,e);
5202  const double O21 = op(3,qx,qy,qz,e);
5203  const double O22 = op(4,qx,qy,qz,e);
5204  const double O23 = op(5,qx,qy,qz,e);
5205  const double O31 = op(6,qx,qy,qz,e);
5206  const double O32 = op(7,qx,qy,qz,e);
5207  const double O33 = op(8,qx,qy,qz,e);
5208  const double massX = mass[qz][qy][qx][0];
5209  const double massY = mass[qz][qy][qx][1];
5210  const double massZ = mass[qz][qy][qx][2];
5211  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
5212  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
5213  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
5214  }
5215  }
5216  }
5217  }
5218
5219  // x component
5220  osc = 0;
5221  {
5222  const int D1Dz = D1D;
5223  const int D1Dy = D1D;
5224  const int D1Dx = D1D - 1;
5225
5226  for (int qz = 0; qz < Q1D; ++qz)
5227  {
5230
5231  for (int dy = 0; dy < D1Dy; ++dy)
5232  {
5233  for (int dx = 0; dx < D1Dx; ++dx)
5234  {
5237  }
5238  }
5239  for (int qy = 0; qy < Q1D; ++qy)
5240  {
5241  double massX[MAX_D1D][2];
5242  for (int dx = 0; dx < D1Dx; ++dx)
5243  {
5244  for (int n = 0; n < 2; ++n)
5245  {
5246  massX[dx][n] = 0.0;
5247  }
5248  }
5249  for (int qx = 0; qx < Q1D; ++qx)
5250  {
5251  for (int dx = 0; dx < D1Dx; ++dx)
5252  {
5253  const double wx = Bot(dx,qx);
5254
5255  massX[dx][0] += wx * mass[qz][qy][qx][1];
5256  massX[dx][1] += wx * mass[qz][qy][qx][2];
5257  }
5258  }
5259  for (int dy = 0; dy < D1Dy; ++dy)
5260  {
5261  const double wy = Bct(dy,qy);
5262  const double wDy = Gct(dy,qy);
5263
5264  for (int dx = 0; dx < D1Dx; ++dx)
5265  {
5266  gradXY21[dy][dx] += massX[dx][0] * wy;
5267  gradXY12[dy][dx] += massX[dx][1] * wDy;
5268  }
5269  }
5270  }
5271
5272  for (int dz = 0; dz < D1Dz; ++dz)
5273  {
5274  const double wz = Bct(dz,qz);
5275  const double wDz = Gct(dz,qz);
5276  for (int dy = 0; dy < D1Dy; ++dy)
5277  {
5278  for (int dx = 0; dx < D1Dx; ++dx)
5279  {
5280  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
5281  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
5282  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5284  }
5285  }
5286  }
5287  } // loop qz
5288
5289  osc += D1Dx * D1Dy * D1Dz;
5290  }
5291
5292  // y component
5293  {
5294  const int D1Dz = D1D;
5295  const int D1Dy = D1D - 1;
5296  const int D1Dx = D1D;
5297
5298  for (int qz = 0; qz < Q1D; ++qz)
5299  {
5302
5303  for (int dy = 0; dy < D1Dy; ++dy)
5304  {
5305  for (int dx = 0; dx < D1Dx; ++dx)
5306  {
5309  }
5310  }
5311  for (int qx = 0; qx < Q1D; ++qx)
5312  {
5313  double massY[MAX_D1D][2];
5314  for (int dy = 0; dy < D1Dy; ++dy)
5315  {
5316  massY[dy][0] = 0.0;
5317  massY[dy][1] = 0.0;
5318  }
5319  for (int qy = 0; qy < Q1D; ++qy)
5320  {
5321  for (int dy = 0; dy < D1Dy; ++dy)
5322  {
5323  const double wy = Bot(dy,qy);
5324
5325  massY[dy][0] += wy * mass[qz][qy][qx][2];
5326  massY[dy][1] += wy * mass[qz][qy][qx][0];
5327  }
5328  }
5329  for (int dx = 0; dx < D1Dx; ++dx)
5330  {
5331  const double wx = Bct(dx,qx);
5332  const double wDx = Gct(dx,qx);
5333
5334  for (int dy = 0; dy < D1Dy; ++dy)
5335  {
5336  gradXY02[dy][dx] += massY[dy][0] * wDx;
5337  gradXY20[dy][dx] += massY[dy][1] * wx;
5338  }
5339  }
5340  }
5341
5342  for (int dz = 0; dz < D1Dz; ++dz)
5343  {
5344  const double wz = Bct(dz,qz);
5345  const double wDz = Gct(dz,qz);
5346  for (int dy = 0; dy < D1Dy; ++dy)
5347  {
5348  for (int dx = 0; dx < D1Dx; ++dx)
5349  {
5350  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
5351  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
5352  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5354  }
5355  }
5356  }
5357  } // loop qz
5358
5359  osc += D1Dx * D1Dy * D1Dz;
5360  }
5361
5362  // z component
5363  {
5364  const int D1Dz = D1D - 1;
5365  const int D1Dy = D1D;
5366  const int D1Dx = D1D;
5367
5368  for (int qx = 0; qx < Q1D; ++qx)
5369  {
5372
5373  for (int dy = 0; dy < D1Dy; ++dy)
5374  {
5375  for (int dz = 0; dz < D1Dz; ++dz)
5376  {
5379  }
5380  }
5381  for (int qy = 0; qy < Q1D; ++qy)
5382  {
5383  double massZ[MAX_D1D][2];
5384  for (int dz = 0; dz < D1Dz; ++dz)
5385  {
5386  for (int n = 0; n < 2; ++n)
5387  {
5388  massZ[dz][n] = 0.0;
5389  }
5390  }
5391  for (int qz = 0; qz < Q1D; ++qz)
5392  {
5393  for (int dz = 0; dz < D1Dz; ++dz)
5394  {
5395  const double wz = Bot(dz,qz);
5396
5397  massZ[dz][0] += wz * mass[qz][qy][qx][0];
5398  massZ[dz][1] += wz * mass[qz][qy][qx][1];
5399  }
5400  }
5401  for (int dy = 0; dy < D1Dy; ++dy)
5402  {
5403  const double wy = Bct(dy,qy);
5404  const double wDy = Gct(dy,qy);
5405
5406  for (int dz = 0; dz < D1Dz; ++dz)
5407  {
5408  gradYZ01[dz][dy] += wy * massZ[dz][1];
5409  gradYZ10[dz][dy] += wDy * massZ[dz][0];
5410  }
5411  }
5412  }
5413
5414  for (int dx = 0; dx < D1Dx; ++dx)
5415  {
5416  const double wx = Bct(dx,qx);
5417  const double wDx = Gct(dx,qx);
5418
5419  for (int dy = 0; dy < D1Dy; ++dy)
5420  {
5421  for (int dz = 0; dz < D1Dz; ++dz)
5422  {
5423  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
5424  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
5425  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5427  }
5428  }
5429  }
5430  } // loop qx
5431  }
5432  });
5433 }
5434
5435 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
5436 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
5437  const int Q1D,
5438  const int coeffDim,
5439  const int NE,
5440  const Array<double> &bo,
5441  const Array<double> &bc,
5442  const Array<double> &gc,
5443  const Vector &pa_data,
5444  const Vector &x,
5445  Vector &y)
5446 {
5447  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
5448  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
5449
5450  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
5451  auto Bc = Reshape(bc.Read(), Q1D, D1D);
5452  auto Gc = Reshape(gc.Read(), Q1D, D1D);
5453  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
5454  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
5455  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
5456
5457  auto device_kernel = [=] MFEM_DEVICE (int e)
5458  {
5459  constexpr int VDIM = 3;
5460  constexpr int maxCoeffDim = 9;
5461
5462  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
5463  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
5464  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
5465
5466  double opc[maxCoeffDim];
5467  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
5468  MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
5469
5470  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
5471
5473  {
5475  {
5477  {
5478  for (int i=0; i<coeffDim; ++i)
5479  {
5480  opc[i] = op(i,qx,qy,qz,e);
5481  }
5482  }
5483  }
5484  }
5485
5486  const int tidx = MFEM_THREAD_ID(x);
5487  const int tidy = MFEM_THREAD_ID(y);
5488  const int tidz = MFEM_THREAD_ID(z);
5489
5490  if (tidz == 0)
5491  {
5493  {
5495  {
5496  sBc[d][q] = Bc(q,d);
5497  sGc[d][q] = Gc(q,d);
5498  if (d < D1D-1)
5499  {
5500  sBo[d][q] = Bo(q,d);
5501  }
5502  }
5503  }
5504  }
5506
5507  for (int qz=0; qz < Q1D; ++qz)
5508  {
5509  if (tidz == qz)
5510  {
5512  {
5514  {
5515  for (int i=0; i<3; ++i)
5516  {
5517  mass[qy][qx][i] = 0.0;
5518  }
5519  }
5520  }
5521  }
5522
5523  int osc = 0;
5524  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
5525  {
5526  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
5527  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
5528  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
5529
5531  {
5533  {
5535  {
5536  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
5537  }
5538  }
5539  }
5541
5542  if (tidz == qz)
5543  {
5544  if (c == 0)
5545  {
5546  for (int i=0; i<coeffDim; ++i)
5547  {
5548  sop[i][tidx][tidy] = opc[i];
5549  }
5550  }
5551
5553  {
5555  {
5556  double u = 0.0;
5557
5558  for (int dz = 0; dz < D1Dz; ++dz)
5559  {
5560  const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
5561
5562  for (int dy = 0; dy < D1Dy; ++dy)
5563  {
5564  const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
5565
5566  for (int dx = 0; dx < D1Dx; ++dx)
5567  {
5568  const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
5569  u += wx * wy * wz;
5570  }
5571  }
5572  }
5573
5574  mass[qy][qx][c] += u;
5575  } // qx
5576  } // qy
5577  } // tidz == qz
5578
5579  osc += D1Dx * D1Dy * D1Dz;
5581  } // c
5582
5583  double dxyz1 = 0.0;
5584  double dxyz2 = 0.0;
5585  double dxyz3 = 0.0;
5586
5588  {
5589  const double wcz = sBc[dz][qz];
5590  const double wcDz = sGc[dz][qz];
5591  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
5592
5594  {
5596  {
5597  for (int qy = 0; qy < Q1D; ++qy)
5598  {
5599  const double wcy = sBc[dy][qy];
5600  const double wcDy = sGc[dy][qy];
5601  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
5602
5603  for (int qx = 0; qx < Q1D; ++qx)
5604  {
5605  const double O11 = sop[0][qx][qy];
5606  double c1, c2, c3;
5607  if (coeffDim == 1)
5608  {
5609  c1 = O11 * mass[qy][qx][0];
5610  c2 = O11 * mass[qy][qx][1];
5611  c3 = O11 * mass[qy][qx][2];
5612  }
5613  else
5614  {
5615  const double O12 = sop[1][qx][qy];
5616  const double O13 = sop[2][qx][qy];
5617  const double O21 = sop[3][qx][qy];
5618  const double O22 = sop[4][qx][qy];
5619  const double O23 = sop[5][qx][qy];
5620  const double O31 = sop[6][qx][qy];
5621  const double O32 = sop[7][qx][qy];
5622  const double O33 = sop[8][qx][qy];
5623
5624  c1 = (O11*mass[qy][qx][0])+(O12*mass[qy][qx][1])+(O13*mass[qy][qx][2]);
5625  c2 = (O21*mass[qy][qx][0])+(O22*mass[qy][qx][1])+(O23*mass[qy][qx][2]);
5626  c3 = (O31*mass[qy][qx][0])+(O32*mass[qy][qx][1])+(O33*mass[qy][qx][2]);
5627  }
5628
5629  const double wcx = sBc[dx][qx];
5630  const double wDx = sGc[dx][qx];
5631
5632  if (dx < D1D-1)
5633  {
5634  const double wx = sBo[dx][qx];
5635  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
5636  }
5637
5638  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
5639
5640  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
5641  } // qx
5642  } // qy
5643  } // dx
5644  } // dy
5645  } // dz
5646
5648
5650  {
5652  {
5654  {
5655  if (dx < D1D-1)
5656  {
5657  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
5658  }
5659  if (dy < D1D-1)
5660  {
5661  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
5662  }
5663  if (dz < D1D-1)
5664  {
5665  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
5666  }
5667  }
5668  }
5669  }
5670  } // qz
5671  }; // end of element loop
5672
5673  auto host_kernel = [&] MFEM_LAMBDA (int)
5674  {
5675  MFEM_ABORT_KERNEL("This kernel should only be used on GPU.");
5676  };
5677
5678  ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
5679 }
5680
5681 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
5682 {
5683  if (testType == mfem::FiniteElement::CURL &&
5684  trialType == mfem::FiniteElement::CURL && dim == 3)
5685  {
5686  const int ndata = coeffDim == 1 ? 1 : 9;
5688  {
5689  const int ID = (dofs1D << 4) | quad1D;
5690  switch (ID)
5691  {
5692  case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, ndata,
5693  ne, mapsO->B, mapsC->B,
5694  mapsC->G, pa_data, x, y);
5695  case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, ndata,
5696  ne, mapsO->B, mapsC->B,
5697  mapsC->G, pa_data, x, y);
5698  case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, ndata,
5699  ne, mapsO->B, mapsC->B,
5700  mapsC->G, pa_data, x, y);
5701  case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, ndata,
5702  ne, mapsO->B, mapsC->B,
5703  mapsC->G, pa_data, x, y);
5704  default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne,
5705  mapsO->B, mapsC->B,
5706  mapsC->G, pa_data, x, y);
5707  }
5708  }
5709  else
5710  PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B,
5711  mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
5712  }
5713  else if (testType == mfem::FiniteElement::CURL &&
5714  trialType == mfem::FiniteElement::DIV && dim == 3)
5715  {
5716  PAHcurlHdivApply3DTranspose(dofs1D, dofs1D, quad1D, ne, mapsO->B,
5717  mapsC->B, mapsO->Bt, mapsC->Bt,
5718  mapsC->Gt, pa_data, x, y);
5719  }
5720  else
5721  {
5722  MFEM_ABORT("Unsupported dimension or space!");
5723  }
5724 }
5725
5727  Vector &y) const
5728 {
5729  if (testType == mfem::FiniteElement::CURL &&
5730  trialType == mfem::FiniteElement::DIV && dim == 3)
5731  {
5732  PAHcurlHdivApply3D(dofs1D, dofs1D, quad1D, ne, mapsO->B,
5733  mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->G,
5734  pa_data, x, y);
5735  }
5736  else
5737  {
5738  MFEM_ABORT("Unsupported dimension or space!");
5739  }
5740 }
5741
5742 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
5743 // to get a dof in H(curl) (range). You can think of the range as the "test" space
5744 // and the domain as the "trial" space, but there's no integration.
5745 static void PAHcurlApplyGradient2D(const int c_dofs1D,
5746  const int o_dofs1D,
5747  const int NE,
5748  const Array<double> &B_,
5749  const Array<double> &G_,
5750  const Vector &x_,
5751  Vector &y_)
5752 {
5753  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5754  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5755
5756  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5757  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5758
5759  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5760  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5761
5762  MFEM_FORALL(e, NE,
5763  {
5764  double w[MAX_D1D][MAX_D1D];
5765
5766  // horizontal part
5767  for (int dx = 0; dx < c_dofs1D; ++dx)
5768  {
5769  for (int ey = 0; ey < c_dofs1D; ++ey)
5770  {
5771  w[dx][ey] = 0.0;
5772  for (int dy = 0; dy < c_dofs1D; ++dy)
5773  {
5774  w[dx][ey] += B(ey, dy) * x(dx, dy, e);
5775  }
5776  }
5777  }
5778
5779  for (int ey = 0; ey < c_dofs1D; ++ey)
5780  {
5781  for (int ex = 0; ex < o_dofs1D; ++ex)
5782  {
5783  double s = 0.0;
5784  for (int dx = 0; dx < c_dofs1D; ++dx)
5785  {
5786  s += G(ex, dx) * w[dx][ey];
5787  }
5788  const int local_index = ey*o_dofs1D + ex;
5789  y(local_index, e) += s;
5790  }
5791  }
5792
5793  // vertical part
5794  for (int dx = 0; dx < c_dofs1D; ++dx)
5795  {
5796  for (int ey = 0; ey < o_dofs1D; ++ey)
5797  {
5798  w[dx][ey] = 0.0;
5799  for (int dy = 0; dy < c_dofs1D; ++dy)
5800  {
5801  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5802  }
5803  }
5804  }
5805
5806  for (int ey = 0; ey < o_dofs1D; ++ey)
5807  {
5808  for (int ex = 0; ex < c_dofs1D; ++ex)
5809  {
5810  double s = 0.0;
5811  for (int dx = 0; dx < c_dofs1D; ++dx)
5812  {
5813  s += B(ex, dx) * w[dx][ey];
5814  }
5815  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5816  y(local_index, e) += s;
5817  }
5818  }
5819  });
5820 }
5821
5822 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
5823 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
5824  const int o_dofs1D,
5825  const int NE,
5826  const Array<double> &G_,
5827  const Vector &x_,
5828  Vector &y_)
5829 {
5830  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5831
5832  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5833  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5834
5835  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5836  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5837
5838  MFEM_FORALL(e, NE,
5839  {
5840  double w[MAX_D1D][MAX_D1D];
5841
5842  // horizontal part
5843  for (int dx = 0; dx < c_dofs1D; ++dx)
5844  {
5845  for (int ey = 0; ey < c_dofs1D; ++ey)
5846  {
5847  const int dy = ey;
5848  w[dx][ey] = x(dx, dy, e);
5849  }
5850  }
5851
5852  for (int ey = 0; ey < c_dofs1D; ++ey)
5853  {
5854  for (int ex = 0; ex < o_dofs1D; ++ex)
5855  {
5856  double s = 0.0;
5857  for (int dx = 0; dx < c_dofs1D; ++dx)
5858  {
5859  s += G(ex, dx) * w[dx][ey];
5860  }
5861  const int local_index = ey*o_dofs1D + ex;
5862  y(local_index, e) += s;
5863  }
5864  }
5865
5866  // vertical part
5867  for (int dx = 0; dx < c_dofs1D; ++dx)
5868  {
5869  for (int ey = 0; ey < o_dofs1D; ++ey)
5870  {
5871  w[dx][ey] = 0.0;
5872  for (int dy = 0; dy < c_dofs1D; ++dy)
5873  {
5874  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5875  }
5876  }
5877  }
5878
5879  for (int ey = 0; ey < o_dofs1D; ++ey)
5880  {
5881  for (int ex = 0; ex < c_dofs1D; ++ex)
5882  {
5883  const int dx = ex;
5884  const double s = w[dx][ey];
5885  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5886  y(local_index, e) += s;
5887  }
5888  }
5889  });
5890 }
5891
5893  const int c_dofs1D, const int o_dofs1D, const int NE,
5894  const Array<double> &B_, const Array<double> &G_,
5895  const Vector &x_, Vector &y_)
5896 {
5897  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5898  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5899
5900  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5901  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5902
5903  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5904  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5905
5906  MFEM_FORALL(e, NE,
5907  {
5908  double w[MAX_D1D][MAX_D1D];
5909
5910  // horizontal part (open x, closed y)
5911  for (int dy = 0; dy < c_dofs1D; ++dy)
5912  {
5913  for (int ex = 0; ex < o_dofs1D; ++ex)
5914  {
5915  w[dy][ex] = 0.0;
5916  for (int ey = 0; ey < c_dofs1D; ++ey)
5917  {
5918  const int local_index = ey*o_dofs1D + ex;
5919  w[dy][ex] += B(ey, dy) * x(local_index, e);
5920  }
5921  }
5922  }
5923
5924  for (int dy = 0; dy < c_dofs1D; ++dy)
5925  {
5926  for (int dx = 0; dx < c_dofs1D; ++dx)
5927  {
5928  double s = 0.0;
5929  for (int ex = 0; ex < o_dofs1D; ++ex)
5930  {
5931  s += G(ex, dx) * w[dy][ex];
5932  }
5933  y(dx, dy, e) += s;
5934  }
5935  }
5936
5937  // vertical part (open y, closed x)
5938  for (int dy = 0; dy < c_dofs1D; ++dy)
5939  {
5940  for (int ex = 0; ex < c_dofs1D; ++ex)
5941  {
5942  w[dy][ex] = 0.0;
5943  for (int ey = 0; ey < o_dofs1D; ++ey)
5944  {
5945  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5946  w[dy][ex] += G(ey, dy) * x(local_index, e);
5947  }
5948  }
5949  }
5950
5951  for (int dy = 0; dy < c_dofs1D; ++dy)
5952  {
5953  for (int dx = 0; dx < c_dofs1D; ++dx)
5954  {
5955  double s = 0.0;
5956  for (int ex = 0; ex < c_dofs1D; ++ex)
5957  {
5958  s += B(ex, dx) * w[dy][ex];
5959  }
5960  y(dx, dy, e) += s;
5961  }
5962  }
5963  });
5964 }
5965
5966 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
5967 // B is identity
5969  const int c_dofs1D, const int o_dofs1D, const int NE,
5970  const Array<double> &G_,
5971  const Vector &x_, Vector &y_)
5972 {
5973  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5974
5975  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5976  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5977
5978  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5979  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5980
5981  MFEM_FORALL(e, NE,
5982  {
5983  double w[MAX_D1D][MAX_D1D];
5984
5985  // horizontal part (open x, closed y)
5986  for (int dy = 0; dy < c_dofs1D; ++dy)
5987  {
5988  for (int ex = 0; ex < o_dofs1D; ++ex)
5989  {
5990  const int ey = dy;
5991  const int local_index = ey*o_dofs1D + ex;
5992  w[dy][ex] = x(local_index, e);
5993  }
5994  }
5995
5996  for (int dy = 0; dy < c_dofs1D; ++dy)
5997  {
5998  for (int dx = 0; dx < c_dofs1D; ++dx)
5999  {
6000  double s = 0.0;
6001  for (int ex = 0; ex < o_dofs1D; ++ex)
6002  {
6003  s += G(ex, dx) * w[dy][ex];
6004  }
6005  y(dx, dy, e) += s;
6006  }
6007  }
6008
6009  // vertical part (open y, closed x)
6010  for (int dy = 0; dy < c_dofs1D; ++dy)
6011  {
6012  for (int ex = 0; ex < c_dofs1D; ++ex)
6013  {
6014  w[dy][ex] = 0.0;
6015  for (int ey = 0; ey < o_dofs1D; ++ey)
6016  {
6017  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
6018  w[dy][ex] += G(ey, dy) * x(local_index, e);
6019  }
6020  }
6021  }
6022
6023  for (int dy = 0; dy < c_dofs1D; ++dy)
6024  {
6025  for (int dx = 0; dx < c_dofs1D; ++dx)
6026  {
6027  const int ex = dx;
6028  const double s = w[dy][ex];
6029  y(dx, dy, e) += s;
6030  }
6031  }
6032  });
6033 }
6034
6035 static void PAHcurlApplyGradient3D(const int c_dofs1D,
6036  const int o_dofs1D,
6037  const int NE,
6038  const Array<double> &B_,
6039  const Array<double> &G_,
6040  const Vector &x_,
6041  Vector &y_)
6042 {
6043  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
6044  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6045
6046  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6047  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6048
6049  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6050  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6051
6052  MFEM_FORALL(e, NE,
6053  {
6054  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6055  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6056
6057  // ---
6058  // dofs that point parallel to x-axis (open in x, closed in y, z)
6059  // ---
6060
6061  // contract in z
6062  for (int ez = 0; ez < c_dofs1D; ++ez)
6063  {
6064  for (int dx = 0; dx < c_dofs1D; ++dx)
6065  {
6066  for (int dy = 0; dy < c_dofs1D; ++dy)
6067  {
6068  w1[dx][dy][ez] = 0.0;
6069  for (int dz = 0; dz < c_dofs1D; ++dz)
6070  {
6071  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
6072  }
6073  }
6074  }
6075  }
6076
6077  // contract in y
6078  for (int ez = 0; ez < c_dofs1D; ++ez)
6079  {
6080  for (int ey = 0; ey < c_dofs1D; ++ey)
6081  {
6082  for (int dx = 0; dx < c_dofs1D; ++dx)
6083  {
6084  w2[dx][ey][ez] = 0.0;
6085  for (int dy = 0; dy < c_dofs1D; ++dy)
6086  {
6087  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
6088  }
6089  }
6090  }
6091  }
6092
6093  // contract in x
6094  for (int ez = 0; ez < c_dofs1D; ++ez)
6095  {
6096  for (int ey = 0; ey < c_dofs1D; ++ey)
6097  {
6098  for (int ex = 0; ex < o_dofs1D; ++ex)
6099  {
6100  double s = 0.0;
6101  for (int dx = 0; dx < c_dofs1D; ++dx)
6102  {
6103  s += G(ex, dx) * w2[dx][ey][ez];
6104  }
6105  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6106  y(local_index, e) += s;
6107  }
6108  }
6109  }
6110
6111  // ---
6112  // dofs that point parallel to y-axis (open in y, closed in x, z)
6113  // ---
6114
6115  // contract in z
6116  for (int ez = 0; ez < c_dofs1D; ++ez)
6117  {
6118  for (int dx = 0; dx < c_dofs1D; ++dx)
6119  {
6120  for (int dy = 0; dy < c_dofs1D; ++dy)
6121  {
6122  w1[dx][dy][ez] = 0.0;
6123  for (int dz = 0; dz < c_dofs1D; ++dz)
6124  {
6125  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
6126  }
6127  }
6128  }
6129  }
6130
6131  // contract in y
6132  for (int ez = 0; ez < c_dofs1D; ++ez)
6133  {
6134  for (int ey = 0; ey < o_dofs1D; ++ey)
6135  {
6136  for (int dx = 0; dx < c_dofs1D; ++dx)
6137  {
6138  w2[dx][ey][ez] = 0.0;
6139  for (int dy = 0; dy < c_dofs1D; ++dy)
6140  {
6141  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
6142  }
6143  }
6144  }
6145  }
6146
6147  // contract in x
6148  for (int ez = 0; ez < c_dofs1D; ++ez)
6149  {
6150  for (int ey = 0; ey < o_dofs1D; ++ey)
6151  {
6152  for (int ex = 0; ex < c_dofs1D; ++ex)
6153  {
6154  double s = 0.0;
6155  for (int dx = 0; dx < c_dofs1D; ++dx)
6156  {
6157  s += B(ex, dx) * w2[dx][ey][ez];
6158  }
6159  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6160  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6161  y(local_index, e) += s;
6162  }
6163  }
6164  }
6165
6166  // ---
6167  // dofs that point parallel to z-axis (open in z, closed in x, y)
6168  // ---
6169
6170  // contract in z
6171  for (int ez = 0; ez < o_dofs1D; ++ez)
6172  {
6173  for (int dx = 0; dx < c_dofs1D; ++dx)
6174  {
6175  for (int dy = 0; dy < c_dofs1D; ++dy)
6176  {
6177  w1[dx][dy][ez] = 0.0;
6178  for (int dz = 0; dz < c_dofs1D; ++dz)
6179  {
6180  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
6181  }
6182  }
6183  }
6184  }
6185
6186  // contract in y
6187  for (int ez = 0; ez < o_dofs1D; ++ez)
6188  {
6189  for (int ey = 0; ey < c_dofs1D; ++ey)
6190  {
6191  for (int dx = 0; dx < c_dofs1D; ++dx)
6192  {
6193  w2[dx][ey][ez] = 0.0;
6194  for (int dy = 0; dy < c_dofs1D; ++dy)
6195  {
6196  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
6197  }
6198  }
6199  }
6200  }
6201
6202  // contract in x
6203  for (int ez = 0; ez < o_dofs1D; ++ez)
6204  {
6205  for (int ey = 0; ey < c_dofs1D; ++ey)
6206  {
6207  for (int ex = 0; ex < c_dofs1D; ++ex)
6208  {
6209  double s = 0.0;
6210  for (int dx = 0; dx < c_dofs1D; ++dx)
6211  {
6212  s += B(ex, dx) * w2[dx][ey][ez];
6213  }
6214  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6215  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6216  y(local_index, e) += s;
6217  }
6218  }
6219  }
6220  });
6221 }
6222
6223 // Specialization of PAHcurlApplyGradient3D to the case where
6224 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
6225  const int o_dofs1D,
6226  const int NE,
6227  const Array<double> &G_,
6228  const Vector &x_,
6229  Vector &y_)
6230 {
6231  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6232
6233  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6234  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6235
6236  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6237  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6238
6239  MFEM_FORALL(e, NE,
6240  {
6241  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6242  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6243
6244  // ---
6245  // dofs that point parallel to x-axis (open in x, closed in y, z)
6246  // ---
6247
6248  // contract in z
6249  for (int ez = 0; ez < c_dofs1D; ++ez)
6250  {
6251  for (int dx = 0; dx < c_dofs1D; ++dx)
6252  {
6253  for (int dy = 0; dy < c_dofs1D; ++dy)
6254  {
6255  const int dz = ez;
6256  w1[dx][dy][ez] = x(dx, dy, dz, e);
6257  }
6258  }
6259  }
6260
6261  // contract in y
6262  for (int ez = 0; ez < c_dofs1D; ++ez)
6263  {
6264  for (int ey = 0; ey < c_dofs1D; ++ey)
6265  {
6266  for (int dx = 0; dx < c_dofs1D; ++dx)
6267  {
6268  const int dy = ey;
6269  w2[dx][ey][ez] = w1[dx][dy][ez];
6270  }
6271  }
6272  }
6273
6274  // contract in x
6275  for (int ez = 0; ez < c_dofs1D; ++ez)
6276  {
6277  for (int ey = 0; ey < c_dofs1D; ++ey)
6278  {
6279  for (int ex = 0; ex < o_dofs1D; ++ex)
6280  {
6281  double s = 0.0;
6282  for (int dx = 0; dx < c_dofs1D; ++dx)
6283  {
6284  s += G(ex, dx) * w2[dx][ey][ez];
6285  }
6286  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6287  y(local_index, e) += s;
6288  }
6289  }
6290  }
6291
6292  // ---
6293  // dofs that point parallel to y-axis (open in y, closed in x, z)
6294  // ---
6295
6296  // contract in z
6297  for (int ez = 0; ez < c_dofs1D; ++ez)
6298  {
6299  for (int dx = 0; dx < c_dofs1D; ++dx)
6300  {
6301  for (int dy = 0; dy < c_dofs1D; ++dy)
6302  {
6303  const int dz = ez;
6304  w1[dx][dy][ez] = x(dx, dy, dz, e);
6305  }
6306  }
6307  }
6308
6309  // contract in y
6310  for (int ez = 0; ez < c_dofs1D; ++ez)
6311  {
6312  for (int ey = 0; ey < o_dofs1D; ++ey)
6313  {
6314  for (int dx = 0; dx < c_dofs1D; ++dx)
6315  {
6316  w2[dx][ey][ez] = 0.0;
6317  for (int dy = 0; dy < c_dofs1D; ++dy)
6318  {
6319  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
6320  }
6321  }
6322  }
6323  }
6324
6325  // contract in x
6326  for (int ez = 0; ez < c_dofs1D; ++ez)
6327  {
6328  for (int ey = 0; ey < o_dofs1D; ++ey)
6329  {
6330  for (int ex = 0; ex < c_dofs1D; ++ex)
6331  {
6332  const int dx = ex;
6333  const double s = w2[dx][ey][ez];
6334  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6335  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6336  y(local_index, e) += s;
6337  }
6338  }
6339  }
6340
6341  // ---
6342  // dofs that point parallel to z-axis (open in z, closed in x, y)
6343  // ---
6344
6345  // contract in z
6346  for (int ez = 0; ez < o_dofs1D; ++ez)
6347  {
6348  for (int dx = 0; dx < c_dofs1D; ++dx)
6349  {
6350  for (int dy = 0; dy < c_dofs1D; ++dy)
6351  {
6352  w1[dx][dy][ez] = 0.0;
6353  for (int dz = 0; dz < c_dofs1D; ++dz)
6354  {
6355  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
6356  }
6357  }
6358  }
6359  }
6360
6361  // contract in y
6362  for (int ez = 0; ez < o_dofs1D; ++ez)
6363  {
6364  for (int ey = 0; ey < c_dofs1D; ++ey)
6365  {
6366  for (int dx = 0; dx < c_dofs1D; ++dx)
6367  {
6368  const int dy = ey;
6369  w2[dx][ey][ez] = w1[dx][dy][ez];
6370  }
6371  }
6372  }
6373
6374  // contract in x
6375  for (int ez = 0; ez < o_dofs1D; ++ez)
6376  {
6377  for (int ey = 0; ey < c_dofs1D; ++ey)
6378  {
6379  for (int ex = 0; ex < c_dofs1D; ++ex)
6380  {
6381  const int dx = ex;
6382  const double s = w2[dx][ey][ez];
6383  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6384  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6385  y(local_index, e) += s;
6386  }
6387  }
6388  }
6389  });
6390 }
6391
6393  const int c_dofs1D, const int o_dofs1D, const int NE,
6394  const Array<double> &B_, const Array<double> &G_,
6395  const Vector &x_, Vector &y_)
6396 {
6397  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
6398  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6399
6400  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6401  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6402
6403  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6404  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6405
6406  MFEM_FORALL(e, NE,
6407  {
6408  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6409  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6410  // ---
6411  // dofs that point parallel to x-axis (open in x, closed in y, z)
6412  // ---
6413
6414  // contract in z
6415  for (int dz = 0; dz < c_dofs1D; ++dz)
6416  {
6417  for (int ex = 0; ex < o_dofs1D; ++ex)
6418  {
6419  for (int ey = 0; ey < c_dofs1D; ++ey)
6420  {
6421  w1[ex][ey][dz] = 0.0;
6422  for (int ez = 0; ez < c_dofs1D; ++ez)
6423  {
6424  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6425  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6426  }
6427  }
6428  }
6429  }
6430
6431  // contract in y
6432  for (int dz = 0; dz < c_dofs1D; ++dz)
6433  {
6434  for (int dy = 0; dy < c_dofs1D; ++dy)
6435  {
6436  for (int ex = 0; ex < o_dofs1D; ++ex)
6437  {
6438  w2[ex][dy][dz] = 0.0;
6439  for (int ey = 0; ey < c_dofs1D; ++ey)
6440  {
6441  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6442  }
6443  }
6444  }
6445  }
6446
6447  // contract in x
6448  for (int dz = 0; dz < c_dofs1D; ++dz)
6449  {
6450  for (int dy = 0; dy < c_dofs1D; ++dy)
6451  {
6452  for (int dx = 0; dx < c_dofs1D; ++dx)
6453  {
6454  double s = 0.0;
6455  for (int ex = 0; ex < o_dofs1D; ++ex)
6456  {
6457  s += G(ex, dx) * w2[ex][dy][dz];
6458  }
6459  y(dx, dy, dz, e) += s;
6460  }
6461  }
6462  }
6463
6464  // ---
6465  // dofs that point parallel to y-axis (open in y, closed in x, z)
6466  // ---
6467
6468  // contract in z
6469  for (int dz = 0; dz < c_dofs1D; ++dz)
6470  {
6471  for (int ex = 0; ex < c_dofs1D; ++ex)
6472  {
6473  for (int ey = 0; ey < o_dofs1D; ++ey)
6474  {
6475  w1[ex][ey][dz] = 0.0;
6476  for (int ez = 0; ez < c_dofs1D; ++ez)
6477  {
6478  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6479  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6480  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6481  }
6482  }
6483  }
6484  }
6485
6486  // contract in y
6487  for (int dz = 0; dz < c_dofs1D; ++dz)
6488  {
6489  for (int dy = 0; dy < c_dofs1D; ++dy)
6490  {
6491  for (int ex = 0; ex < c_dofs1D; ++ex)
6492  {
6493  w2[ex][dy][dz] = 0.0;
6494  for (int ey = 0; ey < o_dofs1D; ++ey)
6495  {
6496  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6497  }
6498  }
6499  }
6500  }
6501
6502  // contract in x
6503  for (int dz = 0; dz < c_dofs1D; ++dz)
6504  {
6505  for (int dy = 0; dy < c_dofs1D; ++dy)
6506  {
6507  for (int dx = 0; dx < c_dofs1D; ++dx)
6508  {
6509  double s = 0.0;
6510  for (int ex = 0; ex < c_dofs1D; ++ex)
6511  {
6512  s += B(ex, dx) * w2[ex][dy][dz];
6513  }
6514  y(dx, dy, dz, e) += s;
6515  }
6516  }
6517  }
6518
6519  // ---
6520  // dofs that point parallel to z-axis (open in z, closed in x, y)
6521  // ---
6522
6523  // contract in z
6524  for (int dz = 0; dz < c_dofs1D; ++dz)
6525  {
6526  for (int ex = 0; ex < c_dofs1D; ++ex)
6527  {
6528  for (int ey = 0; ey < c_dofs1D; ++ey)
6529  {
6530  w1[ex][ey][dz] = 0.0;
6531  for (int ez = 0; ez < o_dofs1D; ++ez)
6532  {
6533  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6534  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6535  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6536  }
6537  }
6538  }
6539  }
6540
6541  // contract in y
6542  for (int dz = 0; dz < c_dofs1D; ++dz)
6543  {
6544  for (int dy = 0; dy < c_dofs1D; ++dy)
6545  {
6546  for (int ex = 0; ex < c_dofs1D; ++ex)
6547  {
6548  w2[ex][dy][dz] = 0.0;
6549  for (int ey = 0; ey < c_dofs1D; ++ey)
6550  {
6551  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6552  }
6553  }
6554  }
6555  }
6556
6557  // contract in x
6558  for (int dz = 0; dz < c_dofs1D; ++dz)
6559  {
6560  for (int dy = 0; dy < c_dofs1D; ++dy)
6561  {
6562  for (int dx = 0; dx < c_dofs1D; ++dx)
6563  {
6564  double s = 0.0;
6565  for (int ex = 0; ex < c_dofs1D; ++ex)
6566  {
6567  s += B(ex, dx) * w2[ex][dy][dz];
6568  }
6569  y(dx, dy, dz, e) += s;
6570  }
6571  }
6572  }
6573  });
6574 }
6575
6576 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
6578  const int c_dofs1D, const int o_dofs1D, const int NE,
6579  const Array<double> &G_,
6580  const Vector &x_, Vector &y_)
6581 {
6582  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6583
6584  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6585  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6586
6587  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6588  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6589
6590  MFEM_FORALL(e, NE,
6591  {
6592  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6593  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6594  // ---
6595  // dofs that point parallel to x-axis (open in x, closed in y, z)
6596  // ---
6597
6598  // contract in z
6599  for (int dz = 0; dz < c_dofs1D; ++dz)
6600  {
6601  for (int ex = 0; ex < o_dofs1D; ++ex)
6602  {
6603  for (int ey = 0; ey < c_dofs1D; ++ey)
6604  {
6605  const int ez = dz;
6606  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6607  w1[ex][ey][dz] = x(local_index, e);
6608  }
6609  }
6610  }
6611
6612  // contract in y
6613  for (int dz = 0; dz < c_dofs1D; ++dz)
6614  {
6615  for (int dy = 0; dy < c_dofs1D; ++dy)
6616  {
6617  for (int ex = 0; ex < o_dofs1D; ++ex)
6618  {
6619  const int ey = dy;
6620  w2[ex][dy][dz] = w1[ex][ey][dz];
6621  }
6622  }
6623  }
6624
6625  // contract in x
6626  for (int dz = 0; dz < c_dofs1D; ++dz)
6627  {
6628  for (int dy = 0; dy < c_dofs1D; ++dy)
6629  {
6630  for (int dx = 0; dx < c_dofs1D; ++dx)
6631  {
6632  double s = 0.0;
6633  for (int ex = 0; ex < o_dofs1D; ++ex)
6634  {
6635  s += G(ex, dx) * w2[ex][dy][dz];
6636  }
6637  y(dx, dy, dz, e) += s;
6638  }
6639  }
6640  }
6641
6642  // ---
6643  // dofs that point parallel to y-axis (open in y, closed in x, z)
6644  // ---
6645
6646  // contract in z
6647  for (int dz = 0; dz < c_dofs1D; ++dz)
6648  {
6649  for (int ex = 0; ex < c_dofs1D; ++ex)
6650  {
6651  for (int ey = 0; ey < o_dofs1D; ++ey)
6652  {
6653  const int ez = dz;
6654  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6655  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6656  w1[ex][ey][dz] = x(local_index, e);
6657  }
6658  }
6659  }
6660
6661  // contract in y
6662  for (int dz = 0; dz < c_dofs1D; ++dz)
6663  {
6664  for (int dy = 0; dy < c_dofs1D; ++dy)
6665  {
6666  for (int ex = 0; ex < c_dofs1D; ++ex)
6667  {
6668  w2[ex][dy][dz] = 0.0;
6669  for (int ey = 0; ey < o_dofs1D; ++ey)
6670  {
6671  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6672  }
6673  }
6674  }
6675  }
6676
6677  // contract in x
6678  for (int dz = 0; dz < c_dofs1D; ++dz)
6679  {
6680  for (int dy = 0; dy < c_dofs1D; ++dy)
6681  {
6682  for (int dx = 0; dx < c_dofs1D; ++dx)
6683  {
6684  const int ex = dx;
6685  double s = w2[ex][dy][dz];
6686  y(dx, dy, dz, e) += s;
6687  }
6688  }
6689  }
6690
6691  // ---
6692  // dofs that point parallel to z-axis (open in z, closed in x, y)
6693  // ---
6694
6695  // contract in z
6696  for (int dz = 0; dz < c_dofs1D; ++dz)
6697  {
6698  for (int ex = 0; ex < c_dofs1D; ++ex)
6699  {
6700  for (int ey = 0; ey < c_dofs1D; ++ey)
6701  {
6702  w1[ex][ey][dz] = 0.0;
6703  for (int ez = 0; ez < o_dofs1D; ++ez)
6704  {
6705  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6706  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6707  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6708  }
6709  }
6710  }
6711  }
6712
6713  // contract in y
6714  for (int dz = 0; dz < c_dofs1D; ++dz)
6715  {
6716  for (int dy = 0; dy < c_dofs1D; ++dy)
6717  {
6718  for (int ex = 0; ex < c_dofs1D; ++ex)
6719  {
6720  const int ey = dy;
6721  w2[ex][dy][dz] = w1[ex][ey][dz];
6722  }
6723  }
6724  }
6725
6726  // contract in x
6727  for (int dz = 0; dz < c_dofs1D; ++dz)
6728  {
6729  for (int dy = 0; dy < c_dofs1D; ++dy)
6730  {
6731  for (int dx = 0; dx < c_dofs1D; ++dx)
6732  {
6733  const int ex = dx;
6734  double s = w2[ex][dy][dz];
6735  y(dx, dy, dz, e) += s;
6736  }
6737  }
6738  }
6739  });
6740 }
6741
6743  const FiniteElementSpace &test_fes)
6744 {
6745  // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6746  Mesh *mesh = trial_fes.GetMesh();
6747  const FiniteElement *trial_fel = trial_fes.GetFE(0);
6748  const FiniteElement *test_fel = test_fes.GetFE(0);
6749
6750  const NodalTensorFiniteElement *trial_el =
6751  dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6752  MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
6753
6754  const VectorTensorFiniteElement *test_el =
6755  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6756  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
6757
6758  const int dims = trial_el->GetDim();
6759  MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!");
6760  dim = mesh->Dimension();
6761  MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!");
6762  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
6763  "Orders do not match!");
6764  ne = trial_fes.GetNE();
6765
6766  const int order = trial_el->GetOrder();
6767  dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());